In [1]:
import pandas as pd
import numpy as np

# load the preprocessed bans/picks file
df = pd.read_pickle("df_step2_bans_picks.pkl")
df.head()
df.shape

(101843, 34)

In [2]:
# helper: collect all participants into a long table
long_rows = []

for i in range(10):
    champ_col = f"participant{i}ChampionName"
    pos_col   = f"participant{i}TeamPosition"
    
    tmp = df[["matchId", "gameVersion", champ_col, pos_col]].copy()
    tmp = tmp.rename(columns={
        champ_col: "champion",
        pos_col:   "position"
    })
    
    tmp["slot"] = i
    tmp["team"] = np.where(i < 5, 0, 1)  # 0 = blue side, 1 = red side
    
    long_rows.append(tmp)

players_long = pd.concat(long_rows, ignore_index=True)

players_long.head()

Unnamed: 0,matchId,gameVersion,champion,position,slot,team
0,NA1_5348438296,15.16.704.6097,Teemo,TOP,0,0
1,NA1_5348419072,15.16.704.6097,Gangplank,TOP,0,0
2,NA1_5345908214,15.15.701.6241,Malphite,TOP,0,0
3,NA1_5341292117,15.15.701.6241,Gangplank,TOP,0,0
4,NA1_5341241370,15.15.701.6241,Shen,TOP,0,0


In [3]:
team_picks = (
    players_long
    .pivot_table(
        index=["matchId", "gameVersion", "team"],
        columns="position",
        values="champion",
        aggfunc="first"
    )
)

team_picks.columns = [f"ally_{c}" for c in team_picks.columns]
team_picks = team_picks.reset_index()

In [None]:
roles = ["TOP", "JUNGLE", "MIDDLE", "BOTTOM", "UTILITY"]
ally_cols = [f"ally_{r}" for r in roles]

# the enemy picks by flipping team
enemy_view = team_picks[["matchId", "gameVersion", "team"] + ally_cols].copy()

# from each team's perspective, the enemy is the other team
enemy_view["team"] = 1 - enemy_view["team"]

# rename ally_* -> enemy_*
enemy_view = enemy_view.rename(
    columns={col: col.replace("ally_", "enemy_") for col in ally_cols}
)

# merge back so each row has both ally_* and enemy_* picks
team_picks_with_enemy = team_picks.merge(
    enemy_view,
    on=["matchId", "gameVersion", "team"],
    how="left"
)

team_picks_with_enemy.head()


Unnamed: 0,matchId,gameVersion,team,ally_BOTTOM,ally_JUNGLE,ally_MIDDLE,ally_TOP,ally_UTILITY,enemy_TOP,enemy_JUNGLE,enemy_MIDDLE,enemy_BOTTOM,enemy_UTILITY
0,NA1_5326075748,15.13.693.4876,0,Vayne,Kindred,Azir,Pantheon,Milio,Aatrox,Trundle,Velkoz,Smolder,Anivia
1,NA1_5326075748,15.13.693.4876,1,Smolder,Trundle,Velkoz,Aatrox,Anivia,Pantheon,Kindred,Azir,Vayne,Milio
2,NA1_5326077345,15.13.693.4876,0,Jinx,Viego,Xerath,Garen,Yuumi,Renekton,Maokai,Swain,Ezreal,Janna
3,NA1_5326077345,15.13.693.4876,1,Ezreal,Maokai,Swain,Renekton,Janna,Garen,Viego,Xerath,Jinx,Yuumi
4,NA1_5326077813,15.13.693.4876,0,Veigar,Pantheon,AurelionSol,Chogath,MissFortune,Kennen,Hecarim,Zed,Caitlyn,Lux


In [5]:
ban_rows = []

for t in [0, 1]:
    for i in range(5):
        col = f"team{t}Ban{i}ChampionName"
        
        tmp = df[["matchId", col]].copy()
        tmp["team"] = t
        tmp["ban_index"] = i
        tmp["champion"] = tmp[col]
        tmp = tmp[["matchId", "team", "ban_index", "champion"]]
        
        ban_rows.append(tmp)

bans_long = pd.concat(ban_rows, ignore_index=True)

In [None]:
# ally bans
ally_bans = (
    bans_long
    .rename(columns={"champion": "ally_ban"})
    .pivot_table(
        index=["matchId", "team"],
        columns="ban_index",
        values="ally_ban",
        aggfunc="first"
    )
)
ally_bans.columns = [f"ally_ban{i}" for i in range(5)]
ally_bans = ally_bans.reset_index()


In [None]:
# enemy bans
enemy_bans = (
    bans_long
    .assign(team=lambda x: 1 - x["team"])
    .rename(columns={"champion": "enemy_ban"})
    .pivot_table(
        index=["matchId", "team"],
        columns="ban_index",
        values="enemy_ban",
        aggfunc="first"
    )
)
enemy_bans.columns = [f"enemy_ban{i}" for i in range(5)]
enemy_bans = enemy_bans.reset_index()



In [8]:
wins = df[["matchId", "team0Win", "team1Win"]].drop_duplicates("matchId")

wins_long = (
    wins
    .melt(id_vars="matchId", value_vars=["team0Win", "team1Win"],
          var_name="t", value_name="win")
)

wins_long["team"] = wins_long["t"].map({"team0Win": 0, "team1Win": 1})
wins_long = wins_long[["matchId", "team", "win"]]


In [9]:
team_df = (
    team_picks_with_enemy
    .merge(ally_bans, on=["matchId", "team"], how="left")
    .merge(enemy_bans, on=["matchId", "team"], how="left")
    .merge(wins_long, on=["matchId", "team"], how="left")
)
#print column names
print(team_df.columns.tolist())
#save team_df
team_df.to_pickle("team_df.pkl")

['matchId', 'gameVersion', 'team', 'ally_BOTTOM', 'ally_JUNGLE', 'ally_MIDDLE', 'ally_TOP', 'ally_UTILITY', 'enemy_TOP', 'enemy_JUNGLE', 'enemy_MIDDLE', 'enemy_BOTTOM', 'enemy_UTILITY', 'ally_ban0', 'ally_ban1', 'ally_ban2', 'ally_ban3', 'ally_ban4', 'enemy_ban0', 'enemy_ban1', 'enemy_ban2', 'enemy_ban3', 'enemy_ban4', 'win']


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

roles = ["TOP", "JUNGLE", "MIDDLE", "BOTTOM", "UTILITY"]

for role in roles:
    print(f"\n=== Training RF for role: {role} (target = ally_{role}) ===")
    target_col = f"ally_{role}"

    # Keep only rows where this role exists
    role_df = team_df.dropna(subset=[target_col]).copy()

    # Drop ultra-rare champs for this role
    min_count = 2
    vc = role_df[target_col].value_counts()
    keep_labels = vc[vc >= min_count].index
    n_before = len(role_df)
    role_df = role_df[role_df[target_col].isin(keep_labels)].copy()
    n_after = len(role_df)
    print(f"  Dropped {n_before - n_after} rows with too-rare {role} champs.")
    print(f"  Remaining unique {role} champs: {role_df[target_col].nunique()}")

    if role_df[target_col].nunique() < 2:
        print("  Not enough classes after filtering; skipping this role.")
        continue

    # Define X, y
    y = role_df[target_col]
    X = role_df.drop(
        columns=[
            target_col,
            "matchId",
            # drop win
            "win",
            # "team",
        ]
    )
    # Encode y
    le = LabelEncoder()
    y_enc = le.fit_transform(y)

    # Train/test split with stratification
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y_enc,
        test_size=0.2,
        random_state=42,
        stratify=y_enc,
    )

    # Build preprocessing + RF
    cat_cols = X.columns.tolist()
    preprocessor = ColumnTransformer(
        transformers=[
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)
        ]
    )

    clf = Pipeline(
        steps=[
            ("prep", preprocessor),
            ("rf", RandomForestClassifier(
                n_estimators=200,
                max_depth=30,
                min_samples_leaf=2,
                max_features="sqrt",
                n_jobs=1,
                random_state=42,
            )),
        ]
    )

    # Train
    clf.fit(X_train, y_train)

    # Evaluate
    acc = clf.score(X_test, y_test)
    print(
        f"{role}: accuracy = {acc:.3f}  |  "
        f"n_train = {len(X_train)}, n_test = {len(X_test)}"
    )



=== Training RF for role: TOP (target = ally_TOP) ===
  Dropped 0 rows with too-rare TOP champs.
  Remaining unique TOP champs: 171
TOP: accuracy = 0.073  |  n_train = 162854, n_test = 40714

=== Training RF for role: JUNGLE (target = ally_JUNGLE) ===
  Dropped 7 rows with too-rare JUNGLE champs.
  Remaining unique JUNGLE champs: 158
JUNGLE: accuracy = 0.069  |  n_train = 162872, n_test = 40718

=== Training RF for role: MIDDLE (target = ally_MIDDLE) ===
  Dropped 2 rows with too-rare MIDDLE champs.
  Remaining unique MIDDLE champs: 169
MIDDLE: accuracy = 0.062  |  n_train = 162856, n_test = 40715

=== Training RF for role: BOTTOM (target = ally_BOTTOM) ===
  Dropped 8 rows with too-rare BOTTOM champs.
  Remaining unique BOTTOM champs: 161
BOTTOM: accuracy = 0.143  |  n_train = 162864, n_test = 40717

=== Training RF for role: UTILITY (target = ally_UTILITY) ===
  Dropped 2 rows with too-rare UTILITY champs.
  Remaining unique UTILITY champs: 166
UTILITY: accuracy = 0.118  |  n_train 

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

roles = ["TOP", "JUNGLE", "MIDDLE", "BOTTOM", "UTILITY"]

for role in roles:
    print(f"\n=== Training RF for role: {role} (target = ally_{role}) ===")
    target_col = f"ally_{role}"

    # Keep only rows where this role exists
    role_df = team_df.dropna(subset=[target_col]).copy()

    # Drop ultra-rare champs for this role
    min_count = 2
    vc = role_df[target_col].value_counts()
    keep_labels = vc[vc >= min_count].index
    n_before = len(role_df)
    role_df = role_df[role_df[target_col].isin(keep_labels)].copy()
    n_after = len(role_df)
    print(f"  Dropped {n_before - n_after} rows with too-rare {role} champs.")
    print(f"  Remaining unique {role} champs: {role_df[target_col].nunique()}")

    if role_df[target_col].nunique() < 2:
        print("  Not enough classes after filtering; skipping this role.")
        continue

    # Define X, y
    y = role_df[target_col]
    X = role_df.drop(
        columns=[
            target_col,
            "matchId",
            # drop win
            "win",
            # "team",
        ]
    )

    # Encode target
    le = LabelEncoder()
    y_enc = le.fit_transform(y)

    # Train/test split with stratification (now safe)
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y_enc,
        test_size=0.2,
        random_state=42,
        stratify=y_enc,
    )

    from sklearn.model_selection import RandomizedSearchCV

    # Build preprocessing
    cat_cols = X.columns.tolist()
    preprocessor = ColumnTransformer(
        transformers=[
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)
        ]
    )

    # Base model (Random Forest)
    rf_base = RandomForestClassifier(random_state=42,n_jobs=-1)

    pipe = Pipeline(
        steps=[
            ("prep", preprocessor),
            ("rf", rf_base)
        ]
    )

    # Hyperparameter space
    param_dist = {
        "rf__n_estimators": [100, 200, 400],
        "rf__max_depth": [None, 20, 40, 60],
        "rf__min_samples_leaf": [1, 2, 4],
        "rf__min_samples_split": [2, 5, 10],
        "rf__max_features": ["sqrt", "log2"],
    }

    # Randomized Search
    search = RandomizedSearchCV(
        estimator=pipe,
        param_distributions=param_dist,
        n_iter=12,
        cv=3,
        scoring="accuracy",
        n_jobs=1,
        random_state=42,
        verbose=1,
    )

    # Train using hyperparam tuning
    search.fit(X_train, y_train)

    # Evaluate
    best_model = search.best_estimator_
    acc = best_model.score(X_test, y_test)

    print(f"{role}: accuracy = {acc:.3f}")
    print("  Best params:", search.best_params_)


=== Training RF for role: TOP (target = ally_TOP) ===
  Dropped 0 rows with too-rare TOP champs.
  Remaining unique TOP champs: 171
Fitting 3 folds for each of 12 candidates, totalling 36 fits
TOP: accuracy = 0.082
  Best params: {'rf__n_estimators': 100, 'rf__min_samples_split': 10, 'rf__min_samples_leaf': 2, 'rf__max_features': 'sqrt', 'rf__max_depth': None}

=== Training RF for role: JUNGLE (target = ally_JUNGLE) ===
  Dropped 7 rows with too-rare JUNGLE champs.
  Remaining unique JUNGLE champs: 158
Fitting 3 folds for each of 12 candidates, totalling 36 fits




JUNGLE: accuracy = 0.074
  Best params: {'rf__n_estimators': 400, 'rf__min_samples_split': 2, 'rf__min_samples_leaf': 4, 'rf__max_features': 'sqrt', 'rf__max_depth': 60}

=== Training RF for role: MIDDLE (target = ally_MIDDLE) ===
  Dropped 2 rows with too-rare MIDDLE champs.
  Remaining unique MIDDLE champs: 169
Fitting 3 folds for each of 12 candidates, totalling 36 fits




MIDDLE: accuracy = 0.068
  Best params: {'rf__n_estimators': 400, 'rf__min_samples_split': 2, 'rf__min_samples_leaf': 4, 'rf__max_features': 'sqrt', 'rf__max_depth': 60}

=== Training RF for role: BOTTOM (target = ally_BOTTOM) ===
  Dropped 8 rows with too-rare BOTTOM champs.
  Remaining unique BOTTOM champs: 161
Fitting 3 folds for each of 12 candidates, totalling 36 fits




BOTTOM: accuracy = 0.177
  Best params: {'rf__n_estimators': 100, 'rf__min_samples_split': 10, 'rf__min_samples_leaf': 2, 'rf__max_features': 'sqrt', 'rf__max_depth': None}

=== Training RF for role: UTILITY (target = ally_UTILITY) ===
  Dropped 2 rows with too-rare UTILITY champs.
  Remaining unique UTILITY champs: 166
Fitting 3 folds for each of 12 candidates, totalling 36 fits




UTILITY: accuracy = 0.125
  Best params: {'rf__n_estimators': 400, 'rf__min_samples_split': 2, 'rf__min_samples_leaf': 4, 'rf__max_features': 'sqrt', 'rf__max_depth': 60}


In [None]:
import time

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from xgboost import XGBClassifier

roles = ["TOP", "JUNGLE", "MIDDLE", "BOTTOM", "UTILITY"]

for role in roles:
    print("\n" + "=" * 80)
    print(f"=== Training XGBoost for role: {role} (target = ally_{role}) ===")
    print("=" * 80)

    target_col = f"ally_{role}"

    # keep only rows where this role exists
    role_df = team_df.dropna(subset=[target_col]).copy()
    print(f"  Initial rows for {role}: {len(role_df)}")

    # downsample for speed
    max_rows = 50_000
    if len(role_df) > max_rows:
        role_df = role_df.sample(n=max_rows, random_state=42)
        print(f"  Downsampled to {len(role_df)} rows for tuning.")

    # drop ultra-rare champs so stratify doesn't explode
    min_count = 2
    vc = role_df[target_col].value_counts()
    keep_labels = vc[vc >= min_count].index
    n_before = len(role_df)
    role_df = role_df[role_df[target_col].isin(keep_labels)].copy()
    n_after = len(role_df)
    print(f"  Dropped {n_before - n_after} rows with too-rare {role} champs.")
    print(f"  Remaining rows: {n_after}")
    print(f"  Remaining unique {role} champs: {role_df[target_col].nunique()}")

    if role_df[target_col].nunique() < 2:
        print("  Not enough classes after filtering; skipping this role.")
        continue

    # define X, y
    y = role_df[target_col]
    X = role_df.drop(columns=[target_col, "matchId", "win"])

    # encode target
    le = LabelEncoder()
    y_enc = le.fit_transform(y)

    # train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y_enc,
        test_size=0.2,
        random_state=42,
        stratify=y_enc
    )
    print(f"  Train size: {len(X_train)}, Test size: {len(X_test)}")

    # one-hot encode all categorical features
    cat_cols = X.columns.tolist()
    preprocessor = ColumnTransformer(
        transformers=[("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)]
    )

    # base XGB model (GPU
    xgb = XGBClassifier(
        objective="multi:softprob",
        eval_metric="mlogloss",
        tree_method="hist",
        device="cuda",
        verbosity=2,
        random_state=42,
    )

    print("  XGBoost device:", xgb.get_params()["device"])

    # PIPELINE
    pipe = Pipeline(steps=[("prep", preprocessor), ("xgb", xgb)])

    param_dist = {
        "xgb__n_estimators":      [200, 400],
        "xgb__max_depth":         [3, 5],
        "xgb__learning_rate":     [0.1, 0.05],
        "xgb__subsample":         [0.8, 1.0],
        "xgb__colsample_bytree":  [0.8, 1.0],
        "xgb__min_child_weight":  [1, 5],
    }

    search = RandomizedSearchCV(
        estimator=pipe,
        param_distributions=param_dist,
        n_iter=5,
        cv=2,
        verbose=2,
        n_jobs=1,
        random_state=42
    )

    print("  >> Starting RandomizedSearchCV...")
    start_time = time.time()

    # Fit random search (GPU
    search.fit(X_train, y_train)

    elapsed = time.time() - start_time
    print(f"  >> Finished search for {role} in {elapsed:.1f} seconds")

    print("  Best CV score:", search.best_score_)
    print("  Best params:", search.best_params_)

    # Evaluate on held-out test set
    test_acc = search.score(X_test, y_test)
    print(f"{role}: test accuracy = {test_acc:.3f}")



=== Training XGBoost for role: TOP (target = ally_TOP) ===
  Initial rows for TOP: 203568
  Downsampled to 50000 rows for tuning.
  Dropped 4 rows with too-rare TOP champs.
  Remaining rows: 49996
  Remaining unique TOP champs: 163
  Train size: 39996, Test size: 10000
  XGBoost device: cuda
  >> Starting RandomizedSearchCV...
Fitting 2 folds for each of 5 candidates, totalling 10 fits
[22:08:30] INFO: C:\actions-runner\_work\xgboost\xgboost\src\data\iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (19998, 3018, 419958).
[22:08:30] INFO: C:\actions-runner\_work\xgboost\xgboost\src\data\ellpack_page.cu:174: Ellpack is sparse.
[CV] END xgb__colsample_bytree=1.0, xgb__learning_rate=0.05, xgb__max_depth=3, xgb__min_child_weight=5, xgb__n_estimators=200, xgb__subsample=0.8; total time= 1.0min
[22:09:31] INFO: C:\actions-runner\_work\xgboost\xgboost\src\data\iterative_dmatrix.cc:56: Finished constructing the `IterativeDMatrix`: (19998, 2987, 419958).
[22:09:31] INFO: C

In [None]:
import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from xgboost import XGBClassifier
#copied from logs
best_params_per_role = {
    "TOP": {
        "subsample": 0.8,
        "n_estimators": 200,
        "min_child_weight": 1,
        "max_depth": 3,
        "learning_rate": 0.1,
        "colsample_bytree": 0.8,
    },
    "JUNGLE": {
        "subsample": 1.0,
        "n_estimators": 200,
        "min_child_weight": 5,
        "max_depth": 3,
        "learning_rate": 0.1,
        "colsample_bytree": 0.8,
    },
    "MIDDLE": {
        "subsample": 1.0,
        "n_estimators": 200,
        "min_child_weight": 5,
        "max_depth": 3,
        "learning_rate": 0.1,
        "colsample_bytree": 0.8,
    },
    "BOTTOM": {
        "subsample": 0.8,
        "n_estimators": 200,
        "min_child_weight": 5,
        "max_depth": 3,
        "learning_rate": 0.05,
        "colsample_bytree": 1.0,
    },
    "UTILITY": {
        "subsample": 1.0,
        "n_estimators": 200,
        "min_child_weight": 5,
        "max_depth": 3,
        "learning_rate": 0.1,
        "colsample_bytree": 0.8,
    },
}

roles = ["TOP", "JUNGLE", "MIDDLE", "BOTTOM", "UTILITY"]

for role in roles:
    print("\n" + "=" * 80)
    print(f"=== FINAL XGBoost train for role: {role} (target = ally_{role}) ===")
    print("=" * 80)

    target_col = f"ally_{role}"

    # keep only rows where this role exists
    role_df = team_df.dropna(subset=[target_col]).copy()
    print(f"  Rows for {role}: {len(role_df)}")

    # drop ultra-rare champs
    min_count = 2
    vc = role_df[target_col].value_counts()
    keep_labels = vc[vc >= min_count].index
    n_before = len(role_df)
    role_df = role_df[role_df[target_col].isin(keep_labels)].copy()
    n_after = len(role_df)
    print(f"  Dropped {n_before - n_after} rows with too-rare {role} champs.")
    print(f"  Remaining rows: {n_after}")
    print(f"  Remaining unique {role} champs: {role_df[target_col].nunique()}")

    if role_df[target_col].nunique() < 2:
        print("  Not enough classes after filtering; skipping this role.")
        continue

    # define X, y
    y = role_df[target_col]
    X = role_df.drop(columns=[target_col, "matchId", "win"])

    # encode target
    le = LabelEncoder()
    y_enc = le.fit_transform(y)

    # train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y_enc,
        test_size=0.2,
        random_state=42,
        stratify=y_enc,
    )
    print(f"  Train size: {len(X_train)}, Test size: {len(X_test)}")

    # one-hot encode all categorical features
    cat_cols = X.columns.tolist()
    preprocessor = ColumnTransformer(
        transformers=[("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)]
    )

    # XGB model with role-specific best params
    params = best_params_per_role[role]

    xgb = XGBClassifier(
        objective="multi:softprob",
        eval_metric="mlogloss",
        tree_method="hist",
        device="cuda",         # GPU
        verbosity=1,
        random_state=42,
        **params,
    )
    print("  XGBoost device:", xgb.get_params()["device"])
    print("  XGBoost params:", params)

    # PIPELINE
    pipe = Pipeline(steps=[("prep", preprocessor), ("xgb", xgb)])

    # Fit once
    print("  >> Fitting final model...")
    start_time = time.time()
    pipe.fit(X_train, y_train)
    elapsed = time.time() - start_time
    print(f"  >> Finished training for {role} in {elapsed:.1f} seconds")

    # Evaluate on held-out test set
    test_acc = pipe.score(X_test, y_test)
    print(f"{role}: FINAL test accuracy = {test_acc:.3f}")



=== FINAL XGBoost train for role: TOP (target = ally_TOP) ===
  Rows for TOP: 203568
  Dropped 0 rows with too-rare TOP champs.
  Remaining rows: 203568
  Remaining unique TOP champs: 171
  Train size: 162854, Test size: 40714
  XGBoost device: cuda
  XGBoost params: {'subsample': 0.8, 'n_estimators': 200, 'min_child_weight': 1, 'max_depth': 3, 'learning_rate': 0.1, 'colsample_bytree': 0.8}
  >> Fitting final model...
  >> Finished training for TOP in 62.1 seconds
TOP: FINAL test accuracy = 0.092

=== FINAL XGBoost train for role: JUNGLE (target = ally_JUNGLE) ===
  Rows for JUNGLE: 203597
  Dropped 7 rows with too-rare JUNGLE champs.
  Remaining rows: 203590
  Remaining unique JUNGLE champs: 158
  Train size: 162872, Test size: 40718
  XGBoost device: cuda
  XGBoost params: {'subsample': 1.0, 'n_estimators': 200, 'min_child_weight': 5, 'max_depth': 3, 'learning_rate': 0.1, 'colsample_bytree': 0.8}
  >> Fitting final model...
  >> Finished training for JUNGLE in 47.9 seconds
JUNGLE: F