In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
df = pd.read_json("features/Kaisa_features.json")

df_expanded = pd.concat([
    df[col].apply(pd.Series).add_prefix(f'{col}_')
    for col in df.columns
], axis=1)

for i in range(10):
    items_col = f"{i}_items"
    items_expanded = df_expanded[items_col].apply(pd.Series)
    items_expanded.columns = [f"{items_col}_{j}" for j in range(items_expanded.shape[1])]
    df_expanded = pd.concat([df_expanded, items_expanded], axis=1)
    df_expanded.drop(f"{i}_items", axis=1, inplace=True)

In [3]:
print(f"Total matches: {len(df_expanded)}")
df_same_lane = df_expanded[
    (df_expanded["0_lane"] == "TOP")     & (df_expanded["1_lane"] == "TOP") &
    (df_expanded["2_lane"] == "JUNGLE")  & (df_expanded["3_lane"] == "JUNGLE") &
    (df_expanded["4_lane"] == "MIDDLE")  & (df_expanded["5_lane"] == "MIDDLE") &
    (df_expanded["6_lane"] == "BOTTOM")  & (df_expanded["7_lane"] == "BOTTOM") & 
    (df_expanded["8_lane"] == "UTILITY") & (df_expanded["9_lane"] == "UTILITY")
]
lanes = ["TOP", "JUNGLE", "MIDDLE", "BOTTOM", "UTILITY"]
print(f"Number of matches with correct lane setup: {len(df_same_lane)}")
df_same_lane.to_csv("features/Kaisa_features_expanded.csv", index=False)

Total matches: 2756
Number of matches with correct lane setup: 2679


In [4]:
# Load champion IDs
with open(f"champions.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)
    champions = {json_data[champion]["id"]: champion for champion in json_data.keys()}

# Load Item IDs (Only legendary items)
with open(f"items/items.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)
    item_ids = [json_data[str_id]["id"] for str_id in json_data.keys() if json_data[str_id]["tier"] in [3, 4]]
    item_ids = set(item_ids).intersection()

In [5]:
# Get the columns with champion IDs
champ_cols = [col for col in df_same_lane.columns if '_championId' in col]

# Function to get team for each player column
def get_team(col):
    player_index = int(col.split('_')[0])
    return 100 if player_index < 5 else 200

# Prepare empty DataFrames for each team
ohe_team100 = pd.DataFrame(0, index=df_same_lane.index, columns=[f"team100_{cname}" for cid, cname in champions.items()])
ohe_team200 = pd.DataFrame(0, index=df_same_lane.index, columns=[f"team200_{cname}" for cid, cname in champions.items()])

# Fill OHE
for col in champ_cols:
    team = get_team(col)
    for idx, champ_id in df_same_lane[col].items():
        champName = champions[champ_id]
        if team == 100:
            ohe_team100.loc[idx, f"team100_{champName}"] = 1
        else:
            ohe_team200.loc[idx, f"team200_{champName}"] = 1

# Concatenate the OHE columns to your DataFrame
df_ohe = pd.concat([df_same_lane, ohe_team100, ohe_team200], axis=1)

In [6]:
import numpy as np
import pandas as pd

champId = 145  # Kai'Sa ID

# Find in which column (player index) Kai'Sa appears in each row
kai_col = df_ohe[[f"{i}_championId" for i in range(10)]].eq(champId)

# Get the player index for Kai'Sa for each row
kai_player_idx = kai_col.idxmax(axis=1)  # Will give like '3_championId', '7_championId', etc.

# Filter only rows where Kai'Sa is present
has_kai = kai_col.any(axis=1)
df_kai = df_ohe.loc[has_kai].copy()

# Extract stats for Kai'Sa player in each row
def get_stat(row, stat):
    player_prefix = row['kai_player'].split('_')[0]
    return row[f"{player_prefix}_{stat}"]

df_kai['kai_player'] = kai_player_idx[has_kai]

# Example stats you want to collect
for stat in ['kills', 'deaths', 'assists', 'goldEarned', 'level',] :
    df_kai[f'kaisa_{stat}'] = df_kai.apply(lambda row: get_stat(row, stat), axis=1)

for item in [f"items_{i}" for i in range(6)]:
    df_kai[f'kaisa_{item}'] = df_kai.apply(lambda row: get_stat(row, item), axis=1)
    df_kai[f'kaisa_{item}'] = df_kai.apply(lambda row: row[f"kaisa_{item}"] if row[f"kaisa_{item}"] in item_ids else 0, axis=1)

# Example: Calculate KDA for Kai'Sa
df_kai['kaisa_kda'] = (df_kai['kaisa_kills'] + df_kai['kaisa_assists']) / df_kai['kaisa_deaths'].replace(0, np.nan)
df_kai['good_gold'] = df_kai['kaisa_goldEarned'] >= df_ohe.loc[:, [f"{i}_goldEarned" for i in range(10)]].mean(axis=1)

# Drop helper columns if you want
df_kai = df_kai[(df_kai['kaisa_kda'] >= 0.8) & (df_kai['good_gold'])]

df_kai = df_kai.drop([f"{i}_kills" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_deaths" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_assists" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_teamId" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_championId" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_lane" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_level" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_matchResult" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_goldEarned" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_0" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_1" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_2" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_3" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_4" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_5" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_6" for i in range(10)], axis=1)
df_kai = df_kai.drop(['kaisa_kills', 'kaisa_kda', 'good_gold', 'kaisa_deaths', 'kaisa_assists', 'kaisa_goldEarned', 'kaisa_level'], axis=1)
df_kai = df_kai.drop(columns=['kai_player'])

In [7]:
from sklearn.model_selection import train_test_split

X = df_kai.drop([col for col in df_kai if "items" not in col], axis=1)
y = df_kai[[col for col in df_kai if "items" in col]]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
import numpy as np

num_slots = 7
num_items = len(item_ids) + 1  # If you want to include 0 (empty slot) as a class

# Convert to numpy
y_class = y.to_numpy()

array([[   0,    0, 3087, 6676,    0,    0],
       [   0, 3046, 3087,    0, 6676,    0],
       [   0, 3036, 3087, 3046,    0,    0],
       ...,
       [6655,    0, 3042,    0,    0, 3115],
       [6676,    0,    0, 3032,    0, 3033],
       [   0, 6676,    0, 3032,    0, 6675]], shape=(1897, 6))

In [18]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split your data
X_train, X_test, y_train, y_test = train_test_split(X, y_class, test_size=0.5, random_state=42)

# Random Forest (with one classifier per slot)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
multi_rf = MultiOutputClassifier(rf)

# Train
multi_rf.fit(X_train, y_train)

# Predict
y_pred = multi_rf.predict(X_test)

# Accuracy per slot
for slot in range(6):
    acc = accuracy_score(y_test[:, slot], y_pred[:, slot])
    print(f"Slot {slot} accuracy: {acc:.3f}")

Slot 0 accuracy: 0.859
Slot 1 accuracy: 0.855
Slot 2 accuracy: 0.901
Slot 3 accuracy: 0.877
Slot 4 accuracy: 0.854
Slot 5 accuracy: 0.883


In [12]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

True
NVIDIA GeForce RTX 3060 Ti
