In [1]:
import pandas as pd
import numpy as np
import json
import os

In [7]:
feature_dir = os.path.join("features", "postgame")
filenames = os.listdir(feature_dir)

df_list = []
for fn in filenames:
    sub_df = pd.read_json(os.path.join(feature_dir, fn))
    df_list.append(sub_df)

df = pd.concat(df_list)

print(f"Found {len(df)} matches")
df.head()

Found 9339 matches


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"{'participantId': 1, 'championId': 2, 'teamId'...","{'participantId': 6, 'championId': 84, 'teamId...","{'participantId': 2, 'championId': 254, 'teamI...","{'participantId': 7, 'championId': 950, 'teamI...","{'participantId': 3, 'championId': 112, 'teamI...","{'participantId': 8, 'championId': 55, 'teamId...","{'participantId': 4, 'championId': 202, 'teamI...","{'participantId': 9, 'championId': 145, 'teamI...","{'participantId': 5, 'championId': 80, 'teamId...","{'participantId': 10, 'championId': 78, 'teamI..."
1,"{'participantId': 1, 'championId': 6, 'teamId'...","{'participantId': 6, 'championId': 2, 'teamId'...","{'participantId': 2, 'championId': 106, 'teamI...","{'participantId': 7, 'championId': 950, 'teamI...","{'participantId': 3, 'championId': 4, 'teamId'...","{'participantId': 8, 'championId': 105, 'teamI...","{'participantId': 4, 'championId': 523, 'teamI...","{'participantId': 9, 'championId': 145, 'teamI...","{'participantId': 5, 'championId': 117, 'teamI...","{'participantId': 10, 'championId': 267, 'team..."
2,"{'participantId': 1, 'championId': 75, 'teamId...","{'participantId': 6, 'championId': 83, 'teamId...","{'participantId': 2, 'championId': 254, 'teamI...","{'participantId': 7, 'championId': 950, 'teamI...","{'participantId': 3, 'championId': 85, 'teamId...","{'participantId': 8, 'championId': 13, 'teamId...","{'participantId': 4, 'championId': 221, 'teamI...","{'participantId': 9, 'championId': 145, 'teamI...","{'participantId': 5, 'championId': 143, 'teamI...","{'participantId': 10, 'championId': 117, 'team..."
3,"{'participantId': 1, 'championId': 39, 'teamId...","{'participantId': 6, 'championId': 777, 'teamI...","{'participantId': 2, 'championId': 32, 'teamId...","{'participantId': 7, 'championId': 950, 'teamI...","{'participantId': 3, 'championId': 157, 'teamI...","{'participantId': 8, 'championId': 90, 'teamId...","{'participantId': 4, 'championId': 202, 'teamI...","{'participantId': 9, 'championId': 145, 'teamI...","{'participantId': 5, 'championId': 50, 'teamId...","{'participantId': 10, 'championId': 26, 'teamI..."
4,"{'participantId': 1, 'championId': 86, 'teamId...","{'participantId': 6, 'championId': 887, 'teamI...","{'participantId': 3, 'championId': 32, 'teamId...","{'participantId': 7, 'championId': 950, 'teamI...","{'participantId': 2, 'championId': 112, 'teamI...","{'participantId': 8, 'championId': 157, 'teamI...","{'participantId': 4, 'championId': 15, 'teamId...","{'participantId': 9, 'championId': 145, 'teamI...","{'participantId': 5, 'championId': 43, 'teamId...","{'participantId': 10, 'championId': 526, 'team..."


In [8]:
df_expanded = pd.concat([
    df[col].apply(pd.Series).add_prefix(f'{col}_')
    for col in df.columns
], axis=1)

for i in range(10):
    items_col = f"{i}_items"
    items_expanded = df_expanded[items_col].apply(pd.Series)
    items_expanded.columns = [f"{items_col}_{j}" for j in range(items_expanded.shape[1])]
    df_expanded = pd.concat([df_expanded, items_expanded], axis=1)
    df_expanded.drop(f"{i}_items", axis=1, inplace=True)

In [183]:
print(f"Total matches: {len(df_expanded)}")
df_same_lane = df_expanded[
    (df_expanded["0_lane"] == "TOP")     & (df_expanded["1_lane"] == "TOP") &
    (df_expanded["2_lane"] == "JUNGLE")  & (df_expanded["3_lane"] == "JUNGLE") &
    (df_expanded["4_lane"] == "MIDDLE")  & (df_expanded["5_lane"] == "MIDDLE") &
    (df_expanded["6_lane"] == "BOTTOM")  & (df_expanded["7_lane"] == "BOTTOM") & 
    (df_expanded["8_lane"] == "UTILITY") & (df_expanded["9_lane"] == "UTILITY")
]
lanes = ["TOP", "JUNGLE", "MIDDLE", "BOTTOM", "UTILITY"]
print(f"Number of matches with correct lane setup: {len(df_same_lane)}")
df_same_lane.to_csv("features/Kaisa_features_expanded.csv", index=False)

Total matches: 9339
Number of matches with correct lane setup: 9140


In [184]:
# Load champion IDs
with open(f"champions.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)
    champions = {json_data[champion]["id"]: champion for champion in json_data.keys()}

# Load Item IDs (Only legendary items)
with open(f"items/items.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)
    item_ids = [json_data[str_id]["id"] for str_id in json_data.keys() if json_data[str_id]["tier"] in [3, 4]]
    item_ids = set(item_ids).intersection()

In [187]:
# Get the columns with champion IDs
champ_cols = [col for col in df_same_lane.columns if '_championId' in col]

# Prepare empty DataFrames for each team
ohe_team100 = pd.DataFrame(0, index=df_same_lane.index, columns=[f"team100_{cname}" for cid, cname in champions.items()])
ohe_team200 = pd.DataFrame(0, index=df_same_lane.index, columns=[f"team200_{cname}" for cid, cname in champions.items()])

df_same_lane.reset_index(inplace=True, drop=True)

# Fill OHE
def fill_ohe_column(row, col):
    champ_id = row[col]
    playerId = col.split("_")[0]
    team = row[f"{playerId}_teamId"]
    champName = champions[champ_id]
    if team == 100:
        ohe_team100.at[row.name, f"team100_{champName}"] = 1
    else:
        ohe_team200.at[row.name, f"team200_{champName}"] = 1

for col in champ_cols:
    df_same_lane.apply(lambda row: fill_ohe_column(row, col), axis=1)

# Concatenate the OHE columns to your DataFrame
df_ohe = pd.concat([df_same_lane, ohe_team100, ohe_team200], axis=1)

In [198]:
import numpy as np
import pandas as pd

champId = 145  # Kai'Sa ID

# Find in which column (player index) Kai'Sa appears in each row
kai_col = df_ohe[[f"{i}_championId" for i in range(10)]].eq(champId)

# Get the player index for Kai'Sa for each row
kai_player_idx = kai_col.idxmax(axis=1)  # Will give like '3_championId', '7_championId', etc.

# Filter only rows where Kai'Sa is present
has_kai = kai_col.any(axis=1)
df_kai = df_ohe.loc[has_kai].copy()
print(f"Matches with Kaisa: {len(df_kai)}")

# Extract stats for Kai'Sa player in each row
def get_stat(row, stat):
    player_prefix = row['kai_player'].split('_')[0]
    return row[f"{player_prefix}_{stat}"]

df_kai['kai_player'] = kai_player_idx[has_kai]

# Example stats you want to collect
for stat in ['kills', 'deaths', 'assists', 'goldEarned', 'level',] :
    df_kai[f'kaisa_{stat}'] = df_kai.apply(lambda row: get_stat(row, stat), axis=1)

for item in [f"items_{i}" for i in range(6)]:
    df_kai[f'kaisa_{item}'] = df_kai.apply(lambda row: get_stat(row, item), axis=1)
    df_kai[f'kaisa_{item}'] = df_kai.apply(lambda row: row[f"kaisa_{item}"] if row[f"kaisa_{item}"] in item_ids else 0, axis=1)

# Example: Calculate KDA for Kai'Sa
df_kai['kaisa_kda'] = (df_kai['kaisa_kills'] + df_kai['kaisa_assists']) / df_kai['kaisa_deaths'].replace(0, np.nan)
df_kai['good_gold'] = df_kai['kaisa_goldEarned'] >= df_kai.loc[:, [f"{i}_goldEarned" for i in range(10)]].mean(axis=1)

# Drop helper columns if you want
df_kai = df_kai[(df_kai['kaisa_kda'] >= 0.8) & (df_kai['good_gold'])]

df_kai = df_kai.drop([f"{i}_kills" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_deaths" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_assists" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_teamId" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_participantId" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_championId" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_championName" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_lane" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_level" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_matchResult" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_goldEarned" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_0" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_1" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_2" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_3" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_4" for i in range(10)], axis=1)
df_kai = df_kai.drop([f"{i}_items_5" for i in range(10)], axis=1)
df_kai = df_kai.drop(['kaisa_kills', 'kaisa_kda', 'good_gold', 'kaisa_deaths', 'kaisa_assists', 'kaisa_goldEarned', 'kaisa_level'], axis=1)
df_kai = df_kai.drop(columns=['kai_player'])

df_kai = df_kai.reset_index(drop=True)

Matches with Kaisa: 4154


In [201]:
from sklearn.model_selection import train_test_split

X = df_kai.drop([col for col in df_kai if "items" in col], axis=1)
y = df_kai[[col for col in df_kai if "items" in col]]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [202]:
X.head()

Unnamed: 0,team100_Aatrox,team100_Ahri,team100_Akali,team100_Akshan,team100_Alistar,team100_Ambessa,team100_Amumu,team100_Anivia,team100_Annie,team100_Aphelios,...,team200_Yone,team200_Yorick,team200_Yuumi,team200_Zac,team200_Zed,team200_Zeri,team200_Ziggs,team200_Zilean,team200_Zoe,team200_Zyra
0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
row0 = X.loc[:1, :]
row0 = row0.reset_index(drop=True)
for col in row0.columns:
    if row0.loc[0, col]:
        print(col)

team100_Jhin
team100_Qiyana
team100_Velkoz
team100_Volibear
team100_Yone
team200_DrMundo
team200_Garen
team200_Kaisa
team200_Malzahar
team200_Pantheon


In [207]:
y.head()

Unnamed: 0,kaisa_items_0,kaisa_items_1,kaisa_items_2,kaisa_items_3,kaisa_items_4,kaisa_items_5
0,6672.0,3124.0,0.0,3115.0,0.0,0.0
1,3124.0,6672.0,0.0,0.0,0.0,0.0
2,3157.0,6672.0,0.0,3124.0,3302.0,0.0
3,3157.0,6672.0,3124.0,3115.0,0.0,0.0
4,0.0,6672.0,3124.0,0.0,0.0,0.0


In [208]:
import numpy as np

num_slots = 7
num_items = len(item_ids) # If you want to include 0 (empty slot) as a class

# Convert to numpy
y_class = y.to_numpy()

In [209]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split your data
X_train, X_test, y_train, y_test = train_test_split(X, y_class, test_size=0.5, random_state=42)

# Random Forest (with one classifier per slot)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
multi_rf = MultiOutputClassifier(rf)

# Train
multi_rf.fit(X_train, y_train)

# Predict
y_pred = multi_rf.predict(X_test)

# Accuracy per slot
for slot in range(6):
    acc = accuracy_score(y_test[:, slot], y_pred[:, slot])
    print(f"Slot {slot} accuracy: {acc:.3f}")

Slot 0 accuracy: 0.162
Slot 1 accuracy: 0.174
Slot 2 accuracy: 0.279
Slot 3 accuracy: 0.396
Slot 4 accuracy: 0.694
Slot 5 accuracy: 0.927


In [210]:
import torch
import torch.nn as nn
import torch.optim as optim

In [218]:
from sklearn.model_selection import train_test_split
import torch
import numpy as np

# Split X and y while keeping y as DataFrame
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Multi-hot encode y
unique_item_ids = sorted(set(np.unique(y.values.flatten())) - {0})  # Removing zero (assumed as 'no item' marker)
item_id_to_idx = {item_id: idx for idx, item_id in enumerate(unique_item_ids)}
num_items = len(unique_item_ids)
print(f"Total items: {num_items}")

def multi_hot_encode(row):
    vec = np.zeros(num_items, dtype=np.float32)
    for item_id in row:
        if item_id != 0:
            vec[item_id_to_idx[item_id]] = 1.0
    return vec

# Convert y_train
y_train_multi_hot = np.vstack(y_train.apply(multi_hot_encode, axis=1).to_numpy())
y_train_tensor = torch.tensor(y_train_multi_hot, dtype=torch.float32)

# Convert y_test
y_test_multi_hot = np.vstack(y_test.apply(multi_hot_encode, axis=1).to_numpy())
y_test_tensor = torch.tensor(y_test_multi_hot, dtype=torch.float32)

# Convert X
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)


Total items: 65


In [219]:
class SimpleMLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleMLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, output_dim)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()  # Because it's multi-label (not softmax)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.sigmoid(self.fc4(x))
        return x

model = SimpleMLP(input_dim=X.shape[1], output_dim=y_test_tensor.shape[1])


In [220]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [221]:
num_epochs = 100
batch_size = 64

for epoch in range(num_epochs):
    permutation = torch.randperm(X_test_tensor.size()[0])
    total_loss = 0

    for i in range(0, X_test_tensor.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        batch_X, batch_y = X_test_tensor[indices], y_test_tensor[indices]

        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 1, Loss: 6.7942
Epoch 2, Loss: 5.7844
Epoch 3, Loss: 2.6487
Epoch 4, Loss: 1.6642
Epoch 5, Loss: 1.4284
Epoch 6, Loss: 1.3188
Epoch 7, Loss: 1.3019
Epoch 8, Loss: 1.2874
Epoch 9, Loss: 1.2800
Epoch 10, Loss: 1.2731
Epoch 11, Loss: 1.2608
Epoch 12, Loss: 1.2480
Epoch 13, Loss: 1.2489
Epoch 14, Loss: 1.2496
Epoch 15, Loss: 1.2391
Epoch 16, Loss: 1.2317
Epoch 17, Loss: 1.2274
Epoch 18, Loss: 1.2360
Epoch 19, Loss: 1.2329
Epoch 20, Loss: 1.2148
Epoch 21, Loss: 1.2165
Epoch 22, Loss: 1.2145
Epoch 23, Loss: 1.2107
Epoch 24, Loss: 1.1935
Epoch 25, Loss: 1.1817
Epoch 26, Loss: 1.1819
Epoch 27, Loss: 1.1779
Epoch 28, Loss: 1.1664
Epoch 29, Loss: 1.1585
Epoch 30, Loss: 1.1369
Epoch 31, Loss: 1.1202
Epoch 32, Loss: 1.1100
Epoch 33, Loss: 1.0811
Epoch 34, Loss: 1.0622
Epoch 35, Loss: 1.0443
Epoch 36, Loss: 1.0167
Epoch 37, Loss: 0.9947
Epoch 38, Loss: 0.9871
Epoch 39, Loss: 0.9641
Epoch 40, Loss: 0.9414
Epoch 41, Loss: 0.9192
Epoch 42, Loss: 0.9082
Epoch 43, Loss: 0.8912
Epoch 44, Loss: 0.87

In [222]:
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    test_loss = criterion(outputs, y_test_tensor)
    print(f"Test Loss: {test_loss.item()}")

    # Get top 6 predictions for each sample
    _, top6_indices = torch.topk(outputs, k=6, dim=1)

    # Total number of samples
    total_samples = y_test_tensor.shape[0]
    all_slots_correct_samples = 0
    single_slot_correct_samples = 0

    for i in range(total_samples):
        # Indices of true labels (where y == 1)
        true_indices = (y_test_tensor[i] == 1).nonzero(as_tuple=True)[0]

        # Skip if there are no true labels (optional: depends on your definition of accuracy)
        if true_indices.numel() == 0:
            continue

        # Indices predicted as top-6
        predicted_top6 = top6_indices[i]

        single_slot_correct = 0
        # Check if all true labels are in the top-6
        if set(true_indices.tolist()).issubset(set(predicted_top6.tolist())):
            all_slots_correct_samples += 1
            single_slot_correct_samples += 1
        else:
            for idx in true_indices.tolist():
                if idx in set(predicted_top6.tolist()):
                    single_slot_correct += 1
            single_slot_correct_samples += single_slot_correct / len(true_indices.tolist())

    # Top-6 accuracy based only on rows that had at least one positive label
    all_slots_accuracy = all_slots_correct_samples / total_samples
    single_slot_accuracy = single_slot_correct_samples / total_samples
    print(f"All-slots multi-label accuracy: {all_slots_accuracy:.4f}")
    print(f"Single-slot multi-label accuracy: {single_slot_accuracy:.4f}")

Test Loss: 0.013259225524961948
All-slots multi-label accuracy: 0.9441
Single-slot multi-label accuracy: 0.9834


In [223]:
# X_custom = pd.DataFrame(columns=X.columns)
# X_custom = pd.concat([pd.DataFrame([[0] * len(X_custom.columns)] * 256, columns=X_custom.columns), X_custom], ignore_index=True)

X_custom = X_test.iloc[:340, :]

# Agora, converter todas as colunas para float32
X_custom_tensor = torch.tensor(X_custom.values.astype(np.float32))

model.eval()
with torch.no_grad():
    outputs = model(X_custom_tensor)
    topk_values, topk_indices = torch.topk(outputs, k=6, dim=1)

with open(f"items/items.json", "r") as fp:
    item_dict = json.load(fp)

for i in range(topk_indices.shape[0]):
    predicted_idx = []
    for idx in topk_indices[i]:
        predicted_idx.append(idx)
    predicted_items = []
    for id, idx in item_id_to_idx.items():
        if idx in predicted_idx:
            predicted_items.append(item_dict[str(int(id))]["name"])
    print(",".join(predicted_items))    

Statikk Shiv,Nashor's Tooth,Guinsoo's Rageblade,Blade of the Ruined King,Terminus,Kraken Slayer
Nashor's Tooth,Guinsoo's Rageblade,Void Staff,Zephyr,Terminus,Kraken Slayer
Yun Tal Wildarrows,Mortal Reminder,Statikk Shiv,Nashor's Tooth,Guinsoo's Rageblade,Navori Flickerblade
Nashor's Tooth,Rylai's Crystal Scepter,Guinsoo's Rageblade,Zephyr,Shadowflame,Kraken Slayer
Nashor's Tooth,Guinsoo's Rageblade,Mercurial Scimitar,Zhonya's Hourglass,Terminus,Kraken Slayer
Statikk Shiv,Void Staff,Spellslinger's Shoes,Umbral Glaive,Essence Reaver,Luden's Companion
Statikk Shiv,Nashor's Tooth,Guinsoo's Rageblade,Void Staff,Zhonya's Hourglass,Kraken Slayer
Guardian Angel,Yun Tal Wildarrows,Mortal Reminder,Lord Dominik's Regards,Phantom Dancer,Runaan's Hurricane
Guinsoo's Rageblade,Mercurial Scimitar,Zhonya's Hourglass,Terminus,Kraken Slayer,The Collector
Guinsoo's Rageblade,Mercurial Scimitar,Zhonya's Hourglass,Terminus,Kraken Slayer,The Collector
Statikk Shiv,Nashor's Tooth,Guinsoo's Rageblade,Zhonya's

In [224]:
X_test.head()

Unnamed: 0,team100_Aatrox,team100_Ahri,team100_Akali,team100_Akshan,team100_Alistar,team100_Ambessa,team100_Amumu,team100_Anivia,team100_Annie,team100_Aphelios,...,team200_Yone,team200_Yorick,team200_Yuumi,team200_Zac,team200_Zed,team200_Zeri,team200_Ziggs,team200_Zilean,team200_Zoe,team200_Zyra
1721,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2334,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
443,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2332,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2882,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
