In [1]:
import pandas as pd
import pickle

In [2]:
# Load the champion table. Sorting by ChampionId makes the id -> index
# assignment deterministic regardless of the row order in the CSV.
df_champion = pd.read_csv(r'D:\AI\cuoikiDS\data\ChampionTbl.csv')

df_champion = df_champion.sort_values('ChampionId').reset_index(drop=True)

# Duplicate ids would collapse in the dict below, making len(id_to_idx)
# smaller than the largest index it hands out.
assert df_champion['ChampionId'].is_unique, "ChampionId values must be unique"

# After reset_index the row position is the index label, so both lookups can
# be built straight from the columns instead of two slow iterrows() passes.
id_to_idx = {champ_id: idx for idx, champ_id in enumerate(df_champion['ChampionId'])}
idx_to_name = dict(enumerate(df_champion['ChampionName']))

# Persist both lookups together in one pickle file.
with open('champion_mapping.pkl', 'wb') as f:
    pickle.dump({'id_to_idx': id_to_idx, 'idx_to_name': idx_to_name}, f)

print(f"Đã tạo mapping cho {len(id_to_idx)} vị tướng.")

Đã tạo mapping cho 173 vị tướng.


In [3]:
# Champion pick columns: B1Champ..B5Champ followed by R1Champ..R5Champ.
champ_cols = [f'{side}{slot}Champ' for side in ('B', 'R') for slot in range(1, 6)]

In [4]:
# Load the per-match table from CSV.
df_matches = pd.read_csv(r'D:\AI\cuoikiDS\data\TeamMatchTbl.csv')

In [8]:
# Count missing values in every column of the match table.
df_matches.isnull().sum()

TeamID                 0
MatchFk                0
B1Champ                0
B2Champ                0
B3Champ                0
B4Champ                0
B5Champ                0
R1Champ                0
R2Champ                0
R3Champ                0
R4Champ                0
R5Champ                0
BlueBaronKills         0
BlueRiftHeraldKills    0
BlueDragonKills        0
BlueTowerKills         0
BlueKills              0
RedBaronKills          0
RedRiftHeraldKills     0
RedDragonKills         0
RedTowerKills          0
RedKills               0
RedWin                 0
BlueWin                0
dtype: int64

In [11]:
# Count rows that are exact duplicates of an earlier row.
df_matches.duplicated().sum()

np.int64(0)

In [5]:
# Translate raw champion ids into contiguous indices, one pick column at a
# time; ids missing from id_to_idx become NaN.
remapped_picks = {pick_col: df_matches[pick_col].map(id_to_idx) for pick_col in champ_cols}
for pick_col, remapped in remapped_picks.items():
    df_matches[pick_col] = remapped

In [6]:
# Preview the first rows of the match table.
df_matches.head()

Unnamed: 0,TeamID,MatchFk,B1Champ,B2Champ,B3Champ,B4Champ,B5Champ,R1Champ,R2Champ,R3Champ,...,BlueDragonKills,BlueTowerKills,BlueKills,RedBaronKills,RedRiftHeraldKills,RedDragonKills,RedTowerKills,RedKills,RedWin,BlueWin
0,1,EUW1_7565751492,167,115,116,48,169,119,5,25,...,1,3,13,1,0,3,8,26,1,0
1,2,EUW1_7565549583,73,132,116,131,78,6,136,104,...,3,10,39,0,1,1,3,33,0,1
2,3,EUW1_7564803077,150,28,4,149,130,23,60,38,...,2,7,27,2,0,3,8,37,1,0
3,4,EUW1_7564368646,50,34,55,149,87,57,25,51,...,0,4,55,0,0,0,0,39,0,1
4,5,EUW1_7564332041,12,159,93,114,110,109,85,51,...,0,0,42,0,0,0,0,0,0,1


In [7]:
# Training label: the BlueWin flag cast to float.
df_matches['WinTarget'] = df_matches['BlueWin'].astype(float)

In [8]:
# Keep only the ten pick columns plus the label, and drop any row that has a
# missing value in one of them.
model_columns = [*champ_cols, 'WinTarget']
clean_df = df_matches[model_columns].dropna()

In [9]:
# Preview the first rows of the modelling table.
clean_df.head()

Unnamed: 0,B1Champ,B2Champ,B3Champ,B4Champ,B5Champ,R1Champ,R2Champ,R3Champ,R4Champ,R5Champ,WinTarget
0,167,115,116,48,169,119,5,25,125,148,0.0
1,73,132,116,131,78,6,136,104,42,169,1.0
2,150,28,4,149,130,23,60,38,168,16,0.0
3,50,34,55,149,87,57,25,51,90,5,1.0
4,12,159,93,114,110,109,85,51,172,4,1.0


In [10]:
import torch
from torch_geometric.data import Data

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
import torch
from torch_geometric.data import Data

# 1. Build the edge skeleton (shared by every match)
def create_match_edges():
    """Return the fixed connectivity of a 10-node match graph.

    Nodes 0-4 form one team and nodes 5-9 the other. Every ordered pair of
    distinct teammates is connected, and every cross-team pair is connected
    in both directions.

    Returns:
        torch.Tensor: long tensor of shape ``[2, num_edges]`` (source row,
        target row), made contiguous after the transpose.
    """
    first_team, second_team = range(0, 5), range(5, 10)

    # Allies: all ordered pairs inside each team, skipping self-loops.
    ally_edges = [
        [src, dst]
        for team in (first_team, second_team)
        for src in team
        for dst in team
        if src != dst
    ]
    # Opponents: each cross-team pair, emitted forward then reversed.
    enemy_edges = [
        pair
        for a in first_team
        for b in second_team
        for pair in ([a, b], [b, a])
    ]
    return torch.tensor(ally_edges + enemy_edges, dtype=torch.long).t().contiguous()

edge_index = create_match_edges()

# 2. Build the dataset
def build_pyg_data(df):
    """Convert every row of ``df`` into one PyG ``Data`` graph.

    Args:
        df: DataFrame holding the ``champ_cols`` columns (mapped champion
            indices) and a ``WinTarget`` column.

    Returns:
        list: one ``Data`` object per row, in row order. ``x`` is the long
        tensor of that row's champion indices, ``y`` is a one-element float
        tensor with the label, and all graphs share the notebook-level
        ``edge_index``.
    """
    # Convert whole columns once. iterrows() builds a pandas Series for every
    # row, which is needlessly slow on a large match table.
    champ_ids = torch.tensor(df[champ_cols].to_numpy(), dtype=torch.long)
    win_labels = torch.tensor(df['WinTarget'].to_numpy(), dtype=torch.float)

    return [
        # The i:i+1 slice keeps y as a one-element tensor, like the original.
        Data(x=champ_ids[i], edge_index=edge_index, y=win_labels[i:i + 1])
        for i in range(len(df))
    ]

from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# repeatable.
train_df, test_df = train_test_split(clean_df, random_state=42, test_size=0.2)

# Convert each split into a list of graph objects (train first, then test).
train_dataset, test_dataset = (build_pyg_data(split) for split in (train_df, test_df))

In [12]:
# Inspect the first training graph.
train_dataset[0]

Data(x=[10], edge_index=[2, 90], y=[1])

In [13]:
from torch_geometric.loader import DataLoader
# train_dataset and test_dataset are expected to exist from the previous step.
# Only the training loader shuffles; both use the same batch size.
loader_kwargs = {'batch_size': 1024}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)


In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv, global_mean_pool

class LoLGATRecommender(torch.nn.Module):
    """Two-layer GAT that outputs one sigmoid score per graph.

    Each node is a champion index. Its learned embedding is summed with a
    learned embedding of its slot (0-9) inside the match, passed through two
    GATConv layers with ELU activations, mean-pooled per graph and projected
    to a single sigmoid output.

    Args:
        num_champions: size of the champion embedding table.
        embedding_dim: width of the champion and slot embeddings.
        hidden_dim: per-head output width of the GAT layers.
    """

    def __init__(self, num_champions, embedding_dim=32, hidden_dim=64):
        super().__init__()
        n_heads = 4
        self.embedding = torch.nn.Embedding(num_champions, embedding_dim)
        self.pos_embedding = torch.nn.Embedding(10, embedding_dim)
        # The first layer concatenates its heads, so the second layer sees
        # hidden_dim * n_heads input features.
        self.gat1 = GATConv(embedding_dim, hidden_dim, heads=n_heads, concat=True)
        self.gat2 = GATConv(hidden_dim * n_heads, hidden_dim, heads=1, concat=False)
        self.fc = torch.nn.Linear(hidden_dim, 1)

    def forward(self, x, edge_index, batch):
        """Score a batch of graphs.

        Args:
            x: 1-D long tensor of champion indices. The slot ids are built by
                tiling 0..9, so nodes are assumed to come in consecutive
                groups of ten per graph.
            edge_index: edge list passed to both GAT layers.
            batch: node-to-graph assignment used for mean pooling.

        Returns:
            Tensor with one sigmoid output per graph (last dimension of size 1).
        """
        # Add the slot embedding to the champion embedding right at the input.
        graphs_in_batch = len(x) // 10
        slot_ids = torch.arange(10, device=x.device).repeat(graphs_in_batch)
        hidden = self.embedding(x) + self.pos_embedding(slot_ids)

        hidden = F.elu(self.gat1(hidden, edge_index))
        hidden = F.elu(self.gat2(hidden, edge_index))

        pooled = global_mean_pool(hidden, batch)
        return torch.sigmoid(self.fc(pooled))

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed torch's global RNG so weight initialisation is repeatable across
# Restart & Run All, in line with the fixed random_state of the data split.
# Shuffling that draws from the global generator becomes repeatable too.
torch.manual_seed(42)

num_champions = len(id_to_idx)  # number of champions actually present in the mapping
model = LoLGATRecommender(num_champions=num_champions).to(device)

# Adam optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
criterion = torch.nn.BCELoss()  # binary win/loss classification on sigmoid outputs

In [16]:
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``train_loader`` and
    ``device`` objects.

    Returns:
        float: sum of the per-batch BCE losses divided by the number of batches.
    """
    model.train()
    total_loss = 0
    for data in train_loader:
        # The model lives on `device`; the batch must be moved there as well,
        # otherwise the forward pass fails when running on CUDA.
        data = data.to(device)
        optimizer.zero_grad()
        # Drop only the trailing size-1 dim. A bare squeeze() would also
        # collapse a batch of size 1 to a scalar, and the loss would then
        # reject the input/target shape mismatch.
        out = model(data.x, data.edge_index, data.batch).squeeze(-1)
        loss = F.binary_cross_entropy(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

def evaluate(loader):
    """Score the notebook-level ``model`` on every batch of ``loader``.

    Args:
        loader: iterable of batches exposing ``x``, ``edge_index``, ``batch``,
            ``y`` and ``to(device)``.

    Returns:
        tuple: ``(accuracy, mae, rmse)``. Accuracy thresholds the outputs at
        0.5; MAE and RMSE compare the raw outputs with the labels. All three
        are averaged over the total number of labels seen.
    """
    model.eval()
    n_correct = 0
    n_samples = 0
    abs_err_sum = 0
    sq_err_sum = 0

    # No gradients are needed while scoring.
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            probs = model(batch.x, batch.edge_index, batch.batch).squeeze()
            labels = batch.y

            # Accuracy: an output above 0.5 counts as a predicted 1.
            hits = (probs > 0.5).float() == labels
            n_correct += hits.sum().item()
            n_samples += labels.size(0)

            # Accumulate absolute and squared errors for MAE / RMSE.
            residual = probs - labels
            abs_err_sum += residual.abs().sum().item()
            sq_err_sum += residual.pow(2).sum().item()

    return (
        n_correct / n_samples,
        abs_err_sum / n_samples,
        (sq_err_sum / n_samples) ** 0.5,
    )

In [17]:
import time

epochs = 50
for epoch in range(1, epochs + 1):
    model.train()
    total_loss = 0
    train_correct = 0
    train_total = 0

    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()

        # Drop only the trailing size-1 dim. A bare squeeze() would turn a
        # final batch of size 1 into a scalar and the loss would raise on the
        # input/target shape mismatch.
        out = model(data.x, data.edge_index, data.batch).squeeze(-1)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Running training accuracy with the same 0.5 threshold as evaluate().
        predicted = (out > 0.5).float()
        train_correct += (predicted == data.y).sum().item()
        train_total += data.y.size(0)

    avg_loss = total_loss / len(train_loader)
    train_acc = train_correct / train_total

    # --- EVALUATION (TEST) ---
    # Evaluate every 5 epochs (or every epoch for closer monitoring).
    if epoch % 5 == 0:
        test_acc, test_mae, test_rmse = evaluate(test_loader)

        print(f"Epoch {epoch:03d} | Loss: {avg_loss:.4f} | Train Acc: {train_acc:.4f} | "
              f"Test Acc: {test_acc:.4f} | MAE: {test_mae:.4f} | RMSE: {test_rmse:.4f}")
    else:
        print(f"Epoch {epoch:03d} | Loss: {avg_loss:.4f} | Train Acc: {train_acc:.4f}")

# Persist only the learned weights (state_dict), not the whole module object.
torch.save(model.state_dict(), 'lol_gat_model.pth')
print("Huấn luyện hoàn tất và đã lưu model!")

Epoch 001 | Loss: 0.6833 | Train Acc: 0.5643
Epoch 002 | Loss: 0.6750 | Train Acc: 0.5630
Epoch 003 | Loss: 0.6691 | Train Acc: 0.5663
Epoch 004 | Loss: 0.6656 | Train Acc: 0.5631
Epoch 005 | Loss: 0.6622 | Train Acc: 0.5690 | Test Acc: 0.5711 | MAE: 0.4742 | RMSE: 0.4852
Epoch 006 | Loss: 0.6602 | Train Acc: 0.5686
Epoch 007 | Loss: 0.6584 | Train Acc: 0.5700
Epoch 008 | Loss: 0.6567 | Train Acc: 0.5699
Epoch 009 | Loss: 0.6552 | Train Acc: 0.5697
Epoch 010 | Loss: 0.6539 | Train Acc: 0.5735 | Test Acc: 0.5685 | MAE: 0.4659 | RMSE: 0.4826
Epoch 011 | Loss: 0.6526 | Train Acc: 0.5732
Epoch 012 | Loss: 0.6517 | Train Acc: 0.5734
Epoch 013 | Loss: 0.6507 | Train Acc: 0.5738
Epoch 014 | Loss: 0.6498 | Train Acc: 0.5739
Epoch 015 | Loss: 0.6488 | Train Acc: 0.5733 | Test Acc: 0.5623 | MAE: 0.4638 | RMSE: 0.4813
Epoch 016 | Loss: 0.6481 | Train Acc: 0.5757
Epoch 017 | Loss: 0.6478 | Train Acc: 0.5756
Epoch 018 | Loss: 0.6474 | Train Acc: 0.5720
Epoch 019 | Loss: 0.6458 | Train Acc: 0.5762
E