In [8]:
import pandas as pd
import numpy as np
from torch import nn
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split


In [9]:
champions_df = pd.read_csv('processed_data/champions_df.csv')
items_df = pd.read_csv('processed_data/items_df.csv')
other_features_df = pd.read_csv('processed_data/other_features_df.csv')
item_encoder = pd.read_pickle('processed_data/item_encoder.pkl')
champion_encoder = pd.read_pickle('processed_data/champion_encoder.pkl')

In [7]:
# define item file and get items name from the json
import json

with open('item.json', 'r') as file:
    items = json.load(file)


items['data']['1001']


{'name': 'Boots',
 'description': '<mainText><stats><attention>25</attention> Move Speed</stats><br><br></mainText>',
 'colloq': ';',
 'plaintext': 'Slightly increases Move Speed',
 'into': ['3005',
  '3047',
  '3006',
  '3009',
  '3010',
  '3020',
  '3111',
  '3117',
  '3158'],
 'image': {'full': '1001.png',
  'sprite': 'item0.png',
  'group': 'item',
  'x': 0,
  'y': 0,
  'w': 48,
  'h': 48},
 'gold': {'base': 300, 'purchasable': True, 'total': 300, 'sell': 210},
 'tags': ['Boots'],
 'maps': {'11': True,
  '12': True,
  '21': True,
  '22': False,
  '30': False,
  '33': False},
 'stats': {'FlatMovementSpeedMod': 25}}

In [12]:
# since we are working with multi label classification I will try to work with NN

class LoLItemizationModel(nn.Module):
    def __init__(self, num_champions, num_other_features, embedding_dim=32, hidden_dim=64, output_dim=100):
        super(LoLItemizationModel, self).__init__()
        
        # Champion embedding layer
        self.champion_embedding = nn.Embedding(num_champions, embedding_dim)
        
        # Layers for processing champion embeddings
        self.champion_layers = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )
        
        # Layers for processing other features and combining with champion features
        self.combined_layers = nn.Sequential(
            nn.Linear(hidden_dim + num_other_features, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid()
        )
        
    def forward(self, champion_ids, other_features):
        # Process champion embeddings
        champion_embedded = self.champion_embedding(champion_ids)
        champion_features = self.champion_layers(champion_embedded)
        
        # Combine champion features with other features
        combined = torch.cat([champion_features, other_features], dim=1)
        
        # Process combined features
        output = self.combined_layers(combined)
        
        return output

In [11]:
from sklearn.model_selection import train_test_split


X_champion = torch.LongTensor(champions_df['champion_id'].values)
X_other = torch.FloatTensor(other_features_df.values)
y = torch.FloatTensor(items_df.values)

X_champion_temp, X_champion_test, X_other_temp, X_other_test, y_temp, y_test = train_test_split(
    X_champion, X_other, y, test_size=0.2, random_state=42
)

# second split: split the remaining data into train and validation sets
X_champion_train, X_champion_val, X_other_train, X_other_val, y_train, y_val = train_test_split(
    X_champion_temp, X_other_temp, y_temp, test_size=0.25, random_state=42  # 0.25 * 0.8 = 0.2
)

In [None]:


train_data = TensorDataset(X_champion_train, X_other_train, y_train)
val_data = TensorDataset(X_champion_val, X_other_val, y_val)

batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)

# Instantiate the model
num_champions = len(champion_encoder.classes_)
num_other_features = other_features_df.shape[1]
output_dim = items_df.shape[1]

model = LoLItemizationModel(num_champions, num_other_features, output_dim=output_dim)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

# Training loop
num_epochs = 50
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for champion_ids, other_features, labels in train_loader:
        champion_ids, other_features, labels = champion_ids.to(device), other_features.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(champion_ids, other_features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * champion_ids.size(0)
    
    train_loss /= len(train_loader.dataset)
    
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for champion_ids, other_features, labels in val_loader:
            champion_ids, other_features, labels = champion_ids.to(device), other_features.to(device), labels.to(device)
            outputs = model(champion_ids, other_features)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * champion_ids.size(0)
    
    val_loss /= len(val_loader.dataset)
    
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# Save the trained model
torch.save(model.state_dict(), 'lol_itemization_model.pth')

print("Training complete. Model saved as 'lol_itemization_model.pth'")

In [22]:
# let's define a function that takes in item number and outputs the item name

def get_item_name(item_number, item_encoder, items):
    # Create a binary vector for the single item
    binary_vector = np.zeros((1, len(item_encoder.classes_)))
    binary_vector[0, item_number] = 1
    
    # Use inverse_transform on the binary vector
    item_id = item_encoder.inverse_transform(binary_vector)[0][0]
    item_name = items['data'][str(item_id)]['name']
    return item_name

# write a function that tests model on a single example from test set

def test_model(model, test_loader):

    model.eval()
    with torch.no_grad():
        champion_ids, other_features, labels = next(iter(test_loader))
        champion_ids, other_features, labels = champion_ids.to(device), other_features.to(device), labels.to(device)
        outputs = model(champion_ids, other_features)
        predicted_items = outputs.cpu().numpy()
    
    # Print the predicted items
    print("Predicted items:")
   
    for i, item in enumerate(predicted_items[0]):
        if item >= 0.2:
            print(get_item_name(i,item_encoder, items))

def load_model(file_path, num_champions, num_other_features, output_dim):
    model = LoLItemizationModel(num_champions, num_other_features, output_dim=output_dim)
    
    # Load the state dict
    state_dict = torch.load(file_path)
    
    model.load_state_dict(state_dict)
    
    model.eval()
    
    return model


# Test the model


num_champions = len(champion_encoder.classes_)
num_other_features = other_features_df.shape[1]
output_dim = items_df.shape[1]

#read the model from pth
model = load_model('lol_itemization_model.pth', num_champions, num_other_features, output_dim)
print(X_champion_val[:1], X_other_val[:1], y_val[:1])
val_data = TensorDataset(X_champion_val[:1], X_other_val[:1], y_val[:1])
test_loader = DataLoader(val_data, batch_size=1)
test_model(model, test_loader)

tensor([66]) tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0.]]) tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,


  state_dict = torch.load(file_path)


In [14]:
class LoLItemizationModel_v2(nn.Module):
    def __init__(self, num_champions, num_other_features, output_dim, embedding_dim=32, hidden_dim=64, dropout_rate=0.5):
        super(LoLItemizationModel_v2, self).__init__()  
        

        self.champion_embedding = nn.Embedding(num_champions, embedding_dim)
        
        self.champion_layers = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )
        
        self.combined_layers = nn.Sequential(
            nn.Linear(hidden_dim + num_other_features, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid()
        )
        
    def forward(self, champion_ids, other_features):
        champion_embedded = self.champion_embedding(champion_ids)
        champion_features = self.champion_layers(champion_embedded)

        combined = torch.cat([champion_features, other_features], dim=1)
        
        output = self.combined_layers(combined)
        
        return output

In [None]:
X_champion = torch.LongTensor(champions_df['champion_id'].values)
X_other = torch.FloatTensor(other_features_df.values)
y = torch.FloatTensor(items_df.values)

learning_rate = 0.0005

# Split the data
X_champion_train, X_champion_val, X_other_train, X_other_val, y_train, y_val = train_test_split(
    X_champion, X_other, y, test_size=0.2, random_state=42
)

# Create DataLoader
train_data = TensorDataset(X_champion_train, X_other_train, y_train)
val_data = TensorDataset(X_champion_val, X_other_val, y_val)

batch_size = 128
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)

num_champions = len(champion_encoder.classes_)
num_other_features = other_features_df.shape[1]
output_dim = items_df.shape[1]

model_v2 = LoLItemizationModel_v2(num_champions, num_other_features, output_dim)

# Define loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model_v2.parameters(), lr=learning_rate)

# Training loop
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_v2.to(device)

for epoch in range(num_epochs):
    model_v2.train()
    train_loss = 0.0
    for champion_ids, other_features, labels in train_loader:
        champion_ids, other_features, labels = champion_ids.to(device), other_features.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model_v2(champion_ids, other_features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * champion_ids.size(0)
    
    train_loss /= len(train_loader.dataset)
    
    # Validation
    model_v2.eval()
    val_loss = 0.0
    with torch.no_grad():
        for champion_ids, other_features, labels in val_loader:
            champion_ids, other_features, labels = champion_ids.to(device), other_features.to(device), labels.to(device)
            outputs = model_v2(champion_ids, other_features)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * champion_ids.size(0)
    
    val_loss /= len(val_loader.dataset)
    
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# Save the trained model
torch.save(model_v2.state_dict(), 'lol_itemization_model_v2.pth')

print("Training complete. Model saved as 'lol_itemization_model_v2.pth'")

In [None]:
class RevisedLoLItemizationModel(nn.Module):
    def __init__(self, num_champions, num_other_features, hidden_dim=128, output_dim=100):
        super(RevisedLoLItemizationModel, self).__init__()
        
        self.champion_weight = nn.Parameter(torch.ones(num_champions))
        
        self.layers = nn.Sequential(
            nn.Linear(num_champions + num_other_features, hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid()
        )
        
    def forward(self, champions, other_features):
        # Apply weights to champion features
        weighted_champions = champions * self.champion_weight
        
        # Combine weighted champion features with other features
        combined = torch.cat([weighted_champions, other_features], dim=1)
        
        output = self.layers(combined)
        
        return output