# League of Legends Match Predictor

### Introduction
League of Legends, a popular multiplayer online battle arena (MOBA) game, generates extensive data from matches, providing an excellent opportunity to apply machine learning techniques to real-world scenarios. Perform the following steps to build a logistic regression model aimed at predicting the outcomes of League of Legends matches. 

In [18]:
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import DataLoader, TensorDataset


In [12]:
# The CSV is expected at <parent of the working directory>/project/.
data_path = (
    Path.cwd().resolve().parents[0]
    / "project"
    / "league_of_legends_data_large.csv"
)

df = pd.read_csv(data_path)

# Separate the target column ('win') from the predictor columns.
y = df['win']
X = df.drop(columns='win')

display(X, y)

Unnamed: 0,kills,deaths,assists,gold_earned,cs,wards_placed,wards_killed,damage_dealt
0,16,6,19,17088,231,11,7,15367
1,8,8,5,14865,259,10,2,38332
2,0,17,11,15919,169,14,5,24642
3,19,11,1,11534,264,14,3,15789
4,12,7,6,18926,124,15,7,40268
...,...,...,...,...,...,...,...,...
995,2,15,12,17170,294,8,6,33469
996,5,13,4,19524,236,14,3,8845
997,8,7,8,7961,139,11,7,49650
998,5,17,5,8226,193,9,9,28290


0      0
1      1
2      0
3      0
4      0
      ..
995    0
996    0
997    1
998    1
999    0
Name: win, Length: 1000, dtype: int64

### Train Test Split

In [38]:
# Hold out 20% of the rows for testing; stratify on the label so both splits
# keep the same win/loss proportions, and fix the seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Report the shape of every split as a sanity check.
display(
    f'X_train shape: {X_train.shape}',
    f'y_train shape: {y_train.shape}',
    f'X_test shape: {X_test.shape}',
    f'y_test shape: {y_test.shape}',
)

'X_train shape: (800, 8)'

'y_train shape: (800,)'

'X_test shape: (200, 8)'

'y_test shape: (200,)'

### Standardize the features using StandardScaler

In [39]:
# NOTE: this cell rebinds X_train / X_test / y_train / y_test from pandas
# objects to tensors, so re-run the train/test split cell before re-running it.
scaler = StandardScaler()
# Fit the scaler on the training data only and transform it
X_train = scaler.fit_transform(X_train)
# Transform the test data with the statistics learned from the training data
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.long)
y_test = torch.tensor(y_test.values, dtype=torch.long)

# The evaluation cell further down reads the `*_tensor` names; define them here
# so the notebook runs top to bottom on a fresh kernel. The label tensors are
# float columns of shape (N, 1) so they line up element-wise with the model's
# (N, 1) output instead of broadcasting against it.
X_train_tensor, X_test_tensor = X_train, X_test
y_train_tensor = y_train.view(-1, 1).float()
y_test_tensor = y_test.view(-1, 1).float()

# Create DataLoader for training and test sets
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# Only the training loader is shuffled; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)



### Logistic Regression Model

In [40]:
import torch.nn as nn

class LogisticRegressionNet(nn.Module):
    """Logistic regression as a single linear layer followed by a sigmoid.

    Args:
        n_inputs: Number of input features per sample.
    """

    def __init__(self, n_inputs):
        super().__init__()
        # A single output unit per sample.
        self.linear = nn.Linear(in_features=n_inputs, out_features=1)

    def forward(self, x):
        """Return sigmoid(linear(x)); the last dimension of the result is 1."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

# Instantiate the model

# Seed PyTorch's global RNG first so the layer's random weight initialisation
# (and any later draw from the global RNG) is repeatable across kernel restarts.
torch.manual_seed(42)

input_dim = X_train.shape[1]  # Number of features

model = LogisticRegressionNet(n_inputs=input_dim)


In [41]:
# Print the module structure to confirm the layer's input/output sizes.
print(model)

LogisticRegressionNet(
  (linear): Linear(in_features=8, out_features=1, bias=True)
)


In [42]:
import torch.optim as optim

# Binary cross-entropy on probabilities: the model's forward pass already
# applies the sigmoid, so the loss must not apply it again.
criterion = nn.BCELoss()

# Plain stochastic gradient descent over every model parameter.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

In [43]:
# Number of full passes over the training data.
epochs = 1000

# Per-epoch average losses; the training loop appends to these.
train_losses, test_losses = [], []





In [45]:
# Train for `epochs` passes over the training loader and, after each pass,
# measure the loss on the test loader. Each epoch's average losses are appended
# to `train_losses` / `test_losses`; re-running this cell without resetting
# those lists extends the existing history rather than replacing it.
for epoch in range(epochs):
    # Training phase
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        # Labels become a float column so they match the model's
        # single-column output, as the BCE criterion requires.
        y_batch = y_batch.view(-1, 1).float()
        
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Epoch training loss = mean of the per-batch losses.
    train_loss = running_loss / len(train_loader)
    train_losses.append(train_loss)
    
    # Evaluation phase on test set (no gradient tracking, no parameter updates)
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_batch = y_batch.view(-1, 1).float()
            test_outputs = model(X_batch)
            loss = criterion(test_outputs, y_batch)
            test_loss += loss.item()

    # Epoch test loss = mean of the per-batch losses.
    test_loss /= len(test_loader)
    test_losses.append(test_loss)
    
    # Log progress every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')


Epoch [100/1000], Train Loss: 0.6953, Test Loss: 0.6827
Epoch [200/1000], Train Loss: 0.6955, Test Loss: 0.6841
Epoch [300/1000], Train Loss: 0.6953, Test Loss: 0.6814
Epoch [400/1000], Train Loss: 0.6955, Test Loss: 0.6831
Epoch [500/1000], Train Loss: 0.6955, Test Loss: 0.6836
Epoch [600/1000], Train Loss: 0.6951, Test Loss: 0.6780
Epoch [700/1000], Train Loss: 0.6956, Test Loss: 0.6846
Epoch [800/1000], Train Loss: 0.6949, Test Loss: 0.6860
Epoch [900/1000], Train Loss: 0.6954, Test Loss: 0.6830
Epoch [1000/1000], Train Loss: 0.6950, Test Loss: 0.6794


In [57]:
# Evaluate the trained model on the full training and test sets.
# X_train / X_test / y_train / y_test are the tensors created in the scaling
# cell; using them directly keeps this cell runnable on a fresh kernel.
model.eval()
with torch.no_grad():
    # Get predictions on training set
    train_predictions = model(X_train)

    # Get predictions on test set
    test_predictions = model(X_test)

# The predictions are probabilities (0 to 1) from sigmoid
print("Raw predictions (probabilities):")
print(f"Train predictions shape: {train_predictions.shape}")
print(f"Test predictions shape: {test_predictions.shape}")
print(f"Sample train predictions: {train_predictions[:5].flatten()}")
print(f"Sample test predictions: {test_predictions[:5].flatten()}")

# Convert probabilities to binary predictions (0 or 1)
# Using threshold of 0.5
train_pred_binary = (train_predictions > 0.5).float()
test_pred_binary = (test_predictions > 0.5).float()

print(f"\nBinary predictions (0 or 1):")
print(f"Sample train binary predictions: {train_pred_binary[:5].flatten()}")
print(f"Sample test binary predictions: {test_pred_binary[:5].flatten()}")

# Calculate accuracy.
# Predictions have shape (N, 1). Labels are reshaped to (N, 1) as well so the
# comparison is element-wise; comparing (N, 1) against (N,) would broadcast to
# an (N, N) matrix and produce a meaningless "accuracy".
train_labels = y_train.view(-1, 1).float()
test_labels = y_test.view(-1, 1).float()
train_accuracy = (train_pred_binary == train_labels).float().mean()
test_accuracy = (test_pred_binary == test_labels).float().mean()

print(f"\nModel Performance:")
print(f"Training Accuracy: {train_accuracy.item():.4f} ({train_accuracy.item()*100:.2f}%)")
print(f"Test Accuracy: {test_accuracy.item():.4f} ({test_accuracy.item()*100:.2f}%)")

Raw predictions (probabilities):
Train predictions shape: torch.Size([800, 1])
Test predictions shape: torch.Size([200, 1])
Sample train predictions: tensor([0.5901, 0.5111, 0.5054, 0.5643, 0.4990])
Sample test predictions: tensor([0.5219, 0.4898, 0.5074, 0.5204, 0.5557])

Binary predictions (0 or 1):
Sample train binary predictions: tensor([1., 1., 1., 1., 0.])
Sample test binary predictions: tensor([1., 0., 1., 1., 1.])

Model Performance:
Training Accuracy: 0.5375 (53.75%)
Test Accuracy: 0.5600 (56.00%)


### Model Optimization