# League of Legends — Win Prediction (Logistic Regression, PyTorch)

**Sections**
- Data Loading & Preprocessing
- Model Definition
- Training
- Regularization (L2)
- Evaluation & Visualization
- Model Persistence
- Hyperparameter Tuning
- Feature Importance

<img src="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/assets/logos/SN_web_lightmode.png" height=300 width=300 />

# Final Project: League of Legends Match Predictor

In [None]:
!pip install pandas
!pip install scikit-learn
!pip install torch
!pip install matplotlib


In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
import os

# Search the whole filesystem (starting at "/") for the dataset file and
# print the full path of every directory entry that matches its name.
dataset_name = "league_of_legends_data_large.csv"
for folder, _subdirs, names in os.walk("/"):
    # File names are unique within a directory, so a membership test prints
    # at most once per directory, exactly like a per-file comparison would.
    if dataset_name in names:
        print(os.path.join(folder, dataset_name))


In [None]:
import os
# List the entries of the current working directory.
cwd_entries = os.listdir(os.curdir)
print(cwd_entries)


In [None]:
# Read the match dataset from the working directory, then print its
# (rows, columns) shape followed by the first five rows as a preview.
data = pd.read_csv("league_of_legends_data_large.csv")
n_rows, n_cols = data.shape
print((n_rows, n_cols))
print(data.head(n=5))

In [None]:
# Separate the target column from the features. Both are converted to
# float32 NumPy arrays; the labels are reshaped into an (N, 1) column vector.
target_col = "win"
feature_frame = data.drop(columns=[target_col])
X = feature_frame.to_numpy(dtype=np.float32)
labels = data[target_col].to_numpy(dtype=np.float32)
y = labels.reshape(-1, 1)

In [None]:
# Hold out 20% of the rows for testing. The split is seeded for
# reproducibility and stratified on the label so both parts keep the same
# class proportions.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Copy each NumPy array into a float32 torch tensor, keeping the
# train/test and feature/label pairing of the source arrays.
X_train_t, y_train_t, X_test_t, y_test_t = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_test, y_test)
)

In [None]:
""" Task 2 — Model Definition:
    - Define a Logistic Regression model class inheriting from nn.Module
    - Implement a single Linear layer with a Sigmoid activation to output probabilities
    - Initialize the model, Binary Cross-Entropy loss function, and SGD optimizer
"""


import torch
import torch.nn as nn
import torch.optim as optim

# Number of feature columns in the (samples, features) training matrix.
input_dim = X_train.shape[-1]

class LogisticRegressionModel(nn.Module):
    """Logistic regression: one linear layer producing a single logit, then a sigmoid.

    Args:
        input_dim: Number of input features per sample (the size of the last
            dimension of the tensors passed to ``forward``).
    """

    def __init__(self, input_dim):
        super().__init__()
        # Submodules are registered in this order (linear, then sigmoid).
        self.linear = nn.Linear(in_features=input_dim, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of feature vectors to probabilities in (0, 1).

        Args:
            x: Tensor whose last dimension equals ``input_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        logits = self.linear(x)
        return self.sigmoid(logits)

# Build the model together with its loss and optimizer:
# binary cross-entropy on the sigmoid outputs, plain SGD with lr=0.01.
model = LogisticRegressionModel(input_dim=input_dim)
criterion = nn.BCELoss()  # Binary Cross-Entropy
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

# Print the module structure.
print(model)

In [None]:
# ===== Step 3: Train the logistic regression model =====

# Start from a freshly initialised model, loss and optimizer.
model = LogisticRegressionModel(input_dim)
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Full-batch gradient descent: every epoch runs one forward/backward pass
# over the entire training set.
epochs = 1000
log_epochs = {1, *range(100, epochs + 1, 100)}  # first epoch + every 100th
model.train()  # nothing switches the mode inside the loop, so set it once
for epoch in range(1, epochs + 1):
    optimizer.zero_grad()
    outputs = model(X_train_t)
    loss = criterion(outputs, y_train_t)
    loss.backward()
    optimizer.step()
    if epoch in log_epochs:
        print(f"Epoch {epoch:4d} | Loss: {loss.item():.6f}")

# Accuracy on both splits, thresholding the predicted probability at 0.5.
model.eval()
with torch.no_grad():
    train_preds = (model(X_train_t) >= 0.5).float()
    test_preds = (model(X_test_t) >= 0.5).float()
    train_hits = train_preds.eq(y_train_t).sum()
    test_hits = test_preds.eq(y_test_t).sum()
    train_acc = (train_hits / len(y_train_t)).item()
    test_acc = (test_hits / len(y_test_t)).item()

print(f"\nTraining Accuracy: {train_acc*100:.2f}%")
print(f"Testing  Accuracy: {test_acc*100:.2f}%")


In [None]:
# Step 4: Model Optimization (L2) + Evaluation

import torch
import torch.nn as nn
import torch.optim as optim

# Separate model trained with SGD weight decay (lr=0.01, weight_decay=0.01),
# which PyTorch documents as an L2 penalty on the parameters.
model_l2 = LogisticRegressionModel(input_dim)
criterion = nn.BCELoss()
optimizer_l2 = optim.SGD(model_l2.parameters(), lr=0.01, weight_decay=0.01)

# Full-batch training: one forward/backward pass over the whole training
# set per epoch.
epochs = 1000
model_l2.train()  # the mode never changes inside the loop, so set it once
for epoch in range(1, epochs + 1):
    optimizer_l2.zero_grad()
    outputs = model_l2(X_train_t)
    loss = criterion(outputs, y_train_t)
    loss.backward()
    optimizer_l2.step()
    is_log_epoch = epoch == 1 or epoch % 100 == 0
    if is_log_epoch:
        print(f"[L2] Epoch {epoch:4d} | Loss: {loss.item():.6f}")

# Accuracy on both splits, thresholding the predicted probability at 0.5.
model_l2.eval()
with torch.no_grad():
    train_preds_l2 = (model_l2(X_train_t) >= 0.5).float()
    test_preds_l2 = (model_l2(X_test_t) >= 0.5).float()
    train_hits_l2 = train_preds_l2.eq(y_train_t).sum()
    test_hits_l2 = test_preds_l2.eq(y_test_t).sum()
    train_acc_l2 = (train_hits_l2 / len(y_train_t)).item()
    test_acc_l2 = (test_hits_l2 / len(y_test_t)).item()

print(f"\n[L2] Training Accuracy: {train_acc_l2*100:.2f}%")
print(f"[L2] Testing  Accuracy: {test_acc_l2*100:.2f}%")


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import itertools

# Step 5: Evaluation & Visualization
# Trains an L2-regularized model, then reports a confusion matrix, a
# classification report and a ROC curve on the test split.

# Retrain the model with L2 regularization
model = LogisticRegressionModel(input_dim)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.01)

epochs = 1000
for epoch in range(1, epochs + 1):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_t)
    loss = criterion(outputs, y_train_t)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        print(f"[L2] Epoch {epoch:4d} | Loss: {loss.item():.6f}")

# Evaluation
model.eval()
with torch.no_grad():
    y_pred_test = model(X_test_t)
    y_pred_test_labels = (y_pred_test > 0.5).float()

# scikit-learn metrics expect 1-D array-likes. Convert explicitly instead of
# handing over a torch tensor and (N, 1) column vectors and relying on
# implicit conversion. Labels become ints so they match `class_labels`.
y_true_np = np.asarray(y_test).ravel().astype(int)
y_prob_np = y_pred_test.detach().cpu().numpy().ravel()
y_label_np = y_pred_test_labels.detach().cpu().numpy().ravel().astype(int)

# Pin the label set so the matrix is always 2x2 and lines up with the two
# tick/target names below, even if one class is absent from the predictions.
class_labels = [0, 1]
class_names = ['Loss', 'Win']

# Confusion Matrix
cm = confusion_matrix(y_true_np, y_label_np, labels=class_labels)
plt.figure(figsize=(6, 6))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = range(2)
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)

# Annotate every cell with its count; use white text on dark cells.
thresh = cm.max() / 2
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, cm[i, j],
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")

plt.ylabel('True label')
plt.xlabel('Predicted label')
# tight_layout must run after the axis labels exist; otherwise the layout
# is computed without them.
plt.tight_layout()
plt.show()

# Classification Report
print("Classification Report:\n", classification_report(y_true_np, y_label_np, labels=class_labels, target_names=class_names))

# ROC Curve (computed from the predicted probabilities, not the hard labels)
fpr, tpr, thresholds = roc_curve(y_true_np, y_prob_np)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC)')
plt.legend(loc="lower right")
plt.show()




In [None]:
# Step 6: Model Saving and Loading

# Single source of truth for the checkpoint location.
model_path = 'logistic_regression_model.pth'

# Save only the trained parameters (the state_dict), not the module object.
torch.save(model.state_dict(), model_path)

# Create a new model instance with the same architecture.
loaded_model = LogisticRegressionModel(input_dim)

# Load the saved state dictionary into the new model.
# weights_only=True limits unpickling to tensors and plain containers, which
# is all a state_dict holds. It avoids running arbitrary pickled code and
# silences the FutureWarning newer PyTorch versions emit when it is omitted.
# NOTE: requires a PyTorch version that supports `weights_only` (>= 1.13).
state_dict = torch.load(model_path, weights_only=True)
loaded_model.load_state_dict(state_dict)

# Set the model to evaluation mode
loaded_model.eval()

# Evaluate the loaded model on the test dataset
with torch.no_grad():
    y_pred_test_loaded = loaded_model(X_test_t)
    y_pred_test_labels_loaded = (y_pred_test_loaded > 0.5).float()
    accuracy_loaded = (y_pred_test_labels_loaded.eq(y_test_t).sum().item()) / y_test_t.shape[0]

print(f"Loaded Model Test Accuracy: {accuracy_loaded * 100:.2f}%")

In [None]:
# Step 7: Hyperparameter Tuning (learning rate)

import torch
import torch.nn as nn
import torch.optim as optim

# Candidate learning rates for the sweep, and the list that collects one
# (learning_rate, test_accuracy) tuple per candidate.
lrs = [1e-2, 5e-2, 1e-1]
results = list()

def acc_from_probs(probs, y_true, thr=0.5):
    """Return the fraction of predictions that match the true labels.

    A probability is counted as the positive class when it is ``>= thr``.
    Both tensors are flattened before comparison, so an ``(N, 1)`` column of
    probabilities can be compared with an ``(N,)`` label vector without the
    two silently broadcasting into an ``N x N`` comparison.

    Args:
        probs: Tensor of predicted probabilities.
        y_true: Tensor of 0/1 labels with the same number of elements as
            ``probs``.
        thr: Decision threshold applied to ``probs``.

    Returns:
        Accuracy as a Python float in ``[0, 1]``.

    Raises:
        ValueError: If ``probs`` and ``y_true`` hold a different number of
            elements.
        ZeroDivisionError: If the inputs are empty.
    """
    flat_probs = probs.reshape(-1)
    flat_true = y_true.reshape(-1)
    if flat_probs.numel() != flat_true.numel():
        raise ValueError(
            f"probs has {flat_probs.numel()} elements but y_true has "
            f"{flat_true.numel()}"
        )
    preds = (flat_probs >= thr).float()
    return (preds.eq(flat_true).sum().item()) / flat_true.numel()

# Train one fresh model per candidate learning rate for a fixed number of
# full-batch epochs and record its test accuracy.
epochs = 100

for lr in lrs:
    # Re-seed before building each model so every candidate starts from the
    # same initial weights; otherwise differences in random initialisation
    # are confounded with the effect of the learning rate.
    torch.manual_seed(42)

    # Reinitialize model and optimizer for each LR
    model_tune = LogisticRegressionModel(input_dim)
    criterion_tune = nn.BCELoss()
    optimizer_tune = optim.SGD(model_tune.parameters(), lr=lr)

    # Train
    model_tune.train()  # the mode never changes inside the loop
    for _ in range(epochs):
        optimizer_tune.zero_grad()
        out = model_tune(X_train_t)
        loss = criterion_tune(out, y_train_t)
        loss.backward()
        optimizer_tune.step()

    # Evaluate on test
    model_tune.eval()
    with torch.no_grad():
        test_probs = model_tune(X_test_t)
        test_acc = acc_from_probs(test_probs, y_test_t, thr=0.5)

    results.append((lr, test_acc))
    print(f"LR={lr} -> Test Accuracy: {test_acc*100:.2f}%")

# Pick best (on ties, max keeps the first candidate, matching a stable
# descending sort).
# NOTE(review): the learning rate is selected on the test split, so best_acc
# is an optimistic estimate; a separate validation split would avoid this.
best_lr, best_acc = max(results, key=lambda pair: pair[1])
print(f"\nBest LR: {best_lr} with Test Accuracy: {best_acc*100:.2f}%")

In [None]:
# Step 8: Feature Importance

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 1) Pull the learned coefficients out of the linear layer as a flat array
weight_tensor = model.linear.weight.detach().cpu()
weights = weight_tensor.numpy().flatten()

# 2) Feature names: every dataset column except the target
features = data.columns.drop("win")

# 3) One row per feature, ranked by the magnitude of its weight
fi = pd.DataFrame(
    {
        "Feature": features,
        "Weight": weights,
        "Importance": np.abs(weights),
    }
)
fi = fi.sort_values("Importance", ascending=False)
fi = fi.reset_index(drop=True)

print(fi.head(10))  # top features

# 4) Horizontal bar chart of the top-k features. The rows are reversed
#    before plotting so the order passed to barh runs from the smallest of
#    the top-k to the largest.
top_k = min(20, len(fi))
top_rows = fi.iloc[:top_k].iloc[::-1]
plt.figure(figsize=(10, max(4, top_k * 0.35)))
plt.barh(top_rows["Feature"], top_rows["Importance"])
plt.xlabel("Absolute Weight (Importance)")
plt.title("Top Feature Importances — Logistic Regression")
plt.tight_layout()
plt.show()


#### Conclusion:  

Congratulations on completing the project! In this final project, you built a logistic regression model that predicts the outcome of League of Legends matches from various in-game statistics. Along the way you worked through every key step: data loading and preprocessing, model implementation, training, optimization, evaluation, visualization, model saving and loading, hyperparameter tuning, and feature importance analysis. Together, these steps gave you hands-on experience with the complete workflow of developing a machine learning model for binary classification tasks using PyTorch.

© Copyright IBM Corporation. All rights reserved.