# Aegis-C9 ML Prediction Model

This notebook builds a machine learning model to predict match outcomes for the Aegis-C9 esports coaching platform.

## Features:
- Win probability prediction based on player statistics
- Player performance analysis
- API-ready model export for real-time prediction

In [None]:
# Import required libraries
import warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.linear_model import LogisticRegression
# Silence only deprecation-style noise from the plotting/data libraries.
# A blanket filterwarnings('ignore') would also hide warnings that signal real
# problems in this notebook (e.g. a solver that did not converge, or inputs
# whose feature names do not match the ones seen during fit).
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

print("Libraries loaded successfully!")

## 1. Data Loading and Exploration

In [None]:
# Read the ranked-match CSV export into a DataFrame.
# NOTE: the file name contains two consecutive spaces; keep it byte-for-byte.
csv_path = 'League of Legends Ranked Match Data  Season 15 (EUN).csv'
df = pd.read_csv(csv_path)

# Report the frame dimensions followed by the full list of column names
column_names = df.columns.tolist()
print(f"Dataset Shape: {df.shape}")
print(f"\nColumns ({len(column_names)} total):")
print(column_names)

In [None]:
# Preview the leading rows of the raw frame to sanity-check column names and
# value formats before any preprocessing.
df.head()

In [None]:
# Summary statistics of the raw frame (describe() with default arguments),
# useful for spotting implausible ranges before modelling.
df.describe()

In [None]:
# Count nulls per column, then display only the columns that actually have some
missing_data = df.isna().sum()
missing_data.loc[missing_data.gt(0)]

## 2. Data Preprocessing

In [None]:
# Statistics used as model inputs for win prediction
feature_columns = [
    # combat
    'kills', 'deaths', 'assists', 'kda_ratio', 'kill_participation',
    # economy and damage
    'gold_earned', 'gold_per_min', 'damage_dealt', 'damage_per_min',
    'damage_to_champ', 'damage_champ_per_min', 'damage_taken',
    # vision and team objectives
    'vision_score', 'team_baronKills', 'team_dragonKills',
    'team_towerKills', 'team_champKills', 'team_riftHeraldKills',
    'team_inhibitorKills',
    # match length
    'duration',
]

# Columns carried along for labelling/grouping rather than as model inputs
extra_columns = ['win', 'position', 'game_id']

# Work on an independent copy so the raw frame is never modified
df_model = df.loc[:, feature_columns + extra_columns].copy()

# Encode the outcome as 1 (win) / 0 (loss). Both real booleans and the strings
# 'TRUE'/'FALSE' are accepted; any other value maps to NaN.
win_encoding = {True: 1, False: 0, 'TRUE': 1, 'FALSE': 0}
df_model['win'] = df_model['win'].map(win_encoding)

print(f"Working dataset shape: {df_model.shape}")
df_model.head()

In [None]:
# Aggregate player data by game (team-level statistics).
# Player-level counters are summed, per-player rates are averaged, and the
# team_* / duration columns are taken from the first row of each group
# (presumably identical for every player on a team - TODO confirm).
agg_functions = {
    'kills': 'sum',
    'deaths': 'sum',
    'assists': 'sum',
    'kda_ratio': 'mean',
    'kill_participation': 'mean',
    'gold_earned': 'sum',
    'gold_per_min': 'mean',
    'damage_dealt': 'sum',
    'damage_per_min': 'mean',
    'damage_to_champ': 'sum',
    'damage_champ_per_min': 'mean',
    'damage_taken': 'sum',
    'vision_score': 'sum',
    'team_baronKills': 'first',
    'team_dragonKills': 'first',
    'team_towerKills': 'first',
    'team_champKills': 'first',
    'team_riftHeraldKills': 'first',
    'team_inhibitorKills': 'first',
    'duration': 'first'
}

# Group by game_id and win status to get team-level data.
# 'win' is deliberately NOT part of agg_functions: it is already a grouping key,
# so reset_index() restores it as a regular column. Aggregating it as well
# creates a second 'win' column and reset_index() then fails with
# "ValueError: cannot insert win, already exists".
df_team = df_model.groupby(['game_id', 'win']).agg(agg_functions).reset_index()

print(f"Team-level dataset shape: {df_team.shape}")
df_team.head()

In [None]:
# Handle any missing values by filling each numeric column with its median.
# numeric_only=True is required for robustness: without it, pandas >= 2.0 raises
# a TypeError as soon as the frame holds a non-numeric column (game_id may be a
# string identifier - its dtype is not visible here). Non-numeric columns are
# simply left untouched by the fill.
df_team = df_team.fillna(df_team.median(numeric_only=True))

# Check class distribution
print("Win/Loss Distribution:")
print(df_team['win'].value_counts())

## 3. Exploratory Data Analysis

In [None]:
# Visualize key features by win/loss: one boxplot panel per metric (2 x 3 grid)
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

metrics = ['kills', 'gold_earned', 'damage_to_champ', 'vision_score', 'team_dragonKills', 'team_baronKills']

for ax, metric in zip(axes.flatten(), metrics):
    df_team.boxplot(column=metric, by='win', ax=ax)
    # Replace the raw column name pandas puts on the panel with a readable title
    ax.set_title(metric.replace('_', ' ').title())
    ax.set_xlabel('Win (0=Loss, 1=Win)')

# Set the figure title AFTER the loop: DataFrame.boxplot(by=...) writes its own
# "Boxplot grouped by win" suptitle on every call, which would overwrite a
# title set before plotting.
fig.suptitle('Key Metrics by Win/Loss Outcome', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Correlation heatmap
plt.figure(figsize=(14, 10))

# Pairwise correlations across every numeric column of the team-level frame
numeric_cols = df_team.select_dtypes(include=[np.number]).columns
corr_matrix = df_team.loc[:, numeric_cols].corr()

# Boolean mask covering the upper triangle (diagonal included) so that each
# pair of features is drawn only once
mask = np.triu(np.ones(corr_matrix.shape, dtype=bool))

heatmap_style = dict(annot=False, cmap='coolwarm', center=0, square=True, linewidths=0.5)
sns.heatmap(corr_matrix, mask=mask, **heatmap_style)

plt.title('Feature Correlation Matrix', fontsize=14, fontweight='bold', pad=20)
plt.tight_layout()
plt.show()

In [None]:
# Rank every numeric column by its correlation with the outcome
# (the trivial self-correlation of 'win' is dropped first)
win_correlation = corr_matrix['win'].drop('win').sort_values(ascending=False)

plt.figure(figsize=(10, 8))

# Blue bars for strictly positive correlations, orange for everything else
colors = np.where(win_correlation.values > 0, '#00aeef', '#ff6b35').tolist()
win_correlation.plot.barh(color=colors)

plt.title('Feature Correlation with Win Outcome', fontsize=14, fontweight='bold')
plt.xlabel('Correlation Coefficient')
# Thin reference line at zero separates positive from negative correlations
plt.axvline(0, color='black', linestyle='-', linewidth=0.5)
plt.tight_layout()
plt.show()

## 4. Model Training

In [None]:
# Model inputs are every team-level column except the identifier and the label
non_feature_cols = {'game_id', 'win'}
feature_cols = [name for name in df_team.columns if name not in non_feature_cols]
X = df_team[feature_cols]
y = df_team['win']

# 80/20 hold-out split, stratified on the label, with a fixed seed for reproducibility.
# NOTE(review): df_team can hold one row per (game_id, win) pair, and the split
# ignores game_id, so rows from the same game may land on both sides of the
# split - confirm whether a group-aware split is wanted.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise features: statistics are learned from the training rows only and
# then applied unchanged to the test rows
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")

In [None]:
# Train multiple models.
# Logistic Regression is wrapped in a pipeline with its own StandardScaler so
# that EVERY entry in `models` accepts the raw (unscaled) feature frame. The
# prediction helpers and the exported model later in this notebook call
# predict_proba on raw features; a bare LogisticRegression fitted on pre-scaled
# data would silently return wrong probabilities there if it were selected as
# the best model. The pipeline's scaler is fitted on X_train only, so the
# evaluation below is equivalent to fitting on X_train_scaled.
models = {
    'Logistic Regression': make_pipeline(
        StandardScaler(),
        LogisticRegression(max_iter=1000, random_state=42),
    ),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, random_state=42)
}

# name -> fitted model plus its hold-out metrics and raw predictions
results = {}

for name, model in models.items():
    print(f"\n{'='*50}")
    print(f"Training {name}...")

    # All models share the same raw-feature contract (see note above)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Column 1 of predict_proba is the probability of the positive class (win == 1)
    y_prob = model.predict_proba(X_test)[:, 1]

    # Calculate metrics on the held-out test set
    accuracy = accuracy_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_prob)

    results[name] = {
        'model': model,
        'accuracy': accuracy,
        'roc_auc': roc_auc,
        'predictions': y_pred,
        'probabilities': y_prob
    }

    print(f"Accuracy: {accuracy:.4f}")
    print(f"ROC-AUC: {roc_auc:.4f}")
    print(f"\nClassification Report:")
    print(classification_report(y_test, y_pred))

In [None]:
# Grouped bar chart: accuracy and ROC-AUC side by side for each trained model
model_names = list(results)
accuracies = [results[name]['accuracy'] for name in model_names]
roc_aucs = [results[name]['roc_auc'] for name in model_names]

fig, ax = plt.subplots(figsize=(10, 6))

# One slot per model; the two bars of a pair sit half a bar-width either side
x = np.arange(len(model_names))
width = 0.35
half_width = width / 2

bars1 = ax.bar(x - half_width, accuracies, width, label='Accuracy', color='#00aeef')
bars2 = ax.bar(x + half_width, roc_aucs, width, label='ROC-AUC', color='#ff6b35')

ax.set_xlabel('Model')
ax.set_ylabel('Score')
ax.set_title('Model Performance Comparison', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(model_names, rotation=15)
ax.legend()
# Leave headroom above 1.0 for the value labels
ax.set_ylim(0, 1.1)

# Print each score just above its bar
for rect in (*bars1, *bars2):
    score = rect.get_height()
    bar_centre = rect.get_x() + rect.get_width() / 2
    ax.annotate(
        f'{score:.3f}',
        xy=(bar_centre, score),
        xytext=(0, 3),
        textcoords="offset points",
        ha='center',
        va='bottom',
        fontsize=9,
    )

plt.tight_layout()
plt.show()

## 5. Feature Importance Analysis

In [None]:
# Rank the model inputs by the Random Forest's impurity-based importances
rf_model = results['Random Forest']['model']
feature_importance = (
    pd.DataFrame({'feature': feature_cols, 'importance': rf_model.feature_importances_})
    .sort_values('importance', ascending=False)
)

plt.figure(figsize=(12, 8))

# Highlight the ten highest-ranked bars in blue; the remainder are grey
n_features = len(feature_importance)
colors = ['#00aeef'] * min(n_features, 10) + ['#94a3b8'] * max(n_features - 10, 0)

plt.barh(feature_importance['feature'], feature_importance['importance'], color=colors)
plt.xlabel('Importance Score')
plt.title('Feature Importance (Random Forest)', fontsize=14, fontweight='bold')
# Flip the axis so the most important feature is drawn at the top
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()

print("\nTop 10 Most Important Features:")
feature_importance.head(10)

## 6. Real-Time Win Probability Prediction Function

In [None]:
# Pick the model with the highest hold-out ROC-AUC
# (on a tie, the entry that appears first in `results` wins)
best_model_name = max(results, key=lambda name: results[name]['roc_auc'])
best_entry = results[best_model_name]
best_model = best_entry['model']

print(f"Best Model: {best_model_name}")
print(f"ROC-AUC: {best_entry['roc_auc']:.4f}")

In [None]:
def predict_win_probability(game_stats: dict, model=None, feature_columns=None):
    """
    Predict the win probability based on current game statistics.

    Parameters:
    -----------
    game_stats : dict
        Dictionary containing current game statistics. It must provide a value
        for every name in `feature_columns`; extra keys are ignored.
    model : trained sklearn-style model, optional
        Any object exposing `predict_proba`. It is called with a one-row
        DataFrame of raw (unscaled) feature values, so it must do any scaling
        it needs itself. Defaults to the notebook-level `best_model`, looked up
        when the function is CALLED (not when it is defined), so re-running the
        training cells is picked up without redefining this function.
    feature_columns : list, optional
        Feature column names in the order the model expects. Defaults to the
        notebook-level `feature_cols`, also resolved at call time.

    Returns:
    --------
    dict
        'win_probability' : float, percentage in [0, 100], rounded to 2 decimals
        'prediction'      : 'Win' if the probability is >= 0.5, else 'Loss'
        'confidence'      : float, distance from the 50% mark rescaled to [0, 100]

    Raises:
    -------
    KeyError
        If `game_stats` lacks one or more of the required features.
    """
    # Late-bound defaults: avoids freezing a stale model/feature list into the
    # signature at definition time.
    if model is None:
        model = best_model
    if feature_columns is None:
        feature_columns = feature_cols

    # Fail early with the full list of missing names instead of an opaque
    # pandas indexing error.
    missing = [name for name in feature_columns if name not in game_stats]
    if missing:
        raise KeyError(f"game_stats is missing required feature(s): {missing}")

    # Create a one-row DataFrame with the columns in the order the model expects
    input_data = pd.DataFrame([game_stats])[list(feature_columns)]

    # Column 1 of predict_proba is the positive (win) class; cast to a built-in
    # float so the result is a plain Python number regardless of model dtype.
    win_prob = float(model.predict_proba(input_data)[0][1])
    prediction = 'Win' if win_prob >= 0.5 else 'Loss'

    return {
        'win_probability': round(win_prob * 100, 2),
        'prediction': prediction,
        'confidence': round(abs(win_prob - 0.5) * 200, 2)
    }

# Example usage with mock data: one hand-written set of team-level statistics
example_game = dict(
    kills=15,
    deaths=8,
    assists=25,
    kda_ratio=3.5,
    kill_participation=0.65,
    gold_earned=55000,
    gold_per_min=450,
    damage_dealt=150000,
    damage_per_min=1200,
    damage_to_champ=80000,
    damage_champ_per_min=650,
    damage_taken=70000,
    vision_score=120,
    team_baronKills=1,
    team_dragonKills=3,
    team_towerKills=6,
    team_champKills=20,
    team_riftHeraldKills=1,
    team_inhibitorKills=2,
    duration=1800,
)

# Run the helper with its default model/feature list and show the result
result = predict_win_probability(example_game)
print(
    "\n=== Prediction Result ===",
    f"Win Probability: {result['win_probability']}%",
    f"Predicted Outcome: {result['prediction']}",
    f"Confidence: {result['confidence']}%",
    sep="\n",
)

## 7. Player Performance Analysis Model

In [None]:
# Player-level frame: identifiers, outcome and the per-player statistics that
# feed the performance score
player_columns = [
    'participant_id', 'champion_name', 'position', 'win',
    'kills', 'deaths', 'assists', 'kda_ratio', 'kill_participation',
    'gold_per_min', 'damage_champ_per_min', 'vision_score',
]
df_player = df[player_columns].copy()

# Encode the outcome as 1 (win) / 0 (loss)
win_encoding = {True: 1, False: 0, 'TRUE': 1, 'FALSE': 0}
df_player['win'] = df_player['win'].map(win_encoding)

# Weighted performance score: each statistic is rescaled by a fixed divisor or
# multiplier and then weighted (0.3 / 0.2 / 0.2 / 0.2 / 0.1)
kda_term = df_player['kda_ratio'] * 0.3
participation_term = df_player['kill_participation'] * 100 * 0.2
gold_term = df_player['gold_per_min'] / 50 * 0.2
damage_term = df_player['damage_champ_per_min'] / 100 * 0.2
vision_term = df_player['vision_score'] / 5 * 0.1

df_player['performance_score'] = (
    kda_term + participation_term + gold_term + damage_term + vision_term
)

df_player.head()

In [None]:
# Average performance score, KDA ratio and win rate per position, rounded to 2 decimals
position_stats = (
    df_player
    .groupby('position')[['performance_score', 'kda_ratio', 'win']]
    .mean()
    .round(2)
)

# Bar chart of the mean performance score for each position
plt.figure(figsize=(10, 6))
position_stats['performance_score'].plot.bar(color='#00aeef')
plt.title('Average Performance Score by Position', fontsize=14, fontweight='bold')
plt.xlabel('Position')
plt.ylabel('Performance Score')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

position_stats

## 8. Export Model for Production

In [None]:
import joblib
import json

# Output artefacts written to the current working directory
model_file = 'aegis_win_prediction_model.pkl'
scaler_file = 'aegis_feature_scaler.pkl'
config_file = 'aegis_model_config.json'

# Persist the selected model and the fitted feature scaler
joblib.dump(best_model, model_file)
joblib.dump(scaler, scaler_file)

# Metadata a consumer needs to use the model: the feature order plus the
# selected model's name and hold-out metrics
best_metrics = results[best_model_name]
config = {
    'feature_columns': feature_cols,
    'model_name': best_model_name,
    'model_accuracy': best_metrics['accuracy'],
    'model_roc_auc': best_metrics['roc_auc'],
}

with open(config_file, 'w') as f:
    json.dump(config, f, indent=2)

print("Model exported successfully!")
print("- Model file: " + model_file)
print("- Scaler file: " + scaler_file)
print("- Config file: " + config_file)

## 9. API-Ready Prediction Class

In [None]:
class AegisPredictionEngine:
    """
    Production-ready prediction engine for Aegis-C9 platform.
    Can be deployed as an API service.

    Loads a serialized model and its JSON configuration, then turns a dict of
    game statistics into a win probability, a Win/Loss label, a confidence
    value and a coarse risk level.
    """

    def __init__(self, model_path='aegis_win_prediction_model.pkl',
                 config_path='aegis_model_config.json'):
        """
        Load the model and its configuration from disk.

        Parameters:
        -----------
        model_path : str
            Path to the joblib-serialized model.
        config_path : str
            Path to the JSON config; it must contain 'feature_columns',
            'model_name' and 'model_accuracy'.
        """
        # SECURITY: joblib.load unpickles arbitrary Python objects and can
        # execute code while doing so - only load model files from a trusted source.
        self.model = joblib.load(model_path)
        with open(config_path, 'r') as f:
            self.config = json.load(f)
        self.feature_columns = self.config['feature_columns']

    def predict(self, game_stats: dict) -> dict:
        """
        Make a win probability prediction.

        The model's predict_proba is called with a one-row DataFrame holding
        the raw values from `game_stats`, in `feature_columns` order.

        Parameters:
        -----------
        game_stats : dict
            Current game statistics. Must contain every name returned by
            get_feature_requirements(); extra keys are ignored.

        Returns:
        --------
        dict with keys:
            - win_probability: float (0-100)
            - prediction: str ('Win' or 'Loss')
            - confidence: float (0-100)
            - risk_level: str ('Low', 'Medium', 'High')
            - model_info: dict with the model 'name' and 'accuracy' from the config

        Raises:
        -------
        KeyError
            If `game_stats` lacks one or more of the required features.
        """
        # Report all missing features at once instead of an opaque pandas error
        missing = [name for name in self.feature_columns if name not in game_stats]
        if missing:
            raise KeyError(f"game_stats is missing required feature(s): {missing}")

        input_data = pd.DataFrame([game_stats])[self.feature_columns]
        # Column 1 is the positive (win) class. Cast to a built-in float so the
        # response stays JSON-serializable whatever dtype the model returns.
        win_prob = float(self.model.predict_proba(input_data)[0][1])

        # Determine risk level: >= 70% is Low, 40% up to 70% is Medium, below 40% is High
        if win_prob >= 0.7:
            risk_level = 'Low'
        elif win_prob >= 0.4:
            risk_level = 'Medium'
        else:
            risk_level = 'High'

        return {
            'win_probability': round(win_prob * 100, 2),
            'prediction': 'Win' if win_prob >= 0.5 else 'Loss',
            # Distance from the 50% mark, rescaled to 0-100
            'confidence': round(abs(win_prob - 0.5) * 200, 2),
            'risk_level': risk_level,
            'model_info': {
                'name': self.config['model_name'],
                'accuracy': self.config['model_accuracy']
            }
        }

    def get_feature_requirements(self) -> list:
        """Return the list of required features for prediction.

        A copy is returned so that callers cannot alter the engine's own
        feature order by mutating the result.
        """
        return list(self.feature_columns)

# Test the prediction engine: construct it with its default paths (the model
# and config file names used by the export cell) and score the mock
# `example_game` defined earlier in the notebook.
engine = AegisPredictionEngine()
prediction = engine.predict(example_game)

# Pretty-print the full response as JSON
print("\n=== Aegis Prediction Engine Test ===")
print(json.dumps(prediction, indent=2))

## Summary

This notebook has created:

1. **Win Probability Model** - Predicts match outcomes from team-level statistics aggregated per game
2. **Feature Importance Analysis** - Identifies key factors affecting win probability
3. **Player Performance Scoring** - Evaluates individual player contributions
4. **Production-Ready Export** - Model files ready for API deployment

### Integration with Aegis-C9 Frontend:
- The exported model can be loaded in a Python backend/API
- Real-time predictions can be sent to the frontend via WebSocket
- The win probability displayed on the dashboard can use these predictions