In [1]:
import warnings

import numpy as np
import pandas as pd
import requests
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler


In [2]:

def get_champion_data(version, champ_name, timeout=10):
    """Download one champion's JSON file and flatten it into a list of numbers.

    The champion's ``info`` values come first, followed by its ``stats``
    values, each in the order the keys appear in the JSON document.
    ``info["difficulty"]`` and the ``crit`` / ``critperlevel`` stats are
    left out.

    Parameters
    ----------
    version : str
        Patch string used in the CDN path, e.g. ``"15.23.1"``.
    champ_name : str
        Champion identifier used as the JSON file name.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up, so a stalled
        connection cannot hang the notebook.

    Returns
    -------
    list
        The kept ``info`` values followed by the kept ``stats`` values.

    Raises
    ------
    ValueError
        If the response's ``"data"`` mapping holds no champion entry.
    Exception
        Whatever ``requests`` raises for network failures, timeouts,
        non-success HTTP statuses or an unparsable body.
    """
    url = f"https://ddragon.leagueoflegends.com/cdn/{version}/data/en_US/champion/{champ_name}.json"
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of with a confusing JSON decode error below.
    response.raise_for_status()
    champions = response.json()["data"]
    if not champions:
        raise ValueError(f"No champion entry in response for {champ_name!r} ({version})")

    # Use the last entry of the mapping, which is what the previous
    # loop-based implementation ended up returning.
    champ_info = list(champions.values())[-1]

    excluded_stats = ("crit", "critperlevel")
    info = {key: value for key, value in champ_info["info"].items() if key != "difficulty"}
    stats = {key: value for key, value in champ_info["stats"].items() if key not in excluded_stats}
    return list({**info, **stats}.values())
    


In [3]:
# Cache: champion name -> feature vector, filled lazily by get_champion_vector.
champion_vector_mapping = {}
VERSION = "15.23.1"
# Length of the all-zero fallback vector; get_champion_data returns 21 values
# per champion on the patch above.
CHAMPION_VECTOR_LENGTH = 21


def get_champion_vector(champ_name):
    """Return the cached feature vector for ``champ_name``, fetching it once.

    On a cache miss the vector is fetched with
    ``get_champion_data(VERSION, champ_name)``. If that call raises, a warning
    is emitted and an all-zero vector of ``CHAMPION_VECTOR_LENGTH`` entries is
    cached instead, so the same champion is not fetched again.

    Parameters
    ----------
    champ_name : hashable
        Champion identifier, used both as cache key and as fetch argument.

    Returns
    -------
    list
        The list stored in ``champion_vector_mapping`` (not a copy).
    """
    if champ_name not in champion_vector_mapping:
        try:
            champion_vector_mapping[champ_name] = get_champion_data(VERSION, champ_name)
        except Exception as exc:
            # Catch Exception rather than everything: a bare except would also
            # swallow KeyboardInterrupt, so the kernel could not be interrupted
            # while the feature matrix is being built.
            warnings.warn(
                f"Could not load champion {champ_name!r} ({exc!r}); using a zero vector",
                RuntimeWarning,
                stacklevel=2,
            )
            champion_vector_mapping[champ_name] = [0] * CHAMPION_VECTOR_LENGTH
    return champion_vector_mapping[champ_name]

# Champion-pick columns read from each match row, in the order their vectors
# are concatenated.
champ_cols = [
    "BLUE_TOP", "BLUE_JUNGLE", "BLUE_MIDDLE", "BLUE_BOTTOM", "BLUE_UTILITY",
    "RED_TOP", "RED_JUNGLE", "RED_MIDDLE", "RED_BOTTOM", "RED_UTILITY",
]


def row_to_vector(row):
    """Build one flat feature list for a match row.

    Looks up ``row[col]`` for every column in ``champ_cols`` and concatenates
    the lists returned by ``get_champion_vector`` in that column order.
    """
    return [
        value
        for slot in champ_cols
        for value in get_champion_vector(row[slot])
    ]



In [4]:
# Spot-check the fetcher on a single champion. Uses the notebook-wide VERSION
# constant so this check cannot drift from the patch used to build the features.
get_champion_data(VERSION, "Fiddlesticks")

[2,
 3,
 9,
 650,
 106,
 500,
 28,
 335,
 34,
 4.7,
 30,
 1.3,
 480,
 5.5,
 0.6,
 8,
 0.8,
 55,
 2.65,
 2.11,
 0.625]

In [5]:
# Load the match table; the cells below read the ten champion-pick columns and
# the RED_WINNER flag from it.
df = pd.read_csv("preprocessed_matches.csv")

# Target: RED_WINNER cast to int. Features: one flat row per match, expanded
# into columns from the list row_to_vector returns.
y = df["RED_WINNER"].astype(int)
X = df.apply(row_to_vector, axis="columns", result_type="expand")

print(f"Feature matrix shape: {X.shape}")
print(f"Target vector shape: {y.shape}")

Feature matrix shape: (14281, 210)
Target vector shape: (14281,)


In [6]:
# Total number of missing entries in the feature matrix (per column, then summed).
missing_per_column = X.isna().sum()
missing_per_column.sum()

np.int64(0)

In [7]:
# Class balance of the target: how many rows have RED_WINNER False vs True.
red_winner_flags = df["RED_WINNER"]
red_winner_flags.value_counts()

RED_WINNER
False    7395
True     6886
Name: count, dtype: int64

In [None]:
from sklearn.decomposition import PCA
from matplotlib import pyplot as plt

# Every champion vector cached so far, with the matching names in the same order.
vecs = np.array(list(champion_vector_mapping.values()))
names = np.array(list(champion_vector_mapping.keys()))

# Standardize each feature before PCA so large-valued columns do not dominate.
scaler = StandardScaler()
scaled = scaler.fit_transform(vecs)

pca = PCA(n_components=2)
coords = pca.fit_transform(scaled)

fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(coords[:, 0], coords[:, 1])
# The loop variables are deliberately NOT called x / y: this loop runs at module
# level, so binding `y` here would overwrite the target vector `y` that the
# train/test split cell uses later in the notebook.
for name, (pc1, pc2) in zip(names, coords):
    ax.text(pc1, pc2, name, fontsize=8)

ax.set_xlim(-3, 3)   # zoom x-axis
ax.set_ylim(-4, 4)   # zoom y-axis

ax.set_title("Champion Embedding PCA (Zoomed)")
plt.show()





In [None]:
import seaborn as sns
from matplotlib import pyplot as plt  # imported here so the cell does not rely on an earlier plotting cell

# How many of the most-picked ADCs / supports are shown on each heatmap axis.
TOP_N = 15

# Two records per match, one per side: the bottom (ADC) and utility (support)
# picks plus whether that side won. Blue's result is the complement of RED_WINNER.
red_win = df["RED_WINNER"].astype(float)
red_duos = pd.DataFrame({
    "ADC": df["RED_BOTTOM"],
    "SUPPORT": df["RED_UTILITY"],
    "WIN": red_win,
})
blue_duos = pd.DataFrame({
    "ADC": df["BLUE_BOTTOM"],
    "SUPPORT": df["BLUE_UTILITY"],
    "WIN": 1.0 - red_win,
})
# A stable sort on the original match index interleaves red/blue per match,
# i.e. the same record order a row-by-row loop would produce.
bot_sup_pairs = (
    pd.concat([red_duos, blue_duos])
    .sort_index(kind="mergesort")
    .reset_index(drop=True)
)

adc_counts = bot_sup_pairs["ADC"].value_counts()
sup_counts = bot_sup_pairs["SUPPORT"].value_counts()

top_N_adc = adc_counts.head(TOP_N).index
top_N_sup = sup_counts.head(TOP_N).index

# Mean of WIN for every (support, ADC) combination that occurs in the data.
pivot = bot_sup_pairs.pivot_table(
    index="SUPPORT",
    columns="ADC",
    values="WIN",
    aggfunc="mean"
)

valid_adc = pivot.columns.intersection(top_N_adc)
valid_sup = pivot.index.intersection(top_N_sup)

pivot_small = pivot.loc[valid_sup, valid_adc]

fig, ax = plt.subplots(figsize=(14, 10))
sns.heatmap(pivot_small, cmap="coolwarm", annot=True, fmt=".2f", ax=ax)
# "\u00d7" is the multiplication sign; written as an escape so the title
# survives re-encoding of the notebook file.
ax.set_title(f"Bot Lane Duo Win Rate Heatmap (Top {TOP_N} ADC \u00d7 Top {TOP_N} Support)")
ax.set_xlabel("ADC Champion")
ax.set_ylabel("Support Champion")
plt.show()


In [6]:
# NOTE(review): `scalar` is not referenced by any other visible cell; the
# train/test split cell creates and fits its own `scaler`. This looks like a
# leftover with a misspelled name -- confirm nothing else uses it, then remove.
scalar = StandardScaler()

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Fit the scaling statistics on the training rows only, then apply the same
# transform to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Grid search over gradient-boosting hyper-parameters with 3-fold CV on the
# training split, then a single evaluation on the held-out test split.
param_grid = dict(
    n_estimators=[100, 200],
    learning_rate=[0.05, 0.1, 0.2],
    max_depth=[2, 3, 4],
    subsample=[0.8, 1.0],
)

gb = GradientBoostingClassifier(random_state=42)
grid = GridSearchCV(
    estimator=gb,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid.fit(X_train, y_train)

# grid.predict uses the refitted best estimator found by the search.
y_pred = grid.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)

print("\nBest parameters:", grid.best_params_)
print("Best CV accuracy:", grid.best_score_)
print("\nTest accuracy:", test_accuracy)
print("\nConfusion matrix:")
print(test_confusion)

In [8]:
from sklearn.ensemble import RandomForestClassifier

# Grid search over random-forest hyper-parameters with 3-fold CV on the
# training split, then a single evaluation on the held-out test split.
param_grid = dict(
    n_estimators=[200, 400, 600],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

rfc = RandomForestClassifier(random_state=42)
grid = GridSearchCV(
    estimator=rfc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid.fit(X_train, y_train)

# grid.predict uses the refitted best estimator found by the search.
y_pred = grid.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)

print("\nBest parameters:", grid.best_params_)
print("Best CV accuracy:", grid.best_score_)
print("\nTest accuracy:", test_accuracy)
print("\nConfusion matrix:")
print(test_confusion)


Fitting 3 folds for each of 108 candidates, totalling 324 fits

Best parameters: {'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 400}
Best CV accuracy: 0.5340511204481793

Test accuracy: 0.5302765138256913

Confusion matrix:
[[1042  437]
 [ 905  473]]
