In [3]:
import zipfile
import os
import pandas as pd

# Path to the zip file
zip_path = "archive.zip"  # Replace with your zip file name

# Directory to extract to
extract_dir = "extracted_data"  # Folder to extract the CSV files

# Extract only CSV files
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    # Filter CSV files
    csv_files = [f for f in zip_ref.namelist() if f.endswith('.csv')]
    zip_ref.extractall(extract_dir, members=csv_files)

print(f"Extracted CSV files: {csv_files}")

Extracted CSV files: ['high_diamond_ranked_10min.csv']


In [5]:
games = pd.read_csv("extracted_data/games.csv", index_col=0)

In [6]:
games.head()

Unnamed: 0_level_0,blueWins,blueWardsPlaced,blueWardsDestroyed,blueFirstBlood,blueKills,blueDeaths,blueAssists,blueEliteMonsters,blueDragons,blueHeralds,...,redTowersDestroyed,redTotalGold,redAvgLevel,redTotalExperience,redTotalMinionsKilled,redTotalJungleMinionsKilled,redGoldDiff,redExperienceDiff,redCSPerMin,redGoldPerMin
gameId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4519157822,0,28,2,1,9,6,11,0,0,0,...,0,16567,6.8,17047,197,55,-643,8,19.7,1656.7
4523371949,0,12,1,0,5,5,5,0,0,0,...,1,17620,6.8,17438,240,52,2908,1173,24.0,1762.0
4521474530,0,15,0,0,7,11,4,1,1,0,...,0,17285,6.8,17254,203,28,1172,1033,20.3,1728.5
4524384067,0,43,1,0,4,5,5,1,0,1,...,0,16478,7.0,17961,235,47,1321,7,23.5,1647.8
4436033771,0,75,4,0,6,6,6,0,0,0,...,0,17404,7.0,18313,225,67,1004,-230,22.5,1740.4


In [7]:
games.shape

(9879, 39)

In [8]:
games.dtypes

blueWins                          int64
blueWardsPlaced                   int64
blueWardsDestroyed                int64
blueFirstBlood                    int64
blueKills                         int64
blueDeaths                        int64
blueAssists                       int64
blueEliteMonsters                 int64
blueDragons                       int64
blueHeralds                       int64
blueTowersDestroyed               int64
blueTotalGold                     int64
blueAvgLevel                    float64
blueTotalExperience               int64
blueTotalMinionsKilled            int64
blueTotalJungleMinionsKilled      int64
blueGoldDiff                      int64
blueExperienceDiff                int64
blueCSPerMin                    float64
blueGoldPerMin                  float64
redWardsPlaced                    int64
redWardsDestroyed                 int64
redFirstBlood                     int64
redKills                          int64
redDeaths                         int64


In [9]:
from sklearn.ensemble import RandomForestClassifier

In [10]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)

In [11]:
# calculate median of gameID
median_gameId = games.index.to_series().median()

# split into training and test games
train = games[games.index < median_gameId]
test = games[games.index > median_gameId]

In [12]:
predictors = ["blueKills", "blueDeaths", "redKills", "redDeaths"]

In [13]:
rf.fit(train[predictors], train["blueWins"])

In [14]:
preds = rf.predict(test[predictors])

In [15]:
from sklearn.metrics import accuracy_score

In [16]:
acc = accuracy_score(test["blueWins"], preds)

In [17]:
acc

0.7027738408584734

In [19]:
combined = pd.DataFrame(dict(actual=test["blueWins"], prediction=preds))

In [20]:
pd.crosstab(index=combined["actual"], columns=combined["prediction"])

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1786,697
1,771,1685


In [21]:
from sklearn.metrics import precision_score

In [22]:
precision_score(test["blueWins"], preds)

0.7073887489504618

In [24]:
# Relative performance metrics (differences)
games['kill_diff'] = games['blueKills'] - games['redKills']
games['death_diff'] = games['blueDeaths'] - games['redDeaths']
games['gold_diff'] = games['blueTotalGold'] - games['redTotalGold']
games['exp_diff'] = games['blueTotalExperience'] - games['redTotalExperience']
games['wards_diff'] = games['blueWardsPlaced'] - games['redWardsPlaced']
games['elite_monster_diff'] = games['blueEliteMonsters'] - games['redEliteMonsters']

# Relative performance metrics (ratios)
games['gold_ratio'] = games['blueTotalGold'] / (games['redTotalGold'] + 1e-5)  # Avoid division by zero
games['kill_ratio'] = games['blueKills'] / (games['redKills'] + 1e-5)
games['exp_ratio'] = games['blueTotalExperience'] / (games['redTotalExperience'] + 1e-5)

In [26]:
# Aggregate statistics
aggregate_stats = {
    'blueKills': 'mean',
    'redKills': 'mean',
    'blueTotalGold': 'mean',
    'redTotalGold': 'mean',
    'blueTotalExperience': 'mean',
    'redTotalExperience': 'mean',
    'blueWardsPlaced': 'mean',
    'redWardsPlaced': 'mean',
}

agg_df = games.agg(aggregate_stats)
print(agg_df)  # Displays average stats for blue and red teams

blueKills                  6.183925
redKills                   6.137666
blueTotalGold          16503.455512
redTotalGold           16489.041401
blueTotalExperience    17928.110133
redTotalExperience     17961.730438
blueWardsPlaced           22.288288
redWardsPlaced            22.367952
dtype: float64


In [27]:
# Add normalized features based on averages
games['blueKills_norm'] = games['blueKills'] / agg_df['blueKills']
games['redKills_norm'] = games['redKills'] / agg_df['redKills']
games['blueGold_norm'] = games['blueTotalGold'] / agg_df['blueTotalGold']
games['redGold_norm'] = games['redTotalGold'] / agg_df['redTotalGold']

In [28]:
# Select important features
feature_columns = [
    'kill_diff', 'gold_diff', 'exp_diff', 'wards_diff', 'elite_monster_diff',
    'gold_ratio', 'kill_ratio', 'exp_ratio',
    'blueKills_norm', 'redKills_norm', 'blueGold_norm', 'redGold_norm'
]

X = games[feature_columns]  # Features
y = games['blueWins']       # Target

In [29]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.71      0.72      0.72       983
           1       0.72      0.71      0.72       993

    accuracy                           0.72      1976
   macro avg       0.72      0.72      0.72      1976
weighted avg       0.72      0.72      0.72      1976



In [39]:
def make_predictions(data, predictors):
    train = games[games.index < median_gameId]
    test = games[games.index > median_gameId]
    rf.fit(train[predictors], train["blueWins"])
    preds = rf.predict(test[predictors])
    combined = pd.DataFrame(dict(actual=test["blueWins"], predicted=preds), index=test.index)
    precision = precision_score(test["blueWins"], preds)
    return combined, precision

In [40]:
# Exclude columns that shouldn't be used as predictors
excluded_columns = ["blueWins", "gameId"]  # Add other non-relevant columns here if needed

# Select all numeric columns except those in excluded_columns
predictors = [col for col in games.columns if col not in excluded_columns]

In [41]:
combined, precision = make_predictions(games, predictors)

In [34]:
games.columns

Index(['blueWins', 'blueWardsPlaced', 'blueWardsDestroyed', 'blueFirstBlood',
       'blueKills', 'blueDeaths', 'blueAssists', 'blueEliteMonsters',
       'blueDragons', 'blueHeralds', 'blueTowersDestroyed', 'blueTotalGold',
       'blueAvgLevel', 'blueTotalExperience', 'blueTotalMinionsKilled',
       'blueTotalJungleMinionsKilled', 'blueGoldDiff', 'blueExperienceDiff',
       'blueCSPerMin', 'blueGoldPerMin', 'redWardsPlaced', 'redWardsDestroyed',
       'redFirstBlood', 'redKills', 'redDeaths', 'redAssists',
       'redEliteMonsters', 'redDragons', 'redHeralds', 'redTowersDestroyed',
       'redTotalGold', 'redAvgLevel', 'redTotalExperience',
       'redTotalMinionsKilled', 'redTotalJungleMinionsKilled', 'redGoldDiff',
       'redExperienceDiff', 'redCSPerMin', 'redGoldPerMin', 'kill_diff',
       'death_diff', 'gold_diff', 'exp_diff', 'wards_diff',
       'elite_monster_diff', 'gold_ratio', 'kill_ratio', 'exp_ratio',
       'blueKills_norm', 'redKills_norm', 'blueGold_norm', 'redGo

In [42]:
precision

0.7160844841592201

In [43]:
combined

Unnamed: 0_level_0,actual,predicted
gameId,Unnamed: 1_level_1,Unnamed: 2_level_1
4519157822,0,1
4523371949,0,0
4521474530,0,0
4524384067,0,0
4515594785,1,1
...,...,...
4527873286,1,1
4527797466,1,1
4527713716,0,0
4527628313,0,0
