In [227]:
import pandas as pd

df = pd.read_csv('../jungler_data.csv')

1. Data Exploration

In [228]:
df.shape

(3053, 13)

In [229]:
df.columns

Index(['kills_first_4_min', 'assists_first_4_min', 'camps_cleared_first_4_min',
       'gold_earned_first_4_min', 'win', 'kills_details', 'summoner_spells',
       'champion', 'first_item', 'level_at_first_blood', 'firstBlood',
       'gold_per_minute', 'game_type'],
      dtype='object')

In [230]:
df.head

<bound method NDFrame.head of       kills_first_4_min  assists_first_4_min  camps_cleared_first_4_min  \
0                     0                    0                         28   
1                     0                    0                         24   
2                     0                    0                         28   
3                     0                    0                         28   
4                     1                    0                         24   
...                 ...                  ...                        ...   
3048                  1                    0                         19   
3049                  1                    0                         20   
3050                  1                    0                         16   
3051                  0                    0                         12   
3052                  1                    0                         24   

      gold_earned_first_4_min    win kills_details summoner_spells ch

In [231]:
df.dtypes

kills_first_4_min              int64
assists_first_4_min            int64
camps_cleared_first_4_min      int64
gold_earned_first_4_min        int64
win                             bool
kills_details                 object
summoner_spells               object
champion                      object
first_item                    object
level_at_first_blood           int64
firstBlood                      bool
gold_per_minute              float64
game_type                     object
dtype: object

2. Data manipulation

We only care about Ranked Solo/Duo games, since these games are more competative than other game types

In [232]:
ranked_only_df = df[df['game_type'] == 'Ranked Solo/Duo']

In [233]:
ranked_only_df.head

<bound method NDFrame.head of       kills_first_4_min  assists_first_4_min  camps_cleared_first_4_min  \
0                     0                    0                         28   
1                     0                    0                         24   
4                     1                    0                         24   
5                     1                    0                         20   
6                     0                    0                         16   
...                 ...                  ...                        ...   
3048                  1                    0                         19   
3049                  1                    0                         20   
3050                  1                    0                         16   
3051                  0                    0                         12   
3052                  1                    0                         24   

      gold_earned_first_4_min    win kills_details summoner_spells  c

In [234]:
ranked_only_df.shape

(2190, 13)

We still have 2190 Games to use, thats Great!
Lets check for duplicates

In [235]:
duplicates = ranked_only_df[ranked_only_df.duplicated()]
print(duplicates)

      kills_first_4_min  assists_first_4_min  camps_cleared_first_4_min  \
180                   0                    0                         28   
305                   0                    0                         28   
307                   0                    0                         28   
314                   0                    0                         28   
356                   0                    0                         28   
...                 ...                  ...                        ...   
2878                  0                    0                         28   
2940                  0                    0                         28   
2957                  0                    0                         28   
3015                  0                    0                         28   
3036                  0                    0                         28   

      gold_earned_first_4_min    win kills_details summoner_spells  champion  \
180                

380 Duplicates!!! Looks like our script had some overlap in the matches we recorded.

Lets Drop these entries

In [236]:
clean_df = ranked_only_df.drop_duplicates()

In [237]:
#Error Checking
print(len(ranked_only_df) - len(clean_df))

380


Our summoner_spells column can use some cleaning, since all entries have 11 in it, that is a constant, we only care about the summoner_spell that is not 11

In [238]:
def extract_spell(spell_str):
    spell_str = spell_str.strip('()')
    spell_list = spell_str.split(',')
    return int(spell_list[1]) if int(spell_list[0]) == 11 else int(spell_list[0])

reformatted_df  =  clean_df.copy()

In [239]:
# Apply the function to the Summoner_spells column
reformatted_df['secondary_summoner_spell'] = reformatted_df['summoner_spells'].apply(extract_spell)

# Drop the old column
reformatted_df.drop(columns=['summoner_spells'], inplace=True)

In [240]:
reformatted_df['secondary_summoner_spell']

0       4
1       4
4       4
5       4
6       4
       ..
3048    4
3049    4
3050    4
3051    4
3052    4
Name: secondary_summoner_spell, Length: 1810, dtype: int64

In [241]:
missing_values = reformatted_df.isnull().sum()
print(missing_values)

kills_first_4_min            0
assists_first_4_min          0
camps_cleared_first_4_min    0
gold_earned_first_4_min      0
win                          0
kills_details                0
champion                     0
first_item                   0
level_at_first_blood         0
firstBlood                   0
gold_per_minute              0
game_type                    0
secondary_summoner_spell     0
dtype: int64


In [242]:
#Drop game_type column since they are all the same now
reformatted_df = reformatted_df.drop(columns='game_type')

Since I would Like to use a nueral network we have to encode some of our columns

In [243]:
reformatted_df.dtypes

kills_first_4_min              int64
assists_first_4_min            int64
camps_cleared_first_4_min      int64
gold_earned_first_4_min        int64
win                             bool
kills_details                 object
champion                      object
first_item                    object
level_at_first_blood           int64
firstBlood                      bool
gold_per_minute              float64
secondary_summoner_spell       int64
dtype: object

In [244]:
from sklearn.preprocessing import LabelEncoder

le_first_item = LabelEncoder()
le_champion = LabelEncoder()

reformatted_df['first_item_encoded'] = le_first_item.fit_transform(reformatted_df['first_item'])
reformatted_df['champion_encoded'] = le_champion.fit_transform(reformatted_df['champion'])

print(reformatted_df[['first_item', 'first_item_encoded', 'champion', 'champion_encoded']].head())



  first_item  first_item_encoded  champion  champion_encoded
0    No Item                   2   Shyvana                55
1       Blue                   0     Viego                68
4        Red                   3      Kayn                23
5        Red                   3  Nocturne                42
6      Green                   1     Amumu                 0


In [245]:
print(reformatted_df['champion_encoded'])

0       55
1       68
4       23
5       42
6        0
        ..
3048    50
3049    12
3050     3
3051    25
3052    30
Name: champion_encoded, Length: 1810, dtype: int32


In [246]:
reformatted_df = reformatted_df.drop(columns=['first_item', 'champion'])

In [249]:
#Change kill details to first_kill_time, which records the timestamp of the junglers first kill of the game
reformatted_df['first_kill_time'] = reformatted_df['kills_details'].apply(lambda x: int(eval(x)[0]) if eval(x) else 0)
reformatted_df = reformatted_df.drop(columns=['kills_details'])


In [254]:
reformatted_df.dtypes

kills_first_4_min              int64
assists_first_4_min            int64
camps_cleared_first_4_min      int64
gold_earned_first_4_min        int64
win                             bool
level_at_first_blood           int64
firstBlood                      bool
gold_per_minute              float64
secondary_summoner_spell       int64
first_item_encoded             int32
champion_encoded               int32
first_kill_time                int64
dtype: object

Now that our Data is cleaned and reformated and encoded, we have to split the data so that our model can Train and Test

In [250]:
from sklearn.model_selection import train_test_split

X = reformatted_df.drop(columns=['win'])  # Features
y = reformatted_df['win']  # Target

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [251]:
print(f"Training features shape: {X_train.shape}")
print(f"Test features shape: {X_test.shape}")
print(f"Training target shape: {y_train.shape}")
print(f"Test target shape: {y_test.shape}")

Training features shape: (1448, 11)
Test features shape: (362, 11)
Training target shape: (1448,)
Test target shape: (362,)


3. Model Creation

In [252]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

