In [1]:
import numpy as np
import pandas as pd
import requests
import time
from scipy.stats import linregress
from matplotlib import pyplot as plt

In [2]:
bronze_df = pd.read_csv("gg_datafiles/bronze.csv", encoding="utf-8")
gold_df = pd.read_csv("gg_datafiles/gold.csv", encoding="utf-8")
platinum_df = pd.read_csv("gg_datafiles/platinum.csv", encoding="utf-8")
platinumplus_df = pd.read_csv("gg_datafiles/platinumplus.csv", encoding="utf-8")
silver_df = pd.read_csv("gg_datafiles/silver.csv", encoding="utf-8")

In [3]:
####Time course data
TC_data_columns_df = pd.read_csv("Timecourse_Datafiles/_columns.csv", encoding="utf-8")
TC_data_bans_df = pd.read_csv("Timecourse_Datafiles/bans.csv", encoding="utf-8")
TC_data_kills_df = pd.read_csv("Timecourse_Datafiles/kills.csv", encoding="utf-8")
TC_data_LeagueofLegends_df = pd.read_csv("Timecourse_Datafiles/LeagueofLegends.csv", encoding="utf-8")
TC_data_matchinfo_df = pd.read_csv("Timecourse_Datafiles/matchinfo.csv", encoding="utf-8")
TC_data_structures_df = pd.read_csv("Timecourse_Datafiles/structures.csv", encoding="utf-8")

In [4]:
bronze_df["League"] = "Bronze"
gold_df["League"] = "Gold"
platinum_df["League"] = "Platinum"
platinumplus_df["League"] = "Platinum Plus"
silver_df["League"] = "Silver"

#Create stacked combined dataset
stack_df = pd.concat([bronze_df, 
                      gold_df, 
                      platinum_df, 
                      platinumplus_df,
                      silver_df], axis = 0)

In [5]:
#Columns in the dataset
stack_df.columns

Index(['Rank', 'Champion', 'Role', 'Win Percent', 'Play Percent', 'Ban Rate',
       'Playerbase Avg. Games', 'Kills', 'Deaths', 'Assists',
       'Largest Killing Spree', 'Damage Dealt', 'Damage Taken',
       'Total Healing', 'Minions Killed', 'Enemy Jungle CS', 'Team Jungle CS',
       'Gold Earned', 'Role Position', 'Position Change', 'League'],
      dtype='object')

In [6]:
#Search for null values, number of non-null values, data types
stack_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 989 entries, 0 to 187
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Rank                   989 non-null    object 
 1   Champion               979 non-null    object 
 2   Role                   979 non-null    object 
 3   Win Percent            979 non-null    object 
 4   Play Percent           979 non-null    object 
 5   Ban Rate               979 non-null    object 
 6   Playerbase Avg. Games  898 non-null    float64
 7   Kills                  979 non-null    float64
 8   Deaths                 979 non-null    float64
 9   Assists                979 non-null    float64
 10  Largest Killing Spree  979 non-null    float64
 11  Damage Dealt           979 non-null    float64
 12  Damage Taken           979 non-null    float64
 13  Total Healing          979 non-null    float64
 14  Minions Killed         979 non-null    float64
 15  Enemy 

In [7]:
#Identify Null Values
df_null = stack_df[stack_df.isnull().any(axis=1)]
df_null

Unnamed: 0,Rank,Champion,Role,Win Percent,Play Percent,Ban Rate,Playerbase Avg. Games,Kills,Deaths,Assists,...,Damage Dealt,Damage Taken,Total Healing,Minions Killed,Enemy Jungle CS,Team Jungle CS,Gold Earned,Role Position,Position Change,League
10,11,Aphelios,ADC,48.48%,5.24%,2.27%,,6.69,6.69,7.23,...,20397.0,18356.0,3145.0,167.2,2.50,8.29,11726.0,16.0,2.0,Bronze
64,65,Kai'Sa,ADC,47.90%,7.35%,0.31%,,7.75,6.12,6.97,...,19541.0,19523.0,4109.0,170.8,2.43,8.01,12177.0,10.0,2.0,Bronze
100,101,Neeko,Middle,52.12%,1.60%,0.12%,,6.28,6.09,8.02,...,20076.0,18756.0,2326.0,140.4,1.29,2.93,10684.0,34.0,2.0,Bronze
101,102,Neeko,Support,50.99%,0.98%,0.12%,,3.69,6.00,7.80,...,12929.0,14498.0,1377.0,49.6,0.52,0.89,8024.0,34.0,2.0,Bronze
118,119,Pyke,Support,48.24%,5.91%,2.07%,,6.60,6.79,8.04,...,10037.0,18962.0,4607.0,36.1,0.27,0.30,10454.0,8.0,1.0,Bronze
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,139,Sylas,Top,43.78%,1.00%,1.61%,,6.35,6.73,6.11,...,19669.0,30434.0,9107.0,138.6,1.67,3.36,10650.0,41.0,0.0,Silver
179,180,Yuumi,Support,49.99%,7.97%,3.39%,,1.61,3.66,14.53,...,8348.0,7408.0,14366.0,6.7,0.09,0.13,7344.0,1.0,0.0,Silver
184,185,Zoe,Middle,46.81%,1.85%,0.31%,,6.62,5.65,7.17,...,22144.0,18366.0,3515.0,145.4,0.86,3.64,10826.0,32.0,0.0,Silver
186,Champion.gg isn't endorsed by Riot Games and d...,,,,,,,,,,...,,,,,,,,,,Silver


In [8]:
#Drop weird rows with no Champion listed & check non-null values
df2 = stack_df.dropna(subset = ['Champion'])
df2.info()

## === Notice that playerbase avg games still has less data (898) === ##

<class 'pandas.core.frame.DataFrame'>
Int64Index: 979 entries, 0 to 185
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Rank                   979 non-null    object 
 1   Champion               979 non-null    object 
 2   Role                   979 non-null    object 
 3   Win Percent            979 non-null    object 
 4   Play Percent           979 non-null    object 
 5   Ban Rate               979 non-null    object 
 6   Playerbase Avg. Games  898 non-null    float64
 7   Kills                  979 non-null    float64
 8   Deaths                 979 non-null    float64
 9   Assists                979 non-null    float64
 10  Largest Killing Spree  979 non-null    float64
 11  Damage Dealt           979 non-null    float64
 12  Damage Taken           979 non-null    float64
 13  Total Healing          979 non-null    float64
 14  Minions Killed         979 non-null    float64
 15  Enemy 

In [10]:
#Percentage values showing up as OBJECTS, 
#problematic, need to remove the % for calculations!
champ_df = df2.groupby(["Champion"]).mean()
champ_df.head()

## === Notice there are NO stats for these metrics ==== ##

Unnamed: 0_level_0,Playerbase Avg. Games,Kills,Deaths,Assists,Largest Killing Spree,Damage Dealt,Damage Taken,Total Healing,Minions Killed,Enemy Jungle CS,Team Jungle CS,Gold Earned,Role Position,Position Change
Champion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Aatrox,8.174286,4.644286,4.957143,4.828571,6.0,15734.857143,26087.0,10089.857143,141.042857,2.104286,3.094286,9579.0,31.714286,2.714286
Ahri,8.608333,6.106667,5.121667,7.061667,6.666667,18547.666667,17217.333333,3221.166667,142.95,0.858333,2.855,10288.5,20.5,5.166667
Akali,12.318333,6.916667,5.678333,4.551667,7.5,17378.416667,22477.916667,6146.25,142.008333,1.011667,1.869167,10186.75,40.0,2.25
Alistar,3.3,1.91,5.65,12.508,3.8,7564.2,20848.2,8914.2,30.64,0.076,0.108,7387.2,26.6,4.2
Amumu,2.844,4.554,5.704,9.666,6.8,13318.6,26994.4,7621.2,32.5,6.59,80.366,10134.6,28.4,1.6


In [11]:
#Create a function to convert percent to float
def convert_percent(val):
    """
    Convert the percentage string to an actual floating point percent
    - Remove %
    - Divide by 100 to make decimal
    """
    new_val = val.replace('%', '')
    return float(new_val) / 100

In [12]:
# Drop extra columns
df2.drop(columns=['Win Percent', 'Play Percent', 'Ban Rate'])

Unnamed: 0,Rank,Champion,Role,Playerbase Avg. Games,Kills,Deaths,Assists,Largest Killing Spree,Damage Dealt,Damage Taken,Total Healing,Minions Killed,Enemy Jungle CS,Team Jungle CS,Gold Earned,Role Position,Position Change,League
0,1,Aatrox,Top,2.58,5.49,5.90,5.96,7.0,18687.0,31430.0,12450.0,146.8,2.52,3.66,10685.0,35.0,1.0,Bronze
1,2,Ahri,Middle,2.91,6.82,5.69,7.73,7.0,20593.0,18989.0,3499.0,140.2,0.85,2.83,10769.0,20.0,3.0,Bronze
2,3,Akali,Middle,2.98,8.45,6.61,5.48,10.0,19250.0,25051.0,6740.0,133.9,1.16,2.28,10933.0,40.0,2.0,Bronze
3,4,Akali,Top,3.98,7.21,6.06,4.82,7.0,19055.0,24429.0,6704.0,145.3,1.18,1.71,10718.0,40.0,0.0,Bronze
4,5,Alistar,Support,2.41,2.02,5.84,12.80,4.0,7948.0,21527.0,9162.0,30.8,0.10,0.13,7501.0,22.0,6.0,Bronze
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181,182,Zed,Middle,2.76,9.09,6.41,5.59,9.0,22524.0,21230.0,3175.0,158.1,1.90,3.69,12116.0,1.0,0.0,Silver
182,183,Ziggs,Middle,2.81,5.58,5.55,7.71,8.0,25282.0,16238.0,1347.0,170.2,1.47,5.08,11412.0,27.0,0.0,Silver
183,184,Zilean,Support,3.05,2.57,5.03,11.44,4.0,11799.0,15014.0,5666.0,41.8,0.36,0.53,8124.0,19.0,0.0,Silver
184,185,Zoe,Middle,,6.62,5.65,7.17,8.0,22144.0,18366.0,3515.0,145.4,0.86,3.64,10826.0,32.0,0.0,Silver


In [13]:
#Reorder columns
df2.columns
df2 = df2.reindex(columns = ['Rank', 'Champion', 'Role', 'League',
       'win_percent', 'ban_rate', 'play_percent',
       'Playerbase Avg. Games', 'Kills', 'Deaths', 'Assists',
       'Largest Killing Spree', 'Damage Dealt', 'Damage Taken',
       'Total Healing', 'Minions Killed', 'Enemy Jungle CS', 'Team Jungle CS',
       'Gold Earned', 'Role Position', 'Position Change'])
df2.head()

Unnamed: 0,Rank,Champion,Role,League,win_percent,ban_rate,play_percent,Playerbase Avg. Games,Kills,Deaths,...,Largest Killing Spree,Damage Dealt,Damage Taken,Total Healing,Minions Killed,Enemy Jungle CS,Team Jungle CS,Gold Earned,Role Position,Position Change
0,1,Aatrox,Top,Bronze,,,,2.58,5.49,5.9,...,7.0,18687.0,31430.0,12450.0,146.8,2.52,3.66,10685.0,35.0,1.0
1,2,Ahri,Middle,Bronze,,,,2.91,6.82,5.69,...,7.0,20593.0,18989.0,3499.0,140.2,0.85,2.83,10769.0,20.0,3.0
2,3,Akali,Middle,Bronze,,,,2.98,8.45,6.61,...,10.0,19250.0,25051.0,6740.0,133.9,1.16,2.28,10933.0,40.0,2.0
3,4,Akali,Top,Bronze,,,,3.98,7.21,6.06,...,7.0,19055.0,24429.0,6704.0,145.3,1.18,1.71,10718.0,40.0,0.0
4,5,Alistar,Support,Bronze,,,,2.41,2.02,5.84,...,4.0,7948.0,21527.0,9162.0,30.8,0.1,0.13,7501.0,22.0,6.0


In [97]:
jungle = df2.loc[df2["Role"] == "Jungle"][["Rank", "Champion", "League", 'Minions Killed', "Enemy Jungle CS", 'Team Jungle CS', "Role Position", "Assists"]]
jungle.head()

Unnamed: 0,Rank,Champion,League,Minions Killed,Enemy Jungle CS,Team Jungle CS,Role Position,Assists
5,6,Amumu,Bronze,32.4,6.92,76.56,23.0,9.78
23,24,Cho'Gath,Bronze,69.6,5.19,56.4,34.0,6.87
28,29,Diana,Bronze,49.1,7.08,74.24,35.0,6.76
30,31,Dr. Mundo,Bronze,49.3,9.89,73.89,14.0,7.33
33,34,Ekko,Bronze,38.8,7.09,72.82,4.0,7.09


In [98]:
top_score = 0
top_name = ''
current_score = 0
for index, row in jungle.iterrows(): 
    current_score = (row['Minions Killed'] + row['Enemy Jungle CS'] + row['Team Jungle CS'] + row['Role Position'] + row['Assists'])
    
    if current_score > top_score :
        top_score = current_score
        top_name = row['Champion']
print(f'{top_name} is the most viable Jungler with a score of {int(top_score)}')

     
   
    

Singed is the most viable Jungler with a score of 229


In [99]:
support = df2.loc[df2["Role"] == "Support"][["Rank", "Champion", "League", 'Minions Killed', "Total Healing", 'Damage Taken', "Deaths", "Assists"]]
support.head()

Unnamed: 0,Rank,Champion,League,Minions Killed,Total Healing,Damage Taken,Deaths,Assists
4,5,Alistar,Bronze,30.8,9162.0,21527.0,5.84,12.8
7,8,Anivia,Bronze,65.4,1957.0,15847.0,4.27,6.87
9,10,Annie,Bronze,53.6,1004.0,12361.0,5.63,6.48
14,15,Bard,Bronze,32.2,6428.0,17262.0,5.58,12.44
15,16,Blitzcrank,Bronze,29.6,1766.0,18730.0,5.82,11.62


In [100]:
top_score = 0
top_name = ''
current_score = 0
for index, row in support.iterrows(): 
    current_score = (row['Minions Killed'] - row['Deaths'] + row['Total Healing'] + row['Damage Taken'] + row['Assists'])
    
    if current_score > top_score :
        top_score = current_score
        top_name = row['Champion']
print(f'{top_name} is the most viable Support with a score of {int(top_score)}')

Soraka is the most viable Support with a score of 31853


In [101]:
top = df2.loc[df2["Role"] == "Top"][["Rank", "Champion", "League", 'Kills', 'Minions Killed', "Largest Killing Spree", 'Damage Dealt', "Deaths", "Assists"]]
top.head()

Unnamed: 0,Rank,Champion,League,Kills,Minions Killed,Largest Killing Spree,Damage Dealt,Deaths,Assists
0,1,Aatrox,Bronze,5.49,146.8,7.0,18687.0,5.9,5.96
3,4,Akali,Bronze,7.21,145.3,7.0,19055.0,6.06,4.82
20,21,Camille,Bronze,6.3,146.7,7.0,18236.0,6.44,5.9
25,26,Cho'Gath,Bronze,6.48,146.4,7.0,18682.0,5.9,6.13
27,28,Darius,Bronze,6.96,155.2,8.0,17987.0,6.23,4.94


In [102]:
top_score = 0
top_name = ''
current_score = 0
for index, row in top.iterrows(): 
    current_score = (row['Minions Killed'] - row['Deaths'] + row['Kills'] + row['Largest Killing Spree'])
    
    if current_score > top_score :
        top_score = current_score
        top_name = row['Champion']
print(f'{top_name} is the most viable Top with a score of {int(top_score)}')

Irelia is the most viable Top with a score of 208


In [103]:
mid = df2.loc[df2["Role"] == "Middle"][["Rank", "Champion", "League", 'Kills', 'Minions Killed', "Deaths", "Assists", 'Damage Dealt']]
mid.head()

Unnamed: 0,Rank,Champion,League,Kills,Minions Killed,Deaths,Assists,Damage Dealt
1,2,Ahri,Bronze,6.82,140.2,5.69,7.73,20593.0
2,3,Akali,Bronze,8.45,133.9,6.61,5.48,19250.0
6,7,Anivia,Bronze,5.94,158.3,4.96,7.49,19059.0
8,9,Annie,Bronze,7.91,139.4,6.49,7.8,22887.0
12,13,Aurelion Sol,Bronze,5.7,132.2,5.95,8.82,18482.0


In [104]:
top_score = 0
top_name = ''
current_score = 0
for index, row in mid.iterrows(): 
    current_score = (row['Minions Killed'] - row['Deaths'] + row['Kills'] + row['Assists'] + row['Damage Dealt'])
    
    if current_score > top_score :
        top_score = current_score
        top_name = row['Champion']
print(f'{top_name} is the most viable Mid with a score of {int(top_score)}')

Karthus is the most viable Mid with a score of 29669


In [105]:
ADC = df2.loc[df2["Role"] == "ADC"][["Rank", "Champion", "League", 'Kills', "Largest Killing Spree", 'Damage Dealt', "Assists"]]
ADC.head()

Unnamed: 0,Rank,Champion,League,Kills,Largest Killing Spree,Damage Dealt,Assists
10,11,Aphelios,Bronze,6.69,9.0,20397.0,7.23
11,12,Ashe,Bronze,6.57,9.0,18836.0,9.44
19,20,Caitlyn,Bronze,6.72,9.0,20200.0,7.42
21,22,Cassiopeia,Bronze,7.42,7.0,20655.0,7.13
32,33,Draven,Bronze,7.87,9.0,21272.0,6.91


In [106]:
top_score = 0
top_name = ''
current_score = 0
for index, row in ADC.iterrows(): 
    current_score = (row['Largest Killing Spree'] + row['Damage Dealt'] + row['Kills'] + row['Assists'] + row['Damage Dealt'])
    
    if current_score > top_score :
        top_score = current_score
        top_name = row['Champion']
print(f'{top_name} is the most viable ADC with a score of {int(top_score)}')

Kog'Maw is the most viable ADC with a score of 52481
