# Initial Set-Up Work:

In [None]:
# Basic imports are listed here.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
from mlxtend.frequent_patterns import apriori, association_rules
import statsmodels.api as sm

In [None]:
# Loads the primary match dataset from disk and previews its first rows.
primary_csv_path = "LeagueofLegends.csv"
primary_df = pd.read_csv(primary_csv_path)
primary_df.head()

# Data Preprocessing:

## Data Cleanup:

In [None]:
# Reports the (rows, columns) shape of the dataframe before any preprocessing is performed.
initial_shape = primary_df.shape
print(initial_shape)

In [None]:
# Drops the columns that are not needed from the primary dataset, in place.
# The shape of the updated dataframe is printed afterwards to confirm the columns were dropped.
unused_columns = [
    'League', 'Season', 'Type', 'blueTeamTag',
    'goldblueTop', 'goldblueJungle', 'goldblueMiddle', 'goldblueADC', 'goldblueSupport',
    'redTop', 'goldredTop', 'redJungle', 'goldredJungle', 'redMiddle', 'goldredMiddle',
    'redADC', 'goldredADC', 'redSupport', 'goldredSupport', 'redTeamTag',
    'blueTop', 'blueMiddle', 'blueJungle', 'blueADC', 'blueSupport',
    'golddiff', 'goldblue', 'bTowers', 'bInhibs', 'bDragons', 'bBarons', 'bHeralds',
    'goldred', 'rTowers', 'rInhibs', 'rDragons', 'rBarons', 'rHeralds',
    'blueBans', 'redBans', 'bKills', 'rKills',
]
primary_df.drop(columns=unused_columns, inplace=True)
print(primary_df.shape)

In [None]:
# Counts the missing values in each column of the primary dataset.
missing_flags = primary_df.isna()
missing_flags.sum()

In [None]:
# Shows the basic descriptive statistics (count, mean, std, quartiles, min/max) of the match length.
primary_df['gamelength'].describe()

In [None]:
# Defines a helper that, given a dataframe and a column name, returns the rows whose value in that
# column lies outside the 1.5 * IQR fences.
def find_outliers(df, col):
    """Return the rows of ``df`` whose ``col`` value is below Q1 - 1.5*IQR or above Q3 + 1.5*IQR."""
    values = df[col]
    lower_quartile = values.quantile(0.25)
    upper_quartile = values.quantile(0.75)
    spread = upper_quartile - lower_quartile
    lower_fence = lower_quartile - 1.5 * spread
    upper_fence = upper_quartile + 1.5 * spread
    is_outlier = (values < lower_fence) | (values > upper_fence)
    return df[is_outlier]

# Finds the abnormally long or short matches, keeping both their rows and their index labels for later cells.
workable_outliers = find_outliers(primary_df, 'gamelength')
gamelength_outliers = workable_outliers.index
print(gamelength_outliers)

In [None]:
# Drops the matches flagged as game-length outliers from the primary dataframe.
# The new shape is printed to confirm the removal, followed by the updated descriptive statistics.
primary_df = primary_df.drop(index=gamelength_outliers)
print(primary_df.shape)
primary_df.describe()

In [None]:
# Orders the primary dataframe by match address (index labels are kept) and previews it.
primary_df = primary_df.sort_values('Address')
primary_df.head()

In [None]:
# Loads the kills dataset from disk and previews its first rows.
kills_csv_path = "kills.csv"
kills_df = pd.read_csv(kills_csv_path)
kills_df.head()

In [None]:
# Groups the kills dataset by address and team, and counts the rows (kills) each team has accumulated in total.
kill_counts = kills_df.groupby(["Address", "Team"]).size()
kills = kill_counts.reset_index(name="Kills")
kills.head()

In [None]:
# Splits the per-match kill counts into the blue team's rows and the red team's rows.
team_label = kills['Team']
bKills = kills.loc[team_label.eq('bKills')]
rKills = kills.loc[team_label.eq('rKills')]

In [None]:
# Creates a bKills column in the primary dataset by looking up each match's blue-team kill count by address.
# A single lookup replaces one full scan of primary_df per match.
# Matches with no entry in bKills are left as NaN; the float cast keeps the column dtype the same
# whether or not every match has an entry.
blue_kills_by_address = bKills.set_index('Address')['Kills']
primary_df['bKills'] = primary_df['Address'].map(blue_kills_by_address).astype(float)

print(primary_df[['Address', 'bKills']])

In [None]:
# Creates a rKills column in the primary dataset by looking up each match's red-team kill count by address.
# A single lookup replaces one full scan of primary_df per match.
# Matches with no entry in rKills are left as NaN; the float cast keeps the column dtype the same
# whether or not every match has an entry.
red_kills_by_address = rKills.set_index('Address')['Kills']
primary_df['rKills'] = primary_df['Address'].map(red_kills_by_address).astype(float)

print(primary_df[['Address', 'bKills', 'rKills']])

In [None]:
# Derives each team's deaths for every unique match: a team's deaths are the opposing team's kills.
for death_col, opposing_kill_col in (('bDeaths', 'rKills'), ('rDeaths', 'bKills')):
    primary_df[death_col] = primary_df[opposing_kill_col]

In [None]:
# Loads the gold dataset from disk and lists all of its columns.
gold_csv_path = "gold.csv"
gold = pd.read_csv(gold_csv_path)
gold.columns

In [None]:
# Removes the gold rows that belong to the game-length outlier matches, then keeps only the rows of
# 'golddiff' type. Both filters are applied as one boolean mask instead of dropping one address at a time.
is_outlier_match = gold['Address'].isin(workable_outliers['Address'])
is_golddiff = gold['Type'] == 'golddiff'
gold = gold[~is_outlier_match & is_golddiff].copy()

In [None]:
# Orders the gold dataframe by match address (index labels are kept) and previews it.
gold = gold.sort_values('Address')
gold.head()

In [None]:
# Defines the gold-differential columns of the primary dataframe from the gold dataframe's minute columns.
# Rows are matched explicitly by match address: a plain column assignment would align on index labels,
# which is unaffected by how either dataframe is sorted.
# NOTE(review): gold is expected to hold only 'golddiff' rows at this point (filtered in an earlier cell);
# if an address still appears more than once, its first row is used.
gold_by_address = gold.drop_duplicates(subset='Address').set_index('Address')
for new_col, minute_col in [('gd15min', 'min_15'), ('gd30min', 'min_30'),
                            ('gd45min', 'min_45'), ('gd56min', 'min_56')]:
    primary_df[new_col] = primary_df['Address'].map(gold_by_address[minute_col])

In [None]:
# Imputes missing late-game gold differentials from the earlier checkpoints of the same row.

# gd56min is missing but gd45min is present: uses the row mean of the 15/30/45-minute values.
needs_56 = primary_df['gd56min'].isnull() & primary_df['gd45min'].notnull()
primary_df.loc[needs_56, 'gd56min'] = primary_df.loc[needs_56, ['gd15min', 'gd30min', 'gd45min']].mean(axis=1)

# gd45min is missing but gd30min is present: both later checkpoints get the row mean of the 15/30-minute values.
needs_45 = primary_df['gd45min'].isnull() & primary_df['gd30min'].notnull()
early_mean = primary_df.loc[needs_45, ['gd15min', 'gd30min']].mean(axis=1)
primary_df.loc[needs_45, 'gd45min'] = early_mean
primary_df.loc[needs_45, 'gd56min'] = early_mean

# gd30min is missing: every later checkpoint is copied from the 15-minute value.
needs_30 = primary_df['gd30min'].isnull()
for late_col in ('gd30min', 'gd45min', 'gd56min'):
    primary_df.loc[needs_30, late_col] = primary_df.loc[needs_30, 'gd15min']

print(primary_df[['gd15min', 'gd30min', 'gd45min', 'gd56min']])

## Useful Dataframes to Use Later, Created Here

In [None]:
# Builds a dataframe of blue team compositions with their match results, keeping only the matches blue won.
blue_composition_columns = ["Address", "blueTopChamp", "blueMiddleChamp", "blueADCChamp",
                            "blueSupportChamp", "blueJungleChamp", "bResult"]
blue_team_composition_df = primary_df.filter(items=blue_composition_columns)
blue_won = blue_team_composition_df["bResult"] == 1
blue_team_composition_df = blue_team_composition_df[blue_won]
blue_team_composition_df.head()

In [None]:
# Builds a dataframe of red team compositions with their match results, keeping only the matches red won.
red_composition_columns = ["Address", "redTopChamp", "redMiddleChamp", "redADCChamp",
                           "redSupportChamp", "redJungleChamp", "rResult"]
red_team_composition_df = primary_df.filter(items=red_composition_columns)
red_won = red_team_composition_df["rResult"] == 1
red_team_composition_df = red_team_composition_df[red_won]
red_team_composition_df.head()

In [None]:
# Loads the bans dataset, removes the 4th and 5th ban columns, and discards every row that still
# contains a missing value. The head is then displayed.
bans_df = (
    pd.read_csv("bans.csv")
    .drop(columns=["ban_4", "ban_5"])
    .dropna(how='any', axis=0)
)

bans_df.head()

In [None]:
# Creates a dataframe of the blue team's bans for each match address, keeping only the matches blue won.
# The drop/rename are chained on the filtered rows (returning a new dataframe) instead of mutating a
# slice of bans_df in place, which avoids pandas' SettingWithCopyWarning.
blue_team_bans_df = (
    bans_df[bans_df["Team"] == "blueBans"]
    .drop(columns="Team")
    .rename(columns={'ban_1': 'b_ban1', 'ban_2': 'b_ban2', 'ban_3': 'b_ban3'})
)

# Attaches the blue result of each match by address; bans without a match in primary_df get NaN
# and are removed by the win filter below.
bResult = primary_df[["Address", "bResult"]]
blue_team_bans_df = blue_team_bans_df.merge(bResult, on="Address", how="left")
blue_team_bans_df = blue_team_bans_df[blue_team_bans_df["bResult"] == 1]

blue_team_bans_df.head()

In [None]:
# Creates a dataframe of the red team's bans for each match address, keeping only the matches red won.
# The drop/rename are chained on the filtered rows (returning a new dataframe) instead of mutating a
# slice of bans_df in place, which avoids pandas' SettingWithCopyWarning.
red_team_bans_df = (
    bans_df[bans_df["Team"] == "redBans"]
    .drop(columns="Team")
    .rename(columns={'ban_1': 'r_ban1', 'ban_2': 'r_ban2', 'ban_3': 'r_ban3'})
)

# Attaches the red result of each match by address; bans without a match in primary_df get NaN
# and are removed by the win filter below.
rResult = primary_df[["Address", "rResult"]]
red_team_bans_df = red_team_bans_df.merge(rResult, on="Address", how="left")
red_team_bans_df = red_team_bans_df[red_team_bans_df["rResult"] == 1]

red_team_bans_df.head()

In [None]:
# Loads the monster objectives dataset from disk and previews its first rows.
monsters_csv_path = "monsters.csv"
monsters_df = pd.read_csv(monsters_csv_path)
monsters_df.head()

In [None]:
# Collects all of the ORIGINAL DRAGONS killed in all matches.
original_dragons_df = monsters_df.loc[monsters_df["Type"].eq("DRAGON")]

# Keeps, for each UNIQUE match, the first ORIGINAL DRAGON entry after sorting by Time.
original_dragons_by_time = original_dragons_df.sort_values("Time")
original_first_drags_df = original_dragons_by_time.groupby("Address").first().reset_index()
original_first_drags_df.head()

In [None]:
# Collects all of the ELEMENTAL DRAGONS (air, earth, fire, water) killed in all matches.
elemental_types = ["AIR_DRAGON", "EARTH_DRAGON", "FIRE_DRAGON", "WATER_DRAGON"]
elemental_dragons_df = monsters_df.loc[monsters_df["Type"].isin(elemental_types)]

# Keeps, for each UNIQUE match, the first ELEMENTAL DRAGON entry after sorting by Time.
elemental_dragons_by_time = elemental_dragons_df.sort_values("Time")
elemental_first_drags_df = elemental_dragons_by_time.groupby("Address").first().reset_index()
elemental_first_drags_df.head()

In [None]:
# Collects all of the ELDER DRAGONS killed in all matches.
elder_dragons_df = monsters_df.loc[monsters_df["Type"].eq("ELDER_DRAGON")]

# Keeps, for each UNIQUE match, the first ELDER DRAGON entry after sorting by Time.
elder_dragons_by_time = elder_dragons_df.sort_values("Time")
elder_first_drags_df = elder_dragons_by_time.groupby("Address").first().reset_index()
elder_first_drags_df.head()

In [None]:
# Collects all of the RIFT HERALDS killed in all matches.
rift_heralds_df = monsters_df.loc[monsters_df["Type"].eq("RIFT_HERALD")]

# Keeps, for each UNIQUE match, the first RIFT HERALD entry after sorting by Time.
rift_heralds_by_time = rift_heralds_df.sort_values("Time")
first_rift_heralds_df = rift_heralds_by_time.groupby("Address").first().reset_index()
first_rift_heralds_df.head()

In [None]:
# Collects all of the BARON NASHORS killed in all matches.
baron_nashors_df = monsters_df.loc[monsters_df["Type"].eq("BARON_NASHOR")]

# Keeps, for each UNIQUE match, the first BARON NASHOR entry after sorting by Time.
baron_nashors_by_time = baron_nashors_df.sort_values("Time")
first_baron_nashors_df = baron_nashors_by_time.groupby("Address").first().reset_index()
first_baron_nashors_df.head()

In [None]:
# Loads the structures dataset, discards rows containing missing values, and previews its first rows.
structures_df = pd.read_csv("structures.csv").dropna()
structures_df.head()

In [None]:
# Collects all of the OUTER TURRETS destroyed in all matches.
outer_turrets_df = structures_df.loc[structures_df["Type"].eq("OUTER_TURRET")]

# Keeps, for each UNIQUE match, the first OUTER TURRET entry after sorting by Time.
outer_turrets_by_time = outer_turrets_df.sort_values("Time")
first_outer_turrets_df = outer_turrets_by_time.groupby("Address").first().reset_index()
first_outer_turrets_df.head()

In [None]:
# Adds one zero-initialised column per team to accumulate the total objectives achieved.
for objective_total_col in ('bTotal_Objectives', 'rTotal_Objectives'):
    primary_df[objective_total_col] = 0

In [None]:
# Creates workable dataframes recording, per match and team, how many of each first objective were achieved.
match_team_keys = ["Address", "Team"]
TObjectives = first_outer_turrets_df.groupby(match_team_keys).size().reset_index(name="Towers")
ODObjectives = original_first_drags_df.groupby(match_team_keys).size().reset_index(name="Dragons")
EDObjectives = elemental_first_drags_df.groupby(match_team_keys).size().reset_index(name="Dragons")
ELObjectives = elder_first_drags_df.groupby(match_team_keys).size().reset_index(name="Dragons")
HObjectives = first_rift_heralds_df.groupby(match_team_keys).size().reset_index(name="Heralds")
BObjectives = first_baron_nashors_df.groupby(match_team_keys).size().reset_index(name="Barons")
print(TObjectives)

In [None]:
# Splits each objective dataframe into a blue-team part and a red-team part using its Team label.
# Towers
bTObjectives = TObjectives.loc[TObjectives['Team'].eq('bTowers')]
rTObjectives = TObjectives.loc[TObjectives['Team'].eq('rTowers')]
# Original dragons
bODObjectives = ODObjectives.loc[ODObjectives['Team'].eq('bDragons')]
rODObjectives = ODObjectives.loc[ODObjectives['Team'].eq('rDragons')]
# Elemental dragons
bEDObjectives = EDObjectives.loc[EDObjectives['Team'].eq('bDragons')]
rEDObjectives = EDObjectives.loc[EDObjectives['Team'].eq('rDragons')]
# Elder dragons
bELObjectives = ELObjectives.loc[ELObjectives['Team'].eq('bDragons')]
rELObjectives = ELObjectives.loc[ELObjectives['Team'].eq('rDragons')]
# Rift heralds
bHObjectives = HObjectives.loc[HObjectives['Team'].eq('bHeralds')]
rHObjectives = HObjectives.loc[HObjectives['Team'].eq('rHeralds')]
# Baron Nashors
bBObjectives = BObjectives.loc[BObjectives['Team'].eq('bBarons')]
rBObjectives = BObjectives.loc[BObjectives['Team'].eq('rBarons')]
print(bTObjectives)

In [None]:
# Adds the blue team's count for every objective type to bTotal_Objectives, matching matches by address.
# Each objective table is summed per address and looked up once, instead of scanning primary_df for
# every objective row.
blue_objective_tables = [
    (bTObjectives, 'Towers'),
    (bODObjectives, 'Dragons'),
    (bEDObjectives, 'Dragons'),
    (bELObjectives, 'Dragons'),
    (bHObjectives, 'Heralds'),
    (bBObjectives, 'Barons'),
]
for objective_df, count_col in blue_objective_tables:
    counts_by_address = objective_df.groupby('Address')[count_col].sum()
    # Matches absent from this objective table contribute 0.
    gained = primary_df['Address'].map(counts_by_address).fillna(0).astype('int64')
    primary_df['bTotal_Objectives'] += gained
print(primary_df[['Address', 'bTotal_Objectives']])

In [None]:
# Adds the red team's count for every objective type to rTotal_Objectives, matching matches by address.
# Each objective table is summed per address and looked up once, instead of scanning primary_df for
# every objective row.
red_objective_tables = [
    (rTObjectives, 'Towers'),
    (rODObjectives, 'Dragons'),
    (rEDObjectives, 'Dragons'),
    (rELObjectives, 'Dragons'),
    (rHObjectives, 'Heralds'),
    (rBObjectives, 'Barons'),
]
for objective_df, count_col in red_objective_tables:
    counts_by_address = objective_df.groupby('Address')[count_col].sum()
    # Matches absent from this objective table contribute 0.
    gained = primary_df['Address'].map(counts_by_address).fillna(0).astype('int64')
    primary_df['rTotal_Objectives'] += gained
print(primary_df[['Address', 'rTotal_Objectives']])

In [None]:
# Sanity check: shows the descriptive statistics (including the five-number summary) of each team's objective total.
objective_total_columns = ['bTotal_Objectives', 'rTotal_Objectives']
primary_df[objective_total_columns].describe()

In [None]:
# Rearranges the columns into a more readable order: match info, blue picks, red picks,
# blue stats, red stats, then the gold differentials.
ordered_columns = [
    'Address', 'Year', 'bResult', 'rResult', 'gamelength',
    'blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
    'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp', 'redSupportChamp',
    'bKills', 'bDeaths', 'bTotal_Objectives',
    'rKills', 'rDeaths', 'rTotal_Objectives',
    'gd15min', 'gd30min', 'gd45min', 'gd56min',
]
final_primary_df = primary_df.reindex(columns=ordered_columns)
final_primary_df.columns

In [None]:
# Displays the final state of the primary dataframe, after the derived columns have been added and reordered.
final_primary_df

## Basic Data Visualizations:

In [None]:
# Plots the cleaned game-length values as a histogram on a 16x8 figure.
plt.figure('gamelength', figsize=(16, 8))
plt.title('Game Length')
cleaned_gamelength = final_primary_df['gamelength']
cleaned_gamelength.plot.hist()

In [None]:
# Draws a box plot of the cleaned game-length values.
boxplot = final_primary_df.boxplot(column="gamelength")

In [None]:
# Plots the years in which the matches took place as a histogram with one bin per year (edges 2014 to 2019).
plt.figure('Year', figsize=(16, 8))
plt.title('Year')
year_bin_edges = list(range(2014, 2020))
final_primary_df['Year'].plot(kind='hist', bins=year_bin_edges)

In [None]:
# Draws a scatter plot of the blue team's kills against the total kills of the match.
blue_kills = final_primary_df['bKills']
totalKills = blue_kills + final_primary_df['rKills']
plt.figure(figsize=(16, 8))
plt.scatter(totalKills, blue_kills, c='black')
plt.xlabel('Total Kills')
plt.ylabel('bKills')
plt.show()

In [None]:
# Draws a scatter plot of the red team's kills against the total kills of the match.
red_kills = final_primary_df['rKills']
plt.figure(figsize=(16, 8))
plt.scatter(totalKills, red_kills, c='black')
plt.xlabel('Total Kills')
plt.ylabel('rKills')
plt.show()

In [None]:
# Creates a colour-graded correlation matrix of the numeric metrics.
# The string columns (address, champion names) are excluded explicitly, because DataFrame.corr()
# raises on non-numeric columns in pandas 2.x instead of silently skipping them.
numeric_metrics = final_primary_df.select_dtypes(include=['number', 'bool'])
numeric_metrics.corr().style.background_gradient(cmap='coolwarm')

# Models Used:

## Linear vs Logistic Regression Models:

Linear Regression:

In [None]:
# Creates a Linear Regression model instance with default settings; the cells below refit this same instance.
LRModel = LinearRegression()

In [None]:
# Fits the Linear Regression model with blue kills as the single feature and blue results as the target,
# then prints the R-squared measured on that same data.
X = final_primary_df[['bKills']].to_numpy()
y = final_primary_df[['bResult']].to_numpy()

LRModel.fit(X, y)
r_squared = LRModel.score(X, y)
print("Linear R-Squared of Blue Kills and Blue Results: ", r_squared)

In [None]:
# Fits the Linear Regression model with blue total objectives as the single feature and blue results
# as the target, then prints the R-squared measured on that same data.
X = final_primary_df[['bTotal_Objectives']].to_numpy()
y = final_primary_df[['bResult']].to_numpy()

LRModel.fit(X, y)
r_squared = LRModel.score(X, y)
print("Linear R-Squared of Blue Objectives and Blue Results: ", r_squared)

In [None]:
# Fits the Linear Regression model with red kills as the single feature and red results as the target,
# then prints the R-squared measured on that same data.
X = final_primary_df[['rKills']].to_numpy()
y = final_primary_df[['rResult']].to_numpy()

LRModel.fit(X, y)
r_squared = LRModel.score(X, y)
print("Linear R-Squared of Red Kills and Red Results: ", r_squared)

In [None]:
# Fits the Linear Regression model with red total objectives as the single feature and red results
# as the target, then prints the R-squared measured on that same data.
X = final_primary_df[['rTotal_Objectives']].to_numpy()
y = final_primary_df[['rResult']].to_numpy()

LRModel.fit(X, y)
r_squared = LRModel.score(X, y)
print("Linear R-Squared of Red Objectives and Red Results: ", r_squared)

In [None]:
# Fits the Linear Regression model to predict the blue result (a blue win or a red win) from the
# gold differentials at 15, 30 and 45 minutes, then prints the R-squared measured on that same data.
gold_diff_features = ['gd15min', 'gd30min', 'gd45min']
X = final_primary_df[gold_diff_features]
y = final_primary_df['bResult']

LRModel.fit(X, y)
r_squared = LRModel.score(X, y)
print("Linear R-Squared of Gold Differential and Blue Results: ", r_squared)

In [None]:
# Final Linear Regression model used for the demo: blue kills, deaths, objectives and gold
# differentials as features, blue result as the target.
final_linear_features = ['bKills', 'bDeaths', 'bTotal_Objectives', 'gd15min', 'gd30min', 'gd45min']
X = final_primary_df[final_linear_features]
y = final_primary_df['bResult']
LRModel.fit(X, y)

Logistic Regression:

In [None]:
# Creates an instance of a Logistic Regression model, allowing up to 10000 solver iterations;
# the cells below refit this same instance.
LogModel = LogisticRegression(max_iter=10000)

In [None]:
# Fits the Logistic Regression model with blue kills as the single feature and blue results as the target.
# LogisticRegression.score returns the mean accuracy (not R-squared), so the output is labelled accordingly.
X = final_primary_df['bKills'].values.reshape(-1, 1)
y = final_primary_df['bResult'].values.reshape(-1,1)
LogModel.fit(X, y.ravel())
print("Logistic Accuracy of Blue Kills and Blue Results: ", LogModel.score(X, y.ravel()))

In [None]:
# Fits the Logistic Regression model with blue total objectives as the single feature and blue results as the target.
# LogisticRegression.score returns the mean accuracy (not R-squared), so the output is labelled accordingly.
X = final_primary_df['bTotal_Objectives'].values.reshape(-1, 1)
y = final_primary_df['bResult'].values.reshape(-1,1)
LogModel.fit(X, y.ravel())
print("Logistic Accuracy of Blue Objectives and Blue Results: ", LogModel.score(X, y.ravel()))

In [None]:
# Fits the Logistic Regression model with red kills as the single feature and red results as the target.
# LogisticRegression.score returns the mean accuracy (not R-squared), so the output is labelled accordingly.
X = final_primary_df['rKills'].values.reshape(-1, 1)
y = final_primary_df['rResult'].values.reshape(-1,1)
LogModel.fit(X, y.ravel())
print("Logistic Accuracy of Red Kills and Red Results: ", LogModel.score(X, y.ravel()))

In [None]:
# Fits the Logistic Regression model with red total objectives as the single feature and red results as the target.
# LogisticRegression.score returns the mean accuracy (not R-squared), so the output is labelled accordingly.
X = final_primary_df['rTotal_Objectives'].values.reshape(-1, 1)
y = final_primary_df['rResult'].values.reshape(-1,1)
LogModel.fit(X, y.ravel())
print("Logistic Accuracy of Red Objectives and Red Results: ", LogModel.score(X, y.ravel()))

In [None]:
# Fits the Logistic Regression model to predict the blue result from the gold differentials at 15, 30 and 45 minutes.
# LogisticRegression.score returns the mean accuracy (not R-squared), so the output is labelled accordingly.
X = final_primary_df[['gd15min', 'gd30min', 'gd45min']]
y = final_primary_df['bResult']
LogModel.fit(X, y)
print("Logistic Accuracy of Gold Differential and Blue Results: ", LogModel.score(X, y))

In [None]:
# Final Logistic Regression model used for the demo: blue kills, deaths, objectives and gold
# differentials as features, blue result as the target.
final_logistic_features = ['bKills', 'bDeaths', 'bTotal_Objectives', 'gd15min', 'gd30min', 'gd45min']
X = final_primary_df[final_logistic_features]
y = final_primary_df['bResult']
LogModel.fit(X, y)

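Since all of the scores of the Logistic Regression models are significantly greater than those of the Linear Regression models, the Logistic Regression model will be more accurate and more efficient in predicting results with these metrics. (Note that `LogisticRegression.score` reports mean accuracy whereas `LinearRegression.score` reports R-squared, so the two sets of scores are not measured on the same scale.)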

## Apriori Algorithm (Team Compositions):

In [None]:
# Collects the sorted, de-duplicated list of champions that appear in any pick column of either team.
pick_columns = ['blueTopChamp', 'blueJungleChamp', 'blueMiddleChamp', 'blueADCChamp', 'blueSupportChamp',
                'redTopChamp', 'redJungleChamp', 'redMiddleChamp', 'redADCChamp', 'redSupportChamp']
all_picks = final_primary_df[pick_columns].values
champions_list = list(np.unique(all_picks))

In [None]:
# Grabs all winning match addresses from the blue team compositions dataframe.
match_addresses = blue_team_composition_df["Address"]

# Builds the blue team's composition "basket": one row per winning match, one 0/1 column per champion
# (1 when the champion was picked by the blue team in that match), plus the match Address.
blue_role_columns = ["blueTopChamp", "blueJungleChamp", "blueMiddleChamp", "blueADCChamp", "blueSupportChamp"]
champion_position = {champion: position for position, champion in enumerate(champions_list)}
row_numbers = np.arange(len(blue_team_composition_df))
picked = np.zeros((len(blue_team_composition_df), len(champions_list)), dtype="int64")
for role in blue_role_columns:
    # A champion that is missing from champions_list raises KeyError.
    champion_columns = [champion_position[champion] for champion in blue_team_composition_df[role]]
    picked[row_numbers, champion_columns] = 1

blue_team_comp_basket = pd.DataFrame(picked, columns=champions_list)
# Address is attached by position, so each basket row carries the address of the match it was built
# from (joining on index labels would misalign it, because the filtered labels are not 0..n-1).
blue_team_comp_basket["Address"] = match_addresses.to_numpy()

In [None]:
# Runs the apriori algorithm on the blue team's compositions and derives association rules
# (support/confidence/lift) for common picks, strongest rules first.
blue_team_comp_basket = blue_team_comp_basket.reset_index(drop=True)
fixed_blue_team_comp_basket = blue_team_comp_basket.set_index("Address")

frequent_items = apriori(fixed_blue_team_comp_basket, min_support=0.011, use_colnames=True)
rules = association_rules(frequent_items, metric="lift", min_threshold=1).sort_values(
    by=["confidence", "lift"], ascending=False
)
rules.head()

In [None]:
# Grabs all winning match addresses from the red team compositions dataframe.
match_addresses = red_team_composition_df["Address"]

# Builds the red team's composition "basket": one row per winning match, one 0/1 column per champion
# (1 when the champion was picked by the red team in that match), plus the match Address.
red_role_columns = ["redTopChamp", "redJungleChamp", "redMiddleChamp", "redADCChamp", "redSupportChamp"]
champion_position = {champion: position for position, champion in enumerate(champions_list)}
row_numbers = np.arange(len(red_team_composition_df))
picked = np.zeros((len(red_team_composition_df), len(champions_list)), dtype="int64")
for role in red_role_columns:
    # A champion that is missing from champions_list raises KeyError.
    champion_columns = [champion_position[champion] for champion in red_team_composition_df[role]]
    picked[row_numbers, champion_columns] = 1

red_team_comp_basket = pd.DataFrame(picked, columns=champions_list)
# Address is attached by position, so each basket row carries the address of the match it was built
# from (joining on index labels would misalign it, because the filtered labels are not 0..n-1).
red_team_comp_basket["Address"] = match_addresses.to_numpy()

In [None]:
# Runs the apriori algorithm on the red team's compositions and derives association rules
# (support/confidence/lift) for common picks, strongest rules first.
red_team_comp_basket = red_team_comp_basket.reset_index(drop=True)
fixed_red_team_comp_basket = red_team_comp_basket.set_index("Address")
frequent_items = apriori(fixed_red_team_comp_basket, min_support=0.011, use_colnames=True)
rules = association_rules(frequent_items, metric="lift", min_threshold=1).sort_values(
    by=["confidence", "lift"], ascending=False
)
rules.head()

## Apriori Algorithm (Team Bans):

In [None]:
# Grabs the sorted, de-duplicated list of champions that appear across the three ban columns.
# The unique ban values themselves are converted to a list (not the picked-champions list).
banned_champions_list = list(np.unique(bans_df[['ban_1', 'ban_2', 'ban_3']].values))

In [None]:
# Grabs all match addresses from the blue bans dataframe.
match_addresses_b = blue_team_bans_df["Address"]

# Builds the blue team's bans "basket": one row per match, one 0/1 column per banned champion
# (1 when the champion was banned by the blue team in that match), plus the match Address.
# Columns come from banned_champions_list so a champion that was banned but never picked has a column.
blue_ban_columns = ["b_ban1", "b_ban2", "b_ban3"]
ban_position = {champion: position for position, champion in enumerate(banned_champions_list)}
row_numbers = np.arange(len(blue_team_bans_df))
banned = np.zeros((len(blue_team_bans_df), len(banned_champions_list)), dtype="int64")
for ban_slot in blue_ban_columns:
    # A champion that is missing from banned_champions_list raises KeyError.
    champion_columns = [ban_position[champion] for champion in blue_team_bans_df[ban_slot]]
    banned[row_numbers, champion_columns] = 1

blue_team_bans_basket = pd.DataFrame(banned, columns=banned_champions_list)
# Address is attached by position, so each basket row carries the address of the match it was built
# from (joining on index labels would misalign it, because the filtered labels are not 0..n-1).
blue_team_bans_basket["Address"] = match_addresses_b.to_numpy()

In [None]:
# Runs the apriori algorithm on the blue team's bans and derives association rules
# (support/confidence/lift) for common bans, strongest rules first.
blue_team_bans_basket = blue_team_bans_basket.reset_index(drop=True)
fixed_blue_team_bans_basket = blue_team_bans_basket.set_index("Address")
frequent_items = apriori(fixed_blue_team_bans_basket, min_support=0.011, use_colnames=True)
rules = association_rules(frequent_items, metric="lift", min_threshold=1).sort_values(
    by=["confidence", "lift"], ascending=False
)
rules.head()

In [None]:
# Grabs all match addresses from the red bans dataframe.
match_addresses_r = red_team_bans_df["Address"]

# Builds the red team's bans "basket": one row per match, one 0/1 column per banned champion
# (1 when the champion was banned by the red team in that match), plus the match Address.
# Columns come from banned_champions_list so a champion that was banned but never picked has a column.
red_ban_columns = ["r_ban1", "r_ban2", "r_ban3"]
ban_position = {champion: position for position, champion in enumerate(banned_champions_list)}
row_numbers = np.arange(len(red_team_bans_df))
banned = np.zeros((len(red_team_bans_df), len(banned_champions_list)), dtype="int64")
for ban_slot in red_ban_columns:
    # A champion that is missing from banned_champions_list raises KeyError.
    champion_columns = [ban_position[champion] for champion in red_team_bans_df[ban_slot]]
    banned[row_numbers, champion_columns] = 1

red_team_bans_basket = pd.DataFrame(banned, columns=banned_champions_list)
# Address is attached by position, so each basket row carries the address of the match it was built
# from (joining on index labels would misalign it, because the filtered labels are not 0..n-1).
red_team_bans_basket["Address"] = match_addresses_r.to_numpy()

In [None]:
# Runs the apriori algorithm on the red team's bans (minimum support 0.03) and derives association
# rules (support/confidence/lift) for common bans, strongest rules first.
red_team_bans_basket = red_team_bans_basket.reset_index(drop=True)
fixed_red_team_bans_basket = red_team_bans_basket.set_index("Address")
frequent_items = apriori(fixed_red_team_bans_basket, min_support=0.03, use_colnames=True)
rules = association_rules(frequent_items, metric="lift", min_threshold=1).sort_values(
    by=["confidence", "lift"], ascending=False
)
rules.head()

## Demo

### Linear Regression Demo:

In [None]:
# Using the X and y from earlier, holds out 25% of the matches, refits the Linear Regression model on
# the remaining 75%, and prints the mean squared error on the held-out matches. Refitting on the
# training split keeps the test matches unseen by the model being evaluated.
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.25)
LRModel.fit(xtrain, ytrain)
pred = LRModel.predict(xtest)
print("Mean squared error: ", np.mean((pred - ytest) ** 2))

### Logistic Regression Demo:

In [None]:
# Using the train/test split from the Linear Regression demo, refits the Logistic Regression model on
# the training matches and prints its accuracy on the held-out matches. Refitting on the training
# split keeps the test matches unseen by the model being evaluated.
LogModel.fit(xtrain, ytrain)
pred = LogModel.predict(xtest)
print("Accuracy", metrics.accuracy_score(ytest, pred))

### Apriori Demo:

In [None]:
# Sample red team composition used to demonstrate the apriori rules.
sample_row = {"Address": "http://matchhistory.na.leagueoflegends.com/en/#match-details/samplehash",
              "redTopChamp": "Volibear",
              "redJungleChamp": "Rengar",
              "redMiddleChamp": "Veigar",
              "redADCChamp": "Xayah",
              "redSupportChamp": "Rakan"}

# Adds 100 copies of the sample composition to the winning red compositions (presumably so that it is
# frequent enough to surface in the apriori rules). All copies are concatenated in one step, so every
# one of them is kept; the sample rows have no rResult value.
sample_rows = pd.DataFrame([sample_row] * 100)
red_test_composition_df = pd.concat([red_team_composition_df, sample_rows], ignore_index=True)

# Grabs all match addresses from the extended red team compositions dataframe.
match_addresses = red_test_composition_df["Address"]

# Builds the test "basket": one row per match, one 0/1 column per champion (1 when the champion was
# picked by the red team in that match), plus the match Address.
red_role_columns = ["redTopChamp", "redJungleChamp", "redMiddleChamp", "redADCChamp", "redSupportChamp"]
champion_position = {champion: position for position, champion in enumerate(champions_list)}
row_numbers = np.arange(len(red_test_composition_df))
picked = np.zeros((len(red_test_composition_df), len(champions_list)), dtype="int64")
for role in red_role_columns:
    # A champion that is missing from champions_list raises KeyError.
    champion_columns = [champion_position[champion] for champion in red_test_composition_df[role]]
    picked[row_numbers, champion_columns] = 1

red_test_comp_basket = pd.DataFrame(picked, columns=champions_list)
# Address is attached by position so each basket row carries the address of the match it was built from.
red_test_comp_basket["Address"] = match_addresses.to_numpy()

In [None]:
# Runs the apriori algorithm on the extended red team compositions and derives association rules
# (support/confidence/lift) for common picks, strongest rules first.
red_test_comp_basket = red_test_comp_basket.reset_index(drop=True)
fixed_red_test_comp_basket = red_test_comp_basket.set_index("Address")
frequent_items = apriori(fixed_red_test_comp_basket, min_support=0.011, use_colnames=True)
rules = association_rules(frequent_items, metric="lift", min_threshold=1).sort_values(
    by=["confidence", "lift"], ascending=False
)
rules.head()