In [1]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import sqlite3
from sqlite3 import Error
import os
from sklearn.dummy import DummyRegressor

In [2]:
path = os.path.abspath(os.path.join(os.getcwd(), os.path.pardir))

In [3]:
def create_connection(db_file: str) -> sqlite3.Connection:

    conn = None
    try:
        conn = sqlite3.connect(db_file)
    except Error as e:
        print(e)

    return conn

In [4]:
conn = create_connection(os.path.join(path, "data/league.db"))

In [5]:
def select_data_to_ml_model(conn):
    """
    Query all rows in the tasks table
    :param conn: the Connection object
    :return:
    """
    cur = conn.cursor()
    cur.execute("""SELECT Club_Name,
                            Result,
                            Kills,
                            First_Blood,
                            First_Tower,
                            Dragons,
                            Barons, 
                            Gold, 
                            Ban_1, 
                            Ban_2, 
                            Ban_3, 
                            Ban_4, 
                            Ban_5, 
                            Pick_1, 
                            Pick_2, 
                            Pick_3, 
                            Pick_4, 
                            Pick_5, 
                            Team_stats.Match_id,
                            General_Data.Time,
                            General_Data.Game_Version
                    FROM Team_Stats
                    INNER JOIN General_Data
                    ON Team_Stats.Match_id = General_Data.Match_id
                    """)

    rows = cur.fetchall()
    return rows

In [6]:
columns_names = ['Club_Name', 'Result', 'Kills', 'First_Blood', 'First_Tower', 'Dragons', 'Barons', 'Gold', 'Ban_1', 'Ban_2', 'Ban_3', 'Ban_4', 'Ban_5', 'Pick_1', 'Pick_2', 'Pick_3', 'Pick_4', 'Pick_5', 'Match_id', 'Time', 'Game_Version']

In [7]:
data = select_data_to_ml_model(conn)

In [8]:
df_general = pd.DataFrame(data, columns=columns_names)

In [9]:
legends = (df_general.Ban_1.unique().tolist()+df_general.Ban_2.unique().tolist()+df_general.Ban_3.unique().tolist()+df_general.Ban_4.unique().tolist()+df_general.Ban_5.unique().tolist())

In [10]:
unique_legeneds = pd.Series(legends)

In [11]:
stripped = unique_legeneds.apply(lambda x: x.strip())


In [12]:
df_general = df_general.applymap(lambda x: x.strip() if isinstance(x, str) else x)

In [13]:
unique_legends = stripped.unique()

In [14]:
df = pd.DataFrame(unique_legends)

In [15]:
X = []
for i in range(len(df)):
    x = []
    x.append(df[0][i])
    x.append(df.index.to_list()[i])
    X.append(x)

In [16]:
enc = OneHotEncoder(handle_unknown='ignore')

In [17]:
enc.fit(X)

OneHotEncoder(handle_unknown='ignore')

In [18]:
enc.categories_

[array(['Aatrox', 'Ahri', 'Akali', 'Akshan', 'Alistar', 'Amumu', 'Anivia',
        'Annie', 'Aphelios', 'Ashe', 'Aurelion', 'Azir', 'Bard',
        'Blitzcrank', 'Brand', 'Braum', 'Caitlyn', 'Camille', 'Cassiopeia',
        'Chogath', 'Corki', 'Darius', 'Diana', 'Dr.', 'Draven', 'Ekko',
        'Elise', 'Evelynn', 'Ezreal', 'Fiddlesticks', 'Fiora', 'Fizz',
        'Galio', 'Gangplank', 'Garen', 'Gnar', 'Gragas', 'Graves', 'Gwen',
        'Hecarim', 'Heimerdinger', 'Illaoi', 'Irelia', 'Ivern', 'Janna',
        'Jarvan', 'Jax', 'Jayce', 'Jhin', 'Jinx', 'Kaisa', 'Kalista',
        'Karma', 'Karthus', 'Kassadin', 'Katarina', 'Kayle', 'Kayn',
        'Kennen', 'KhaZix', 'Kindred', 'Kled', 'KogMaw', 'LeBlanc', 'Lee',
        'Leona', 'Lillia', 'Lissandra', 'Lucian', 'Lulu', 'Lux',
        'Malphite', 'Malzahar', 'Maokai', 'Master', 'Miss', 'Mordekaiser',
        'Morgana', 'Nami', 'Nasus', 'Nautilus', 'Neeko', 'Nidalee',
        'Nocturne', 'None', 'Nunu', 'Olaf', 'Orianna', 'Ornn', 'Pantheo

In [19]:
enc_feature = enc.transform(X).toarray()

In [20]:
enc.get_feature_names_out()

array(['x0_Aatrox', 'x0_Ahri', 'x0_Akali', 'x0_Akshan', 'x0_Alistar',
       'x0_Amumu', 'x0_Anivia', 'x0_Annie', 'x0_Aphelios', 'x0_Ashe',
       'x0_Aurelion', 'x0_Azir', 'x0_Bard', 'x0_Blitzcrank', 'x0_Brand',
       'x0_Braum', 'x0_Caitlyn', 'x0_Camille', 'x0_Cassiopeia',
       'x0_Chogath', 'x0_Corki', 'x0_Darius', 'x0_Diana', 'x0_Dr.',
       'x0_Draven', 'x0_Ekko', 'x0_Elise', 'x0_Evelynn', 'x0_Ezreal',
       'x0_Fiddlesticks', 'x0_Fiora', 'x0_Fizz', 'x0_Galio',
       'x0_Gangplank', 'x0_Garen', 'x0_Gnar', 'x0_Gragas', 'x0_Graves',
       'x0_Gwen', 'x0_Hecarim', 'x0_Heimerdinger', 'x0_Illaoi',
       'x0_Irelia', 'x0_Ivern', 'x0_Janna', 'x0_Jarvan', 'x0_Jax',
       'x0_Jayce', 'x0_Jhin', 'x0_Jinx', 'x0_Kaisa', 'x0_Kalista',
       'x0_Karma', 'x0_Karthus', 'x0_Kassadin', 'x0_Katarina', 'x0_Kayle',
       'x0_Kayn', 'x0_Kennen', 'x0_KhaZix', 'x0_Kindred', 'x0_Kled',
       'x0_KogMaw', 'x0_LeBlanc', 'x0_Lee', 'x0_Leona', 'x0_Lillia',
       'x0_Lissandra', 'x0_Lucian', 'x0_L

In [21]:
enc.inverse_transform(enc_feature)

array([['Irelia', 0],
       ['Volibear', 1],
       ['LeBlanc', 2],
       ['Nidalee', 3],
       ['Rell', 4],
       ['Lee', 5],
       ['Hecarim', 6],
       ['Zeri', 7],
       ['Graves', 8],
       ['Azir', 9],
       ['Veigar', 10],
       ['Galio', 11],
       ['Akshan', 12],
       ['Karma', 13],
       ['Cassiopeia', 14],
       ['Nocturne', 15],
       ['Diana', 16],
       ['Twisted', 17],
       ['Tryndamere', 18],
       ['Caitlyn', 19],
       ['Kaisa', 20],
       ['Zoe', 21],
       ['Ryze', 22],
       ['Jayce', 23],
       ['Gwen', 24],
       ['Thresh', 25],
       ['Camille', 26],
       ['Yasuo', 27],
       ['Jhin', 28],
       ['Ahri', 29],
       ['Samira', 30],
       ['Aphelios', 31],
       ['Vex', 32],
       ['Xayah', 33],
       ['Akali', 34],
       ['Renata', 35],
       ['Zilean', 36],
       ['Gragas', 37],
       ['Xin', 38],
       ['Gnar', 39],
       ['Renekton', 40],
       ['Lux', 41],
       ['Gangplank', 42],
       ['Viego', 43],
       ['Synd

In [22]:
enc_feature

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [23]:
def one_hot_maker(input_df: pd.DataFrame, column_name: str) -> pd.DataFrame:

    df = pd.DataFrame(input_df[f'{column_name}'].unique())
    X = []
    for i in range(len(df)):
        x = []
        x.append(df[0][i])
        x.append(df.index.to_list()[i])
        X.append(x)

    enc = OneHotEncoder(handle_unknown='ignore')
    enc.fit(X)

    enc_feature = enc.transform(X).toarray()

    f_df = pd.DataFrame()
    f_df['Names'] = enc.inverse_transform(enc_feature).tolist()
    f_df['Feature'] = enc_feature.tolist()

    return f_df

In [24]:
one_hot_maker(df_general, 'Club_Name')

Unnamed: 0,Names,Feature
0,"[Dark Passage Academy, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,"[Fenerbahçe Academy, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,"[Dusty, 2]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,"[Riddle Esports, 3]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,"[BISONS ECLUB, 4]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...
760,"[Dynamo Eclot Academy, 760]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
761,"[Barrage.NA, 761]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
762,"[Legend Esport Gaming, 762]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
763,"[Shu Dai Xiong Gaming, 763]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [25]:
f_df = pd.DataFrame()

In [26]:
f_df['Names'] = enc.inverse_transform(enc_feature).tolist()
f_df['Feature'] = enc_feature.tolist()

In [27]:
f_df['Names'].iloc[0][0]

'Irelia'

In [28]:
pd.DataFrame(df_general['Club_Name'].unique())

Unnamed: 0,0
0,Dark Passage Academy
1,Fenerbahçe Academy
2,Dusty
3,Riddle Esports
4,BISONS ECLUB
...,...
760,Dynamo Eclot Academy
761,Barrage.NA
762,Legend Esport Gaming
763,Shu Dai Xiong Gaming


In [29]:
df_general

Unnamed: 0,Club_Name,Result,Kills,First_Blood,First_Tower,Dragons,Barons,Gold,Ban_1,Ban_2,...,Ban_4,Ban_5,Pick_1,Pick_2,Pick_3,Pick_4,Pick_5,Match_id,Time,Game_Version
0,Dark Passage Academy,LOSS,17,0.0,0,2,0.0,57.3k,Irelia,Yone,...,Akshan,Jayce,Gragas,Hecarim,Ahri,Jinx,Leona,391210,33:41,v12.5
1,Fenerbahçe Academy,LOSS,16,0.0,1,1,1.0,62.4k,Volibear,Lee,...,Sett,Camille,Akshan,Xin,Ryze,Xayah,Tahm,391211,34:54,v12.5
2,Fenerbahçe Academy,WIN,19,1.0,1,2,1.0,47.6k,LeBlanc,Volibear,...,Sett,Tryndamere,Lee,Kindred,Syndra,Xayah,Renata,391212,22:06,v12.5
3,Fenerbahçe Academy,WIN,21,1.0,0,3,1.0,47.9k,LeBlanc,Volibear,...,Ahri,Orianna,Aatrox,Lee,Syndra,Kaisa,Leona,391213,22:26,v12.5
4,Dusty,WIN,19,1.0,1,1,1.0,53.7k,Nidalee,Diana,...,Gangplank,Karma,Gnar,Volibear,Akali,Jinx,Nautilus,390410,26:38,v12.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57092,Copenhagen Wolves,LOSS,6,1.0,0,1,0.0,42.1k,Dr.,Yasuo,...,,,Shyvana,Olaf,Zyra,Lucian,Lulu,47100,30:37,v3.15
57093,Fnatic,WIN,15,1.0,0,1,1.0,72.5k,Yasuo,Elise,...,,,Renekton,Vi,Ziggs,Jinx,Leona,47090,43:49,v3.15
57094,Roccat,WIN,14,0.0,0,2,2.0,59.5k,Vi,Dr.,...,,,Malphite,Elise,Pantheon,Lucian,Thresh,47080,31:14,v3.15
57095,Alliance,LOSS,9,1.0,1,1,0.0,54.7k,Kassadin,KhaZix,...,,,Renekton,Vi,Gragas,Caitlyn,Thresh,47070,41:14,v3.15


In [30]:
df_general['Result'] = df_general['Result'].apply(lambda x: 1 if x == 'WIN' else 0)

In [31]:
df_general['Time'] = df_general['Time'].apply(lambda x: float(x.replace(':', '.')))

In [32]:
df_general['Gold'] = df_general['Gold'].apply(lambda x: float(x.replace('k', '000').replace('.', '')))

In [33]:
final_df = df_general.drop(['Club_Name', 'Ban_2', 'Ban_3', 'Ban_4', 'Ban_5', 'Pick_1', 'Pick_2', 'Pick_3', 'Pick_4', 'Pick_5', 'Match_id', 'Game_Version'], axis=1)

In [34]:
encoder_df = pd.DataFrame(enc.fit_transform(final_df[['Ban_1']]).toarray())

In [35]:
final_df = final_df.join(encoder_df)

In [36]:
final_df.dropna(inplace=True)

In [37]:
y = final_df['Result']
final_df.drop(['Ban_1', 'Result'], axis=1, inplace=True)

In [38]:
final_df['First_Blood'] = final_df['First_Blood'].apply(lambda x: float(x))

In [39]:
final_df

Unnamed: 0,Kills,First_Blood,First_Tower,Dragons,Barons,Gold,Time,0,1,2,...,150,151,152,153,154,155,156,157,158,159
0,17,0.0,0,2,0.0,573000.0,33.41,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,16,0.0,1,1,1.0,624000.0,34.54,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,19,1.0,1,2,1.0,476000.0,22.06,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,21,1.0,0,3,1.0,479000.0,22.26,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,19,1.0,1,1,1.0,537000.0,26.38,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57092,6,1.0,0,1,0.0,421000.0,30.37,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
57093,15,1.0,0,1,1.0,725000.0,43.49,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
57094,14,0.0,0,2,2.0,595000.0,31.14,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
57095,9,1.0,1,1,0.0,547000.0,41.14,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
X_train, X_test, y_train, y_test = train_test_split(final_df, y, test_size=0.20, random_state=42)

In [41]:
X_train

Unnamed: 0,Kills,First_Blood,First_Tower,Dragons,Barons,Gold,Time,0,1,2,...,150,151,152,153,154,155,156,157,158,159
29906,22,1.0,0,4,0.0,721000.0,42.03,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22311,4,1.0,1,0,0.0,483000.0,32.06,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50625,16,0.0,0,0,1.0,68000.0,35.10,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
52017,6,0.0,0,3,1.0,492000.0,25.25,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20417,11,1.0,0,3,1.0,791000.0,43.35,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54361,1,0.0,0,0,0.0,538000.0,33.55,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
38175,19,1.0,1,3,1.0,498000.0,25.51,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
860,10,1.0,0,4,0.0,506000.0,30.58,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15797,24,0.0,1,3,1.0,60000.0,32.39,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error

In [56]:
dummy_class = DummyClassifier()

In [57]:
dummy_class.fit(X_train, y_train)

DummyClassifier()

In [58]:
dummy_pred = dummy_class.predict(X_test)

In [59]:
dummy_class.score(X_test, y_test)

0.5101611772950245

In [60]:
knc = KNeighborsClassifier().fit(X_train, y_train)



In [61]:
knc_pred = knc.predict(X_test)



In [64]:
knc.score(X_test, y_test)



0.9012789067974772

In [65]:
mean_squared_error(y_test, dummy_pred)

0.4898388227049755

In [66]:
mean_squared_error(y_test, knc_pred)

0.09872109320252277