In [None]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Load the raw static dataset; the rest of the notebook treats `df` as a pandas DataFrame.
# NOTE(review): pickle.load can execute arbitrary code while deserializing — only open
# pickle files that come from a trusted source.
with open('../data/static_05_12_23/raw/static_full.pkl', 'rb') as f:
    df = pickle.load(f)

In [None]:
df.head(1)

In [None]:
df.info()

In [None]:
import os
import sys

# Print the directory that contains the Python executable running this kernel.
print(os.path.dirname(sys.executable))

In [None]:
df.describe()

In [None]:
df.dtypes

In [None]:
sns.heatmap(df.isna())

In [None]:
# Correlate numeric/boolean columns only: since pandas 2.0 `DataFrame.corr()` no longer
# drops non-numeric columns silently and raises on string columns such as `platformId`.
sns.heatmap(df.select_dtypes(include=['number', 'bool']).corr())

In [None]:
df.isna().sum()

In [None]:
# Currently, 81710 kda entries are missing in the database.

In [None]:
def drop_missing(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame without the rows that contain any missing value.

    The number of removed rows is printed; ``df`` itself is not modified.
    """
    complete_rows = df.dropna(axis=0)
    removed = len(df) - len(complete_rows)
    print(f'dropped {removed} rows')
    return complete_rows


def replace_missing(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame in which every missing value is replaced by -1.

    ``df`` itself is not modified.
    """
    return df.fillna(-1)

In [None]:
# Keep a separate frame without incomplete rows; `drop_missing` returns a new frame,
# so `df` still contains the rows with missing values after this cell.
df_nomissing = drop_missing(df)
#replace_missing(df)

The first five participants are always on the same team:

In [None]:
df['participant1_win'] == df['participant2_win']

In [None]:
np.all(df['participant1_win'] == df['participant2_win'])

In [None]:
print(len(df[df['participant1_win'] != df['participant2_win']]))
print(len(df[df['participant1_win'] != df['participant3_win']]))

In [None]:
df.head()

In [None]:
def get_winning_team(df: pd.DataFrame):
    """Add a ``label`` column to ``df`` (in place) that encodes the winning team.

    The label is derived from ``participant1_win``: 0 where it is truthy
    (team1 won) and 1 otherwise (team2 won). Nothing is returned.
    """
    team1_won = df['participant1_win']
    df['label'] = np.where(team1_won, 0, 1)

In [None]:
get_winning_team(df)

In [None]:
def drop_wrong_data(df: pd.DataFrame):
    """Remove, in place, the matches that do not belong to the target population.

    Rows are dropped when any of the following holds:

    * ``mapId`` is not 11
    * ``queueId`` is not 420
    * ``gameDuration`` is below 900
    * ``platformId`` is not ``'EUW1'``
    * ``seasonId`` is not 13
    * ``gameVersion`` / ``patch`` differ from the value of the first row that
      survived the filters above

    The number of removed rows is printed. Nothing is returned.

    :param df: match table; modified in place.
    """
    len_before = len(df)
    df.drop(df[df['mapId'] != 11].index, inplace=True)
    df.drop(df[df['queueId'] != 420].index, inplace=True)
    df.drop(df[df['gameDuration'] < 900].index, inplace=True)
    df.drop(df[df['platformId'] != 'EUW1'].index, inplace=True)
    df.drop(df[df['seasonId'] != 13].index, inplace=True)
    for col in ('gameVersion', 'patch'):
        if df.empty:
            # Nothing left to compare against; avoid indexing into an empty column.
            break
        # Positional lookup: the row labelled 0 may already have been dropped above,
        # in which case `df[col][0]` would raise a KeyError.
        reference = df[col].iloc[0]
        df.drop(df[df[col] != reference].index, inplace=True)
    print(f'dropped {len_before - len(df)} rows')

In [None]:
df

In [None]:
drop_wrong_data(df)

In [None]:
def drop_irrelevant(df: pd.DataFrame):
    """Drop, in place, the match-level metadata columns and all ``participant{i}_win`` columns.

    Nothing is returned.
    """
    match_meta = ['gameDuration', 'gameCreation', 'gameVersion', 'mapId',
                  'queueId', 'patch', 'seasonId', 'platformId']
    win_flags = [f'participant{slot}_win' for slot in range(1, 11)]
    df.drop(columns=match_meta + win_flags, inplace=True)


In [None]:
drop_irrelevant(df)

In [None]:
df.reset_index(inplace=True, drop=True)
df

In [None]:
df['participant1_tier']

In [None]:
from enum import Enum


class Rank(Enum):
    """Integer encoding of the tier names, from IRON (0) up to CHALLENGER (9)."""
    IRON = 0
    BRONZE = 1
    SILVER = 2
    GOLD = 3
    PLATINUM = 4
    EMERALD = 5
    DIAMOND = 6
    MASTER = 7
    GRANDMASTER = 8
    CHALLENGER = 9


def format_rank(tier: str, rank: str) -> str:
    """Join ``tier`` and ``rank`` with a dot, e.g. ``format_rank(3, 2) == '3.2'``."""
    return f'{tier}.{rank}'


def fix_rank(df: pd.DataFrame):
    """Merge each participant's tier and rank into one float column, in place.

    For every participant 1..10 the tier name is mapped to its ``Rank`` value,
    combined with the rank as ``'<tier value>.<rank>'`` and parsed as a float,
    which overwrites ``participant{i}_tier``. ``participant{i}_rank`` is dropped.

    NOTE(review): assumes the string form of ``participant{i}_rank`` is made of
    digits (e.g. 1-4) so the combined text parses as a float — TODO confirm
    against the raw data.

    :param df: frame to modify; nothing is returned.
    :raises KeyError: if a tier value is not a member name of ``Rank``.
    """
    for i in range(1, 11):
        tier_col = f'participant{i}_tier'
        rank_col = f'participant{i}_rank'
        tier_values = df[tier_col].map(lambda name: Rank[name].value)
        # Build the value element by element instead of with a row-wise
        # `df.apply(axis=1)`: row-wise apply is slow and upcasts the tier to float
        # on an all-numeric frame ('3.0.2'), which can no longer be parsed.
        df[tier_col] = np.array(
            [float(format_rank(tier, rank)) for tier, rank in zip(tier_values, df[rank_col])],
            dtype=float)
        df.drop(columns=[rank_col], inplace=True)

In [None]:
fix_rank(df)
df

In [None]:
def calc_winrate(df: pd.DataFrame):
    """Replace each participant's wins/losses columns with a single winrate column, in place.

    ``participant{i}_winrate`` is ``wins / (wins + losses)`` for participants 1..10;
    the ``participant{i}_wins`` and ``participant{i}_losses`` columns are dropped.
    Nothing is returned.
    """
    for slot in range(1, 11):
        wins_col = f'participant{slot}_wins'
        losses_col = f'participant{slot}_losses'
        games_played = df[wins_col] + df[losses_col]
        df[f'participant{slot}_winrate'] = df[wins_col] / games_played
        df.drop(columns=[wins_col, losses_col], inplace=True)

In [None]:
calc_winrate(df)
df

In [None]:
# Move the 'label' column to the end; the train/test split further down selects the
# features and the target by position (all columns but the last vs. the last column).
cols = df.columns.tolist()
index = cols.index('label')
cols = cols[:index] + cols[index + 1:] + [cols[index]]  # label as last column
df = df[cols]

In [None]:
def fix_teamId(df: pd.DataFrame):
    """Rescale every ``participant{i}_teamId`` column in place via ``teamId // 100 - 1``.

    With this formula 100 becomes 0 and 200 becomes 1. Nothing is returned.

    :param df: frame to modify; must contain ``participant1_teamId`` .. ``participant10_teamId``.
    """
    for i in range(1, 11):
        col = f'participant{i}_teamId'
        # Assign to the column. `df.loc[col] = ...` addresses a ROW labelled `col`
        # and would append a bogus row instead of updating the column.
        df[col] = df[col] // 100 - 1

In [None]:
fix_teamId(df)
df

In [None]:
def convert_booleans(df: pd.DataFrame):
    """Replace boolean values with integers throughout ``df``, in place.

    Every ``True`` becomes 1 and every ``False`` becomes 0 via ``DataFrame.replace``.
    Nothing is returned.
    """
    df.replace({True: 1, False: 0}, inplace=True)

In [None]:
convert_booleans(df)
df

In [None]:
from datetime import datetime


def convert_lastPlayTime(df: pd.DataFrame, reference_time: 'datetime | None' = None):
    """Convert each ``participant{i}_champion_lastPlayTime`` column, in place, to elapsed seconds.

    The stored values are read as timestamps in milliseconds and replaced by the
    whole number of seconds between that moment and ``reference_time``.

    :param df: frame to modify; nothing is returned.
    :param reference_time: naive local ``datetime`` to measure from. Defaults to
        ``datetime.now()``, taken once so all rows and columns share the same
        reference; pass a fixed value to make the result reproducible.
    """
    now = datetime.now() if reference_time is None else reference_time
    for i in range(1, 11):
        col = f'participant{i}_champion_lastPlayTime'
        # Assign to the column. `df.loc[col] = ...` addresses a ROW labelled `col`
        # and would append a bogus row instead of updating the column.
        df[col] = df[col].apply(
            lambda ms: int((now - datetime.fromtimestamp(ms / 1000)).total_seconds()))

In [None]:
df[df.isna().any(axis=1)]

In [None]:
df.isna().sum()

In [None]:
# `drop_missing` returns a new frame and leaves its argument untouched; reassign so the
# incomplete rows are actually removed before the remaining preprocessing steps.
df = drop_missing(df)
df

In [None]:
convert_lastPlayTime(df)
df

In [None]:
# calculate teamIds from participant_win


In [None]:
#for i, col in enumerate(df.columns):
#   plt.figure(i)
#  sns.histplot(df[col], stat='density', kde=True)

In [None]:
# Hold out 20% of the rows for testing. Every column except the last is used as a feature
# and the last column is the target; random_state=42 makes the shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, :-1], df.iloc[:, -1], test_size=0.2, random_state=42,
                                                    shuffle=True)

In [None]:
# Fit the scaler on the training split only, then apply the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Re-attach the target as the last column of each scaled feature matrix.
X_train = np.column_stack((X_train, y_train))
X_test = np.column_stack((X_test, y_test))

In [None]:
X_train

In [None]:
np.shape(np.expand_dims(y_train, axis=1))

In [None]:
np.shape(X_train)

In [None]:
#np.save('../data/train_static', X_train)
#np.save('../data/test_static', X_test)