In [1]:
import numpy as np
import pandas
try:
    # Public, version-stable home of BaseIndexer.
    from pandas.api.indexers import BaseIndexer
except ImportError:
    # Fallback to the private path this notebook originally imported from.
    from pandas.core.window.indexers import BaseIndexer


In [2]:

class CustomIndexer(BaseIndexer):
    """Rolling-window indexer whose window for row ``i`` is rows ``i+1 .. i+3``.

    The bounds are fixed by the arithmetic in ``get_window_bounds``; the
    ``window_size`` handed to the constructor is never consulted.

    NOTE(review): the window excludes the current row and covers the rows
    that FOLLOW it. If the frame is sorted chronologically this averages
    later observations — confirm that is the intended direction.
    """

    def get_window_bounds(self, num_values=0, min_periods=None, center=None, closed=None, step=None):
        """Return the ``(start, end)`` index arrays for every window.

        Parameters
        ----------
        num_values : int
            Number of rows in the object being rolled over.
        min_periods, center, closed
            Accepted for interface compatibility; not used here.
        step : int or None
            Evaluate only every ``step``-th window. pandas >= 1.5 passes this
            keyword to custom indexers, so it must be accepted even when it is
            ``None`` (treated as 1).

        Returns
        -------
        tuple of numpy.ndarray
            ``int64`` arrays ``start`` and ``end``; pandas treats each window
            as the half-open slice ``[start[i], end[i])``. Both arrays are
            clipped to ``[0, num_values]``, so windows near the tail are
            shorter and the last one is empty.
        """
        stride = 1 if step is None else step
        positions = np.arange(0, num_values, stride, dtype="int64")

        # Window for row i is [i + 1, i + 4): the three rows after row i.
        start = np.clip(positions + 1, 0, num_values)
        end = np.clip(positions + 4, 0, num_values)

        return start, end


def concat_rows(df, n):
    """Fold every ``n`` consecutive rows of ``df`` into one wide row.

    The output has ``len(df.columns) * n`` columns. Each original column
    name is repeated with a 1-based suffix giving the row's position inside
    its group of ``n`` (``a1, b1, a2, b2, ...``). The result gets a fresh
    default index.

    The underlying ``reshape`` raises ``ValueError`` when the number of rows
    in ``df`` is not a multiple of ``n``.
    """
    width = len(df.columns)

    wide_names = []
    for position in range(1, n + 1):
        for name in df.columns:
            wide_names.append(f"{name}{position}")

    # Row-major reshape: n consecutive source rows land side by side.
    stacked = df.values.reshape((-1, width * n))
    return pandas.DataFrame(stacked, columns=wide_names)

In [3]:
# Relative path of the match-data CSV (resolved against the kernel's working directory).
filename = "data/2021_LoL_esports_match_data_from_OraclesElixir_20210515.csv"

# Read the whole file into one DataFrame using pandas' default parsing options.
data = pandas.read_csv(filename)

In [4]:
# Column subset used when building the team-level frame.
team_columns = [
    "date",
    "actual_result",
    "playerid",
    "gameid",
    "team",
    "gamelength",
    "result",
    "dragons",
    "barons",
    "riftheralds",
    "towers",
]

# Column subset used when building the player-level frame.
player_columns = [
    "date",
    "player",
    "gameid",
    "kills",
    "deaths",
    "assists",
    "dpm",
    "damageshare",
    "damagetakenperminute",
    "wpm",
    "vspm",
    "earned gpm",
    "cspm",
    "csat10",
    "goldat10",
    "killsat10",
    "deathsat10",
    "assistsat10",
    "csat15",
    "goldat15",
    "killsat15",
    "deathsat15",
    "assistsat15",
]


In [5]:
# Order rows by date then team, renumber the index 0..N-1, and keep an
# untouched copy of "result" under "actual_result".
data = (
    data
        .sort_values(by=["date", "team"])
        .reset_index(drop=True)
        .assign(actual_result=lambda frame: frame["result"])
)

In [6]:


# Shared by the player and team pipelines. CustomIndexer.get_window_bounds
# never reads window_size, so the value passed here has no effect on the window.
indexer = CustomIndexer(window_size=1)

# Per-player rolling mean of the selected stats, returned in the row order
# of `data`.
player_data = (
    data
        .filter(player_columns)
        .groupby(pandas.Grouper(key="player"))
        # NOTE(review): on="gameid" presumably keeps gameid out of the mean
        # and carries it through unchanged — confirm.
        .rolling(window=indexer, min_periods=1, on="gameid")
        .mean()
        # Flatten the group/row index; the unnamed row level shows up as
        # "level_1" and holds each row's position in `data`.
        .reset_index()
        .rename(columns={"level_1": "id"})
        # Undo the regrouping by player: back to the original row order.
        .sort_values(by="id")
        .reset_index(drop=True)
        .drop(columns=["player", "id", "gameid"])
)



In [7]:
# Per-team rolling mean over the rows with playerid > 10, returned in the
# row order of `data`.
# NOTE(review): playerid > 10 presumably selects the team-summary rows — confirm
# against the data dictionary.
team_data = (
    data
        .query("playerid > 10")
        .filter(team_columns)
        .groupby(pandas.Grouper(key="team"))
        # NOTE(review): on="actual_result" presumably keeps that column out
        # of the mean so the un-averaged outcome survives — confirm.
        .rolling(window=indexer, min_periods=1, on="actual_result")
        .mean()
        # Flatten the group/row index; the unnamed row level shows up as
        # "level_1" and holds each row's position in `data`.
        .reset_index()
        .rename(columns={"level_1": "id"})
        # Undo the regrouping by team: back to the original row order.
        .sort_values(by="id")
        .reset_index(drop=True)
        .drop(columns=["playerid", "id"])
)

In [17]:
# Fold consecutive rows into one wide row each: 10 player rows and 2 team
# rows at a time.
# NOTE(review): this assumes both frames hold their rows grouped per game in
# the same game order — confirm.
game_data_player = concat_rows(player_data, 10)
game_data_team = (
    concat_rows(team_data, 2)
        .drop(columns=["actual_result2", "team1", "team2"])
)

# Team features first, player features after; discard any row with a missing value.
game_data = pandas.concat([game_data_team, game_data_player], axis=1).dropna()

# Split the target off the feature frame: pop returns the column and removes
# it from game_data in one step.
game_result = game_data.pop("actual_result1")


In [14]:
from sklearn.model_selection import train_test_split

# Fixed seed so the shuffle (and therefore every downstream number) is the
# same on each Restart & Run All.
RANDOM_SEED = 42

# Hold out 33% of the rows for evaluation.
trainX, testX, trainY, testY = train_test_split(
    game_data, game_result, test_size=.33, random_state=RANDOM_SEED)

# The test features were originally bound to the misspelled name `textX`;
# keep it as an alias in case other cells still refer to it.
textX = testX



# np.savetxt("aggr_data.csv", data_aggr)

694