## Time Series Analysis and Feature Generation Using tsfresh
## Michael Phillips

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Remove pandas' cap on displayed columns so wide frames render every column.
pd.set_option('display.max_columns', None)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [255]:
# Load 'gold_diff_merged.csv'; index_col=0 makes the file's first column the row index.
df = pd.read_csv(filepath_or_buffer='gold_diff_merged.csv', index_col=0)

In [256]:
# Discard the listed metadata columns (df is modified in place).
unused_columns = [
    'league', 'season', 'year', 'team_name',
    'game_length', 'player_name', 'champion',
]
df.drop(columns=unused_columns, inplace=True)

In [257]:
# Columns listed in `cols` keep their names; every other column label loses its
# first four characters (the displayed result shows labels '0' .. '80').
cols = ['match_id', 'red_or_blue_side', 'result', 'type']


def _trim_label(label):
    """Return `label` unchanged if it is in `cols`, else without its first 4 characters."""
    if label in cols:
        return label
    return label[4:]


df.rename(columns=_trim_label, inplace=True)
df.head()

Unnamed: 0,match_id,red_or_blue_side,result,type,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80
0,001a1f289e3bab22,blue,0,total_gold,0,15,46,-133,-60,-904,-505,-852,-763,-1224,-998,-133,-1337,-1246,-1185,-1201,-1601,-3824,-4471,-4017,-2044,-2690,-2997,-1582,-1827,-1672,-1895,-2741,-3087,-2756,-3615,-3957,-4035,-3850,-4045,-4100,-8951,-8953,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,001a1f289e3bab22,red,1,total_gold,0,-15,-46,133,60,904,505,852,763,1224,998,133,1337,1246,1185,1201,1601,3824,4471,4017,2044,2690,2997,1582,1827,1672,1895,2741,3087,2756,3615,3957,4035,3850,4045,4100,8951,8953,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,001f50c8547e2e73,blue,1,total_gold,0,0,0,89,-92,-174,191,69,253,362,76,1964,2364,3302,2745,3678,2492,3066,4765,7023,7136,7480,9872,9976,9944,11088,11137,11410,11434,11909,11660,12350,15929,17225,16526,18484,21780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,001f50c8547e2e73,red,0,total_gold,0,0,0,-89,92,174,-191,-69,-253,-362,-76,-1964,-2364,-3302,-2745,-3678,-2492,-3066,-4765,-7023,-7136,-7480,-9872,-9976,-9944,-11088,-11137,-11410,-11434,-11909,-11660,-12350,-15929,-17225,-16526,-18484,-21780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,002f613ae09ad421,blue,1,total_gold,0,0,17,-106,-136,400,363,829,1532,1862,1473,1514,2290,2793,2975,3497,5391,5188,6938,8541,9835,10428,11160,11492,12184,14247,14372,14594,16182,16344,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [258]:
# Annotate match_id to differentiate the roles/games: each (side, type) pair
# gets its own suffix, built as '_' + side code + role code. The 'total_gold'
# rows carry no role letter, e.g. '<id>_b' for blue total and '<id>_rT' for
# red top.
#
# Two small lookup tables replace twelve copy-pasted masked assignments, so a
# new side or role is a one-line change and the frame is walked only once.
SIDE_CODES = {'blue': 'b', 'red': 'r'}
ROLE_CODES = {
    'total_gold': '',
    'top_gold': 'T',
    'jungle_gold': 'J',
    'mid_gold': 'M',
    'adc_gold': 'A',
    'support_gold': 'S',
}

# Rows whose side or type is not in the tables get an empty suffix, i.e. their
# match_id2 equals the plain match_id (same as the masked assignments did).
row_suffixes = pd.Series(
    [
        '_' + SIDE_CODES[side] + ROLE_CODES[kind]
        if side in SIDE_CODES and kind in ROLE_CODES
        else ''
        for side, kind in zip(df['red_or_blue_side'], df['type'])
    ],
    index=df.index,
)
df['match_id2'] = df['match_id'] + row_suffixes

In [259]:
# Swap the annotated ids in as the new leading 'match_id' column.
m_id = df.loc[:, 'match_id2']
df.drop(columns=['match_id', 'match_id2'], inplace=True)
df.insert(loc=0, column='match_id', value=m_id)
df.head()

Unnamed: 0,match_id,red_or_blue_side,result,type,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80
0,001a1f289e3bab22_b,blue,0,total_gold,0,15,46,-133,-60,-904,-505,-852,-763,-1224,-998,-133,-1337,-1246,-1185,-1201,-1601,-3824,-4471,-4017,-2044,-2690,-2997,-1582,-1827,-1672,-1895,-2741,-3087,-2756,-3615,-3957,-4035,-3850,-4045,-4100,-8951,-8953,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,001a1f289e3bab22_r,red,1,total_gold,0,-15,-46,133,60,904,505,852,763,1224,998,133,1337,1246,1185,1201,1601,3824,4471,4017,2044,2690,2997,1582,1827,1672,1895,2741,3087,2756,3615,3957,4035,3850,4045,4100,8951,8953,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,001f50c8547e2e73_b,blue,1,total_gold,0,0,0,89,-92,-174,191,69,253,362,76,1964,2364,3302,2745,3678,2492,3066,4765,7023,7136,7480,9872,9976,9944,11088,11137,11410,11434,11909,11660,12350,15929,17225,16526,18484,21780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,001f50c8547e2e73_r,red,0,total_gold,0,0,0,-89,92,174,-191,-69,-253,-362,-76,-1964,-2364,-3302,-2745,-3678,-2492,-3066,-4765,-7023,-7136,-7480,-9872,-9976,-9944,-11088,-11137,-11410,-11434,-11909,-11660,-12350,-15929,-17225,-16526,-18484,-21780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,002f613ae09ad421_b,blue,1,total_gold,0,0,17,-106,-136,400,363,829,1532,1862,1473,1514,2290,2793,2975,3497,5391,5188,6938,8541,9835,10428,11160,11492,12184,14247,14372,14594,16182,16344,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [260]:
# One independent copy of df per value of 'type', in this order:
# total, top, jungle, mid, adc, support.
tdf, topdf, jdf, mdf, adf, sdf = (
    df[df['type'] == kind].copy()
    for kind in (
        'total_gold',
        'top_gold',
        'jungle_gold',
        'mid_gold',
        'adc_gold',
        'support_gold',
    )
)

In [261]:
# Keep (match_id, result) pairs aside before 'result' is removed from tdf,
# then strip the non-numeric columns so only match_id and the series remain.
ty = tdf.loc[:, ['match_id', 'result']]
tdf.drop(columns=['type', 'red_or_blue_side', 'result'], inplace=True)

In [262]:
# Show the (rows, columns) dimensions of tdf as the cell output.
tdf.shape

(7560, 82)

In [263]:
# Reshape wide -> long: one row per (match_id, column label) with the cell in 'value'.
tdf2 = tdf.melt(id_vars=['match_id'])
# The melted column labels are strings; cast them to integers.
tdf2['variable'] = tdf2['variable'].astype(int)

In [264]:
# Order rows by series id and then by the integer 'variable' column, and write them to disk.
tdf2_sorted = tdf2.sort_values(by=['match_id', 'variable'])
tdf2_sorted.to_csv('tdf2.csv')