# Data Formatting and Neural Network Explorations
## Keras, scikit-learn, Pandas, Numpy

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_columns', None)
%matplotlib inline

In [None]:
# Load the dataset, using the first CSV column as the row index.
df = pd.read_csv('df.csv',index_col=0)
# Preview the first 10 rows.
df.head(10)

In [None]:
# (rows, columns) of the loaded frame.
df.shape

## Fit Scaler to Dataset


In [None]:
# First 76 rows of df, described as two games' worth of rows; copied so that
# later edits to dft do not touch df.
# NOTE(review): dft is not referenced again in this part of the notebook.
dft = df.head(76).copy()

In [None]:
# First 38 rows of df, described as one game's worth of rows; copied so that
# later edits to dfs do not touch df.
# NOTE(review): dfs is not referenced again in this part of the notebook.
dfs = df.head(38).copy()

In [None]:
from sklearn.preprocessing import StandardScaler

# Feature matrix: every column except the first two and the last one
# (presumably match_id/time and result, per hold_out_cols below — TODO confirm
# the column order of df.csv).
data = df.iloc[:,2:-1]
scaler = StandardScaler()
# NOTE(review): the scaler is fit on all rows, before the train/test split
# further down, so held-out rows contribute to the scaling statistics.
scaler.fit(data)

In [None]:
# Preview the first rows of df again.
df.head()

## Split into Train/Test Sets and Standardize

In [None]:
# want to keep group/result columns and add them back after scaling
# NOTE(review): cols is not referenced again in this part of the notebook.
cols = list(df.columns.values)
# Identifier and label columns that are held out of scaling.
hold_out_cols = ['match_id', 'time', 'result']
# Independent copy of the held-out columns, keeping df's row index.
ho_df = df[hold_out_cols].copy()

In [None]:
# Names assigned to the 22 scaled feature columns; assumed to be listed in the
# same order as the columns of df.iloc[:, 2:-1] — TODO confirm against df.csv.
cols_scale = ['gold', 'top_gold', 'jgl_gold', 'mid_gold', 'adc_gold', 'sup_gold', 'kill_total',
 'assist_total', 'opp_kill_total', 'opp_assist_total', 'r_inhib_count', 'r_baron_count', 'r_tower_count',
 'r_herald_count', 'r_drag_count', 'b_inhib_count', 'b_baron_count', 'b_tower_count', 'b_herald_count',
 'b_dragon_count', 'kda', 'opp_kda']

In [None]:
# scale data
# NOTE(review): this rebinds `data` to the scaler's output, so running this
# cell a second time would transform already-scaled values.
data = scaler.transform(data)

In [None]:
# Wrap the scaled values in a DataFrame with the feature names. Reusing df's
# row index (rather than a fresh 0..n-1 index) keeps every row aligned with
# ho_df, which is merged back on the index further down.
sdf = pd.DataFrame(data, columns=cols_scale, index=df.index)
sdf.head()

In [None]:
# Sanity check: both frames should have the same number of rows before merging.
print(len(sdf))
print(len(ho_df))

In [None]:
# Re-attach match_id/time/result by joining on the row index. pd.merge defaults
# to an inner join, so only index labels present in both frames are kept.
sdf = pd.merge(sdf, ho_df, left_index=True, right_index=True)
sdf.head()

In [None]:
# Final column order: identifiers first, then the scaled features, label last.
fcols = [ 'match_id', 'time','gold', 'top_gold', 'jgl_gold', 'mid_gold', 'adc_gold', 'sup_gold', 'kill_total',
 'assist_total', 'opp_kill_total', 'opp_assist_total', 'r_inhib_count', 'r_baron_count', 'r_tower_count',
 'r_herald_count', 'r_drag_count', 'b_inhib_count', 'b_baron_count', 'b_tower_count', 'b_herald_count',
 'b_dragon_count', 'kda', 'opp_kda', 'result']

In [None]:
# Reorder the columns to match fcols.
sdf1 = sdf[fcols]
sdf1.head()

In [None]:
# Row count after merging and reordering.
len(sdf1)

In [None]:
# create X and y arrays for training/testing
# X keeps every column except 'result', so match_id and time are included as
# columns 0 and 1; y is the 'result' column as a 2-D, single-column array.
X = sdf1.loc[:,sdf1.columns != 'result'].values
y = sdf1.loc[:,sdf1.columns == 'result'].values
# Match ids, passed to the group-aware splitter below.
groups = sdf1['match_id']

In [None]:
from sklearn.model_selection import GroupKFold
# 80/20 split for train/test groups: with 5 folds, each fold holds out roughly
# one fifth of the matches, and a match never straddles the two sides.
gkf = GroupKFold(n_splits=5)

# Only a single split is used downstream: keep the final fold that the
# splitter yields and index X and y once with it.
*_, (train_index, test_index) = gkf.split(X, y, groups)
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]

### Prepare Train Data

In [None]:
# reshape data to work with previously written parse_game function
# Both frames get default integer column labels (0, 1, 2, ...).
xtrain = pd.DataFrame(X_train)
ytrain = pd.DataFrame(y_train)

In [None]:
# Sanity check: labels and features should have the same number of rows.
print(len(ytrain))
print(len(xtrain))

In [None]:
# Join features and labels on the row index. Both frames have a column
# labelled 0, so the merge suffixes them with _x / _y (renamed in the next cell).
train_df = pd.merge(xtrain, ytrain, left_index=True, right_index=True)

In [None]:
# '0_x' is the first feature column (match_id) and '0_y' is the label column.
train_df.rename(columns={'0_y':'result','0_x':'match_id'},inplace=True)

In [None]:
# Order rows by match, then by column 1 (the 'time' column, given the fcols
# ordering used to build X).
train_df.sort_values(['match_id',1],inplace=True)
train_df.head()

### Prepare Test Data

In [None]:
# Same reshaping as for the training split: wrap the arrays in DataFrames with
# default integer column labels, then check that the row counts agree.
xtest = pd.DataFrame(X_test)
ytest = pd.DataFrame(y_test)
print(len(xtest))
print(len(ytest))

In [None]:
# Join features and labels on the row index. Both frames have a column
# labelled 0, so the merge suffixes them with _x / _y (renamed in the next cell).
test_df = pd.merge(xtest, ytest, left_index=True, right_index=True)

In [None]:
# Name the suffixed merge columns ('0_x' is match_id, '0_y' is the label), then
# order rows by match and by column 1 (time), the same ordering applied to
# train_df, so the per-match windows built later are chronological.
test_df.rename(columns={'0_y':'result','0_x':'match_id'},inplace=True)
test_df.sort_values(['match_id',1],inplace=True)
test_df.head()

## Parse Game Data to Reformat for Neural Network

In [None]:
def parse_game(grp):
    """Cut one match into overlapping 10-row windows.

    Side effects: appends to the module-level lists ``df_list`` and
    ``result_list``.

    Args:
        grp: DataFrame holding the rows of a single match. The first two
            columns and the last column are excluded from the features; the
            last column supplies the label.

    For a group of ``n`` rows the windows start at rows ``0 .. n - 11``, so
    ``max(n - 10, 0)`` windows are produced and the final row of the group
    never appears in a window. Each window contributes:

    * a 2-D array (10 rows x feature columns) appended to ``df_list``;
    * the last-column value of the window's first row (as a length-1 array)
      appended to ``result_list``.

    Returns:
        The module-level ``df_list`` (the same list object that was appended to).
    """
    window = 10

    # Column selections do not depend on the window position, so take them once.
    features = grp.iloc[:, 2:-1]
    labels = grp.iloc[:, -1:]

    for first in range(len(grp) - window):
        last = first + window
        df_list.append(features[first:last].values)
        result_list.append(labels[first:last].values[0])

    return df_list

### Training Data

In [None]:
# Accumulators that parse_game appends to.
df_list = []
result_list = []

gb = train_df.groupby('match_id')

# Walk the matches explicitly instead of going through GroupBy.apply:
# parse_game is called only for its side effects, and apply may invoke the
# function an extra time on the first group (older pandas) or omit the
# grouping column from the frame it passes (newer pandas). Either would
# corrupt the windows, since parse_game selects columns by position.
for _match_id, match_rows in gb:
    parse_game(match_rows)

In [None]:
# Sanity check: one label per window.
print(len(df_list))
print(len(result_list))

In [None]:
# Shape constants for the LSTM input: 22 feature columns per row (the length
# of cols_scale) and 10 rows per window (the chunk size used in parse_game).
num_features = 22
time_steps = 10

In [None]:
# reshape y to 1d numpy array
# NOTE(review): length_r is not referenced again in this part of the notebook.
length_r = len(result_list)
# Each entry of result_list is a length-1 array; ravel flattens the stack to 1-D.
y_train = np.array(result_list)
y_train = y_train.ravel()

In [None]:
# reshape x to 3d numpy array
length = len(df_list)
X_train = np.array(df_list)
# Target layout: (windows, time_steps, num_features).
X_train = X_train.reshape((length, time_steps, num_features))

### Test Data

In [None]:
def parse_game_test(grp):
    """Cut one test-set match into overlapping 10-row windows.

    Side effects: appends to the module-level lists ``df_list_test`` and
    ``result_list_test``.

    Args:
        grp: DataFrame holding the rows of a single match. The first two
            columns and the last column are excluded from the features; the
            last column supplies the label.

    For a group of ``n`` rows the windows start at rows ``0 .. n - 11``, so
    ``max(n - 10, 0)`` windows are produced and the final row of the group
    never appears in a window. Each window contributes:

    * a 2-D array (10 rows x feature columns) appended to ``df_list_test``;
    * the last-column value of the window's first row (as a length-1 array)
      appended to ``result_list_test``.

    Returns:
        The module-level ``df_list_test`` (the list this function fills), not
        the training accumulator.
    """
    window = 10

    # Column selections do not depend on the window position, so take them once.
    features = grp.iloc[:, 2:-1]
    labels = grp.iloc[:, -1:]

    for first in range(len(grp) - window):
        last = first + window
        df_list_test.append(features[first:last].values)
        result_list_test.append(labels[first:last].values[0])

    return df_list_test

In [None]:
# Accumulators that parse_game_test appends to.
df_list_test = []
result_list_test = []

gb = test_df.groupby('match_id')

# Walk the matches explicitly instead of going through GroupBy.apply:
# parse_game_test is called only for its side effects, and apply may invoke
# the function an extra time on the first group (older pandas) or omit the
# grouping column from the frame it passes (newer pandas). Either would
# corrupt the windows, since parse_game_test selects columns by position.
for _match_id, match_rows in gb:
    parse_game_test(match_rows)

In [None]:
# Sanity check: one label per test window.
print(len(df_list_test))
print(len(result_list_test))

In [None]:
# Flatten the per-window labels into a 1-D array.
# NOTE(review): length_r is not referenced again in this part of the notebook.
length_r = len(result_list_test)
y_test = np.array(result_list_test)
y_test = y_test.ravel()

In [None]:
# Stack the test windows into a 3-D array laid out as
# (windows, time_steps, num_features).
length = len(df_list_test)
X_test = np.array(df_list_test)
X_test = X_test.reshape((length, time_steps, num_features))

## Keras LSTM Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from keras.layers import LSTM
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [None]:
# Shapes that go into the model; the training arrays are shown with their last
# sample dropped, matching the slices passed to fit below.
print(X_train[:-1].shape)
print(y_train[:-1].shape)
print(X_test.shape)
print(y_test.shape)

In [None]:
# Single-layer stateful LSTM followed by a sigmoid unit for the binary result.
model = Sequential()
# The batch dimension is pinned to 17651 via batch_input_shape (the layer is
# stateful); presumably the [:-1] / [:-7] trimming used around this model makes
# the sample counts a multiple of that number — TODO confirm.
model.add(LSTM(32, batch_input_shape=(17651, X_train[:-1].shape[1],X_train[:-1].shape[2]),
               dropout=0.2, recurrent_dropout=0.2,stateful=True))
#model.add(Dropout(0.2))
model.add(Dense(1,activation='sigmoid'))

model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])
# 1000 passes over the training data, one epoch per fit call, resetting the
# LSTM state after each pass; shuffle=False keeps the sample order fixed.
for i in range(1000):
    model.fit(X_train[:-1], y_train[:-1], batch_size=17651,shuffle=False, epochs=1,verbose=1)
    model.reset_states()

In [None]:
# Evaluate on the test windows with the last 7 samples dropped (presumably so
# the sample count fits the fixed batch size of 17651 — TODO confirm).
score = model.evaluate(X_test[:-7], y_test[:-7], verbose=0,batch_size=17651)
score

In [None]:
# Labels for the values in `score`, in the same order.
model.metrics_names

## Keras GridSearch

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from keras.layers import LSTM
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [None]:
def create_model():
    """Build and compile the LSTM classifier used by the grid search.

    The network is a single 32-unit LSTM layer followed by one sigmoid output
    unit, compiled with binary cross-entropy, the Adam optimizer and accuracy
    as the reported metric.

    The LSTM is stateless and declares only the per-sample input shape
    (time steps, features), read from the module-level ``X_train``. A stateful
    layer with a fixed batch dimension only accepts batches of exactly that
    size, which cannot work when the grid search varies ``batch_size`` and
    fits on cross-validation subsets of the data.

    Returns:
        The compiled ``Sequential`` model.
    """
    steps_per_sample, features_per_step = X_train.shape[1], X_train.shape[2]

    model = Sequential()
    model.add(LSTM(32, input_shape=(steps_per_sample, features_per_step),
                   dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

In [None]:
# Wrap the model factory in a scikit-learn style estimator so that
# GridSearchCV can drive it.
model = KerasClassifier(build_fn=create_model, verbose=0)

In [None]:
# Hyper-parameter grid: 3 batch sizes x 3 epoch counts.
batch_size = [1024, 2048, 4096]
epochs = [10, 20, 30]

param_grid = dict(batch_size=batch_size,epochs=epochs)

# NOTE(review): no cv/groups argument is passed, so the default splitting is
# used and windows from the same match may land in both the fit and validation
# folds — confirm this is acceptable.
# NOTE(review): n_jobs=-1 requests parallel fits; confirm the Keras backend in
# use supports being run that way.
grid = GridSearchCV(estimator=model,param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Report the best configuration, then the mean and standard deviation of the
# cross-validation score for every parameter combination tried.
# The two values for the format string must be passed as one tuple; otherwise
# only the first is bound to the format and the call raises TypeError.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))