### Predictions on Test Dataset
Train the best Linear Regression, LightGBM, and neural-network models, then predict on the March 2019 test data to create lineups.

In [7]:
import glob
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_score, cross_validate
from sklearn.preprocessing import MinMaxScaler

from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor

from constants import DATA_DIR
from utils import csv_concatenate, calculate_FPTS, calculate_MAE, calculate_RMSE, cross_val, load_full_dataset

# NOTE(review): later cells reference `lgb` (LightGBM) and `xgb` (XGBoost),
# but neither is imported anywhere in the visible notebook — add those
# imports if the packages are available in this environment.

In [8]:
# Silence all warnings for the rest of the notebook.
warnings.filterwarnings("ignore")
# Fix NumPy's global random seed.
np.random.seed(23)

In [18]:
def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared difference of the two tensors.

    Built from Keras backend ops so it can be passed as `loss=` to
    `model.compile`.
    """
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

def create_model(model_num):
    """Build and compile one of the three candidate feed-forward regressors.

    Every variant starts with a ReLU layer as wide as the number of feature
    columns in the module-level `X`, ends in a single linear output unit, and
    is compiled with `root_mean_squared_error` as the loss and the Adam
    optimizer. The model summary is printed before the model is returned.

    Args:
        model_num: Architecture selector; must be 1, 2 or 3.

    Returns:
        The compiled Keras `Sequential` model.

    Raises:
        ValueError: If `model_num` is not 1, 2 or 3. (Previously the string
            "invalid model_num" was returned, which only failed later when
            the caller tried to use it as a model.)
    """
    # model_num -> (hidden layer widths after the input-sized first layer,
    #               whether a Dropout(0.2) sits just before the output layer)
    architectures = {
        1: ((64, 32), False),
        2: ((64, 128, 32), True),
        3: ((128, 256, 64), True),
    }
    if model_num not in architectures:
        raise ValueError(
            "invalid model_num: {!r} (expected 1, 2 or 3)".format(model_num)
        )
    hidden_units, use_dropout = architectures[model_num]

    # The input width comes from the module-level feature matrix X.
    n_features = X.shape[1]

    model = Sequential()
    model.add(Dense(n_features, input_dim=n_features, activation='relu'))
    for units in hidden_units:
        model.add(Dense(units, activation='relu'))
    if use_dropout:
        model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(loss=root_mean_squared_error, optimizer='adam')
    model.summary()
    return model

In [37]:
def load_trained_model(weights_path, model_num=1):
    """Rebuild a network with `create_model` and load saved weights into it.

    Args:
        weights_path: Path to the weights file passed to `load_weights`.
        model_num: Architecture selector forwarded to `create_model`. It was
            previously omitted, so the call always failed with a TypeError.
            NOTE(review): the default of 1 is a guess — pass the number of
            the architecture the weights were actually trained with.

    Returns:
        The compiled model with the weights loaded.
    """
    model = create_model(model_num)
    model.load_weights(weights_path)
    return model

def get_weights_path_and_epoch(model_num):
    """Find the saved checkpoint with the lowest loss for one model.

    Looks for `*.hdf5` files under `DATA_DIR/Models/NN/Model_<model_num>/`
    and reads each file's loss from the 7 characters that precede the
    `.hdf5` extension.

    Args:
        model_num: Model number used to pick the checkpoint directory.

    Returns:
        Tuple `(path, index)`: the path of the lowest-loss checkpoint and its
        position in the sorted file listing.
        NOTE(review): the index is reported as the "epoch"; that only holds
        if the checkpoint filenames sort in epoch order — confirm against the
        naming used when the checkpoints were written.

    Raises:
        ValueError: If no checkpoint files are found, or a filename does not
            carry a parseable loss in the expected position.
    """
    pattern = os.path.join(DATA_DIR, "Models", "NN",
                           "Model_{}".format(model_num), "*.hdf5")
    # glob returns files in arbitrary order; sort so the index is stable.
    filepaths = sorted(glob.glob(pattern))
    if not filepaths:
        raise ValueError("no checkpoint files match {}".format(pattern))

    losses = [float(filepath[-12:-5]) for filepath in filepaths]
    best_loss = min(losses)
    epochs = losses.index(best_loss)
    print('Model {} | Lowest Valid Error: {} at Epoch {}'.format(model_num,
                                                                 best_loss,
                                                                 epochs))
    return (filepaths[epochs], epochs)

In [34]:
# Weighting scheme name handed to load_full_dataset (helper imported from utils).
weighting = 'quad'
# X and y are module-level names: create_model reads X.shape[1] for its input
# width, and the fit/cross-validation cells below train on (X, y).
# presumably X is the feature matrix and y the target — confirm in utils.
X, y = load_full_dataset(weighting)

In [None]:
# build_fn must be a callable that builds the network; the wrapper invokes it
# itself when fit() is called. Passing create_model(1) handed it an
# already-built model instead, so wrap the call in a lambda.
model = KerasRegressor(build_fn=lambda: create_model(1),
                       epochs=50,
                       batch_size=32,
                       validation_split=0.2,
                       shuffle=True,
                       verbose=1)
# fit() returns the training history; h1.history is plotted in the next cell.
h1 = model.fit(X, y)

In [None]:
# Training and validation loss per epoch for the first fit (h1).
for curve in ('loss', 'val_loss'):
    plt.plot(h1.history[curve])
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()

In [None]:
# model_2 is a local function inside create_model and is not visible at
# module level; build architecture 2 through create_model instead.
model = KerasRegressor(build_fn=lambda: create_model(2),
                       epochs=60,
                       batch_size=32,
                       validation_split=0.2,
                       shuffle=True,
                       verbose=1)
# Keep the training history for architecture 2.
h2 = model.fit(X, y)

In [None]:
# model_3 is a local function inside create_model and is not visible at
# module level; build architecture 3 through create_model instead.
model = KerasRegressor(build_fn=lambda: create_model(3),
                       epochs=60,
                       batch_size=32,
                       validation_split=0.2,
                       shuffle=True,
                       verbose=1)
# Keep the training history for architecture 3.
h3 = model.fit(X, y)

In [None]:
# One loss figure per training history.
# NOTE(review): h2 (architecture 2) is trained above but not plotted here —
# confirm that leaving it out is intentional.
for hist in [h1, h3]:
    plt.subplot(111)
    for curve in ('loss', 'val_loss'):
        plt.plot(hist.history[curve])
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Validation'], loc='upper right')
    plt.show()

In [None]:
# model_1 is a local function inside create_model and is not visible at
# module level; build architecture 1 through create_model instead.
model = KerasRegressor(build_fn=lambda: create_model(1),
                       epochs=30,
                       batch_size=32,
                       validation_split=0.2,
                       shuffle=True,
                       verbose=1)

# 5-fold splitter with shuffling, used by the cross-validation cell below.
kfold = KFold(n_splits=5, shuffle=True)

In [None]:
# Score both metrics in one cross-validation pass: each fold's network is
# trained once, and MAE and MSE are measured on the same splits. Two separate
# cross_val_score calls trained everything twice and, because kfold shuffles,
# scored the two metrics on different splits.
cv_scores = cross_validate(model, X, y, cv=kfold, n_jobs=1,
                           scoring=['neg_mean_absolute_error',
                                    'neg_mean_squared_error'])
# Both arrays hold negated errors, one entry per fold; results_RMSE holds
# negated MSE, and the square root is taken where it is printed.
results_MAE = cv_scores['test_neg_mean_absolute_error']
results_RMSE = cv_scores['test_neg_mean_squared_error']

In [None]:
# results_RMSE holds negated MSE per fold: flip the sign, then take the root.
print(np.sqrt(-results_RMSE))
print("Results: %.4f RMSE" % np.sqrt(np.mean(-results_RMSE)))

# results_MAE holds negated MAE per fold: only the sign flip is needed.
# (The per-fold values were previously passed through np.sqrt by mistake.)
print(-results_MAE)
print("Results: %.4f MAE" % np.mean(-results_MAE))

In [None]:
# model_3 is a local function inside create_model and is not visible at
# module level; build architecture 3 through create_model instead.
model = KerasRegressor(build_fn=lambda: create_model(3),
                       epochs=15,
                       batch_size=32,
                       validation_split=0.2,
                       shuffle=True,
                       verbose=2)

# 5-fold splitter with shuffling, used by the cross-validation cell below.
kfold = KFold(n_splits=5, shuffle=True)

In [None]:
# Score both metrics in one cross-validation pass: each fold's network is
# trained once, and MAE and MSE are measured on the same splits. Two separate
# cross_val_score calls trained everything twice and, because kfold shuffles,
# scored the two metrics on different splits.
cv_scores = cross_validate(model, X, y, cv=kfold, n_jobs=1,
                           scoring=['neg_mean_absolute_error',
                                    'neg_mean_squared_error'])
# Both arrays hold negated errors, one entry per fold; results_RMSE holds
# negated MSE, and the square root is taken where it is printed.
results_MAE = cv_scores['test_neg_mean_absolute_error']
results_RMSE = cv_scores['test_neg_mean_squared_error']

In [None]:
# results_RMSE holds negated MSE per fold: flip the sign, then take the root.
print(np.sqrt(-results_RMSE))
print("Results: %.4f RMSE" % np.sqrt(np.mean(-results_RMSE)))

# results_MAE holds negated MAE per fold: only the sign flip is needed.
# (The per-fold values were previously passed through np.sqrt by mistake.)
print(-results_MAE)
print("Results: %.4f MAE" % np.mean(-results_MAE))

### Prediction

In [None]:
### Train Test Split
# NOTE(review): the sorted frame is bound to X but never used below — the
# masks and splits are all taken from the unsorted df_features. Rebinding X
# also changes the width create_model reads from X.shape[1]. Confirm whether
# df_features itself was meant to be sorted here.
X = df_features.sort_values(by=['Date','Name']).reset_index(drop=True)

target_month = 201903

# Inclusive bounds of the test window, compared directly with the 'Date' column.
start = 20190301
end = 20190331

# Boolean row masks: rows inside the window are test, all others are train.
test_indices = (df_features['Date'] >= start) & (df_features['Date'] <= end)
train_indices = ~test_indices

X_train = df_features.loc[train_indices, selected]
X_test = df_features.loc[test_indices, selected]

# A single column selected with .loc is already 1-D, so no reshape is needed.
y_train = df_features.loc[train_indices, 'FPTS'].to_numpy()
y_test = df_features.loc[test_indices, 'FPTS'].to_numpy()

# NOTE(review): feature scaling is currently disabled. If it is re-enabled,
# fit one MinMaxScaler on X_train and reuse it to transform X_test; fitting a
# second scaler on X_test would scale the two sets inconsistently.

In [None]:
# Display the (rows, columns) of the train and test feature frames.
X_train.shape, X_test.shape

In [None]:
# Rows of the baseline frame whose Date falls inside the test window.
in_test_window = (df_baseline['Date'] >= start) & (df_baseline['Date'] <= end)

pred_baseline = df_baseline.loc[in_test_window, 'FPTS_pred'].reset_index(drop=True)
actual = df_baseline.loc[in_test_window, 'FPTS'].reset_index(drop=True)

# Baseline error on the test window: MAE first, then RMSE.
for metric in (calculate_MAE, calculate_RMSE):
    print(metric(pred_baseline, actual))

In [None]:
# Fit ordinary linear regression on the training split (fit returns the estimator).
reg = LinearRegression().fit(X_train, y_train)
pred_lm = reg.predict(X_test)

# Test-window error: MAE first, then RMSE.
for metric in (calculate_MAE, calculate_RMSE):
    print(metric(pred_lm, y_test))

In [None]:
d_train = lgb.Dataset(X_train, label=y_train)
# Tie the evaluation set to the training set via reference=.
d_test = lgb.Dataset(X_test, label=y_test, reference=d_train)

# d_valid was never defined; the set built for evaluation above is d_test.
watchlist = [d_test]

# NOTE(review): assumes df_params.loc[0, 'params'] is already a dict of
# LightGBM parameters (not a string read back from a CSV) — confirm.
opt_params = df_params.loc[0, 'params']

# The third positional argument of lgb.train is num_boost_round, so the
# evaluation sets must be passed by keyword.
model = lgb.train(opt_params, d_train, valid_sets=watchlist, verbose_eval=1)
pred_gbm = model.predict(X_test)

print(calculate_MAE(pred_gbm, y_test))
print(calculate_RMSE(pred_gbm, y_test))

In [None]:
# Hyperparameters passed to XGBRegressor.
best_parameters = dict(
    max_depth=6,
    n_estimators=250,
    min_child_weight=4,
    colsample_bytree=0.6,
    colsample_bylevel=0.7,
    subsample=1.0,
    gamma=0.0,
    learning_rate=0.026944654231987667,
)

reg = xgb.XGBRegressor(**best_parameters)
reg.fit(X_train, y_train, verbose=1)
pred_xgb = reg.predict(X_test)

# Test-window error: MAE first, then RMSE.
for metric in (calculate_MAE, calculate_RMSE):
    print(metric(pred_xgb, y_test))

In [None]:
def advanced_model():
    """Build and compile the deeper network used for the final NN predictions.

    The first layer is as wide as the number of columns in the module-level
    `X_train`. Dropout(0.2) follows the first two dense layers only, and the
    model is compiled with MSE loss and the Adam optimizer.
    """
    n_features = X_train.shape[1]
    layers = [
        Dense(n_features, input_dim=n_features, activation='relu'),
        Dropout(0.2),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(256, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(loss='mse', optimizer='adam')
    return model

In [None]:
# Training settings forwarded by the wrapper to Keras.
fit_settings = dict(epochs=30,
                    batch_size=64,
                    validation_split=0.2,
                    shuffle=True,
                    verbose=1)
model = KerasRegressor(build_fn=advanced_model, **fit_settings)

# Train on the pre-March split and keep the training history.
h = model.fit(X_train, y_train)

In [None]:
# Neural-network predictions for the test window, scored by MAE then RMSE.
pred_nn = model.predict(X_test)
for metric in (calculate_MAE, calculate_RMSE):
    print(metric(pred_nn, y_test))

### Write predictions to CSV

In [None]:
# Take an explicit copy of the test-window rows so that adding the 'Pred'
# column below never writes through to (or warns about) df_features.
df_pred = df_features.loc[test_indices, ['Date', 'Name', 'Team', 'FPTS', 'Pos', 'Salary']].copy()
# Attach the LightGBM predictions; pred_gbm was produced from X_test, which
# uses the same test_indices mask, so the rows line up positionally.
df_pred['Pred'] = pred_gbm

In [None]:
# Write the predictions to DATA_DIR/Predictions/<YYYYMMDD-HHhMMm>.csv,
# creating the output directory first so a fresh checkout does not fail.
pred_dir = os.path.join(DATA_DIR, 'Predictions')
os.makedirs(pred_dir, exist_ok=True)
timestamp = pd.Timestamp.now().strftime('%Y%m%d-%Hh%Mm')
df_pred.to_csv(os.path.join(pred_dir, '{}.csv'.format(timestamp)), index=False)