### Predictions on Test Dataset
Train with the best models from Linear Regression, LightGBM, and Neural Networks and make predictions on the test data of March 2019 to create lineups.

In [66]:
import os
import glob
import warnings

import numpy as np
import pandas as pd
import lightgbm as lgb
import matplotlib.pyplot as plt

from tqdm import tqdm_notebook as tqdm
from sklearn.preprocessing import MinMaxScaler

from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor

import utils
from constants import DATA_DIR

In [67]:
np.random.seed(23)
warnings.filterwarnings("ignore")

In [68]:
def root_mean_squared_error(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))


def model_1():
    model = Sequential()
    model.add(Dense(X.shape[1], input_dim=X.shape[1], activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(1))
    model.compile(loss=root_mean_squared_error, optimizer="adam")
    model.summary()
    return model

In [69]:
def save_pred(predictor, y_pred):
    df_pred = df_features.loc[
        test_indices, ["Date", "Name", "Team", "FPTS", "Pos", "Salary"]
    ]
    df_pred["Pred"] = y_pred
    df_pred.to_csv(
        os.path.join(
            DATA_DIR,
            "Predictions/{}-{}.csv".format(
                pd.Timestamp.now().strftime("%Y%m%d-%Hh%Mm"), predictor
            ),
        ),
        index=False,
    )

In [62]:
weighting = "quad"
df_features = utils.csv_concatenate(
    os.path.join(DATA_DIR, "Dataframes", "Modelling", "Features", weighting)
)
df_features = df_features.sort_values(by=["Date", "Name"]).reset_index(drop=True)
features = df_features.columns[5:]

In [70]:
target_month = 201903

start = 20190301
end = 20190331

test_indices = (df_features["Date"] >= start) & (df_features["Date"] <= end)
train_indices = [not value for value in test_indices]

X_train = df_features.loc[train_indices, features]
X_test = df_features.loc[test_indices, features]

y_train = df_features.loc[train_indices, "FPTS"].values.reshape(-1, 1).flatten()
y_test = df_features.loc[test_indices, "FPTS"].values.reshape(-1, 1).flatten()

X_train = MinMaxScaler().fit_transform(X_train)
X_test = MinMaxScaler().fit_transform(X_test)

### LightGBM

In [72]:
path_params = sorted(glob.glob(DATA_DIR + "/Models/LightGBM/Params/*.json"))[-1]
df_params = pd.read_json(path_params, lines=True)
df_params = (
    df_params.loc[:, ["target", "params"]]
    .sort_values(by="target", ascending=False)
    .reset_index()
)
display(df_params.head(5))


opt_params = df_params.loc[0, "params"]

for key in opt_params.keys():
    if key in ["max_depth", "num_leaves", "n_estimators"]:
        opt_params[key] = int(round(opt_params[key]))

d_train = lgb.Dataset(X_train, label=y_train)
reg = lgb.train(opt_params, d_train)
y_pred = reg.predict(X_test)

print("<--- Testing Error --->")
print(utils.calculate_MAE(y_pred, y_test))
print(utils.calculate_RMSE(y_pred, y_test))

save_pred("lgb", y_pred)

Unnamed: 0,index,target,params
0,17,-9.080672,"{'bagging_fraction': 0.9397558879953231, 'feat..."
1,13,-9.080783,"{'bagging_fraction': 0.8967783942656771, 'feat..."
2,3,-9.082169,"{'bagging_fraction': 0.965750296864551, 'featu..."
3,19,-9.082335,"{'bagging_fraction': 0.8, 'feature_fraction': ..."
4,5,-9.083474,"{'bagging_fraction': 0.9803204385614761, 'feat..."


<--- Testing Error --->
8.283806238168077
10.71637699430448


### NN

In [73]:
model = KerasRegressor(
    build_fn=model_1,
    epochs=28,
    batch_size=32,
    validation_split=0.2,
    shuffle=True,
    verbose=1,
)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("<--- Testing Error --->")
print(utils.calculate_MAE(y_pred, y_test))
print(utils.calculate_RMSE(y_pred, y_test))

save_pred("nn", y_pred)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_25 (Dense)             (None, 41)                1722      
_________________________________________________________________
dense_26 (Dense)             (None, 64)                2688      
_________________________________________________________________
dense_27 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_28 (Dense)             (None, 1)                 33        
Total params: 6,523
Trainable params: 6,523
Non-trainable params: 0
_________________________________________________________________
Train on 91903 samples, validate on 22976 samples
Epoch 1/28
Epoch 2/28
Epoch 3/28
Epoch 4/28
Epoch 5/28
Epoch 6/28
Epoch 7/28
Epoch 8/28
Epoch 9/28
Epoch 10/28
Epoch 11/28
Epoch 12/28
Epoch 13/28
Epoch 14/28
Epoch 15/28
Epoch 16/28
Epoch 17/28
Epoch 18/28
Epoch 19/28
Epo