In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns

In [None]:
# Selects the input CSV (it is the "[N]" suffix of the file name) and the
# "<N>_<stat>" prefix of the feature columns used below.
# Presumably the number of prior games aggregated into each row -- TODO confirm.
num_games = 2

In [None]:
# Load the merged data set for the chosen window and drop every row that has a
# missing value in any column.
full_df = pd.read_csv(f'../Data/CompleteMerge(2018-2020)[{num_games}].csv').dropna()
col = ['MP', 'FG', 'FGA', '3P', '3PA',
        'FT', 'FTA', 'TRB', 'AST', 'STL', 'BLK',
        'PTS', '+-', 'FDP']
# Rows whose target is exactly zero are excluded from modelling.
full_df = full_df[full_df.FDP != 0]
# Feature columns carry a "<num_games>_" prefix, e.g. "2_MP".
x_col = [f'{num_games}_{i}' for i in col]
X = np.array(full_df[x_col])
# Target is kept 2-D, shape (n, 1); the plotting cells read each row as row[0].
y = np.array(full_df[['FDP']])

# Fixed seed: without it the split -- and therefore every metric and figure
# below -- changes on each Restart Kernel -> Run All.
x_train, x_test, y_train, y_test = train_test_split(X, y, train_size = .8, random_state = 42)

# NOTE(review): seaborn has deprecated distplot (since 0.11); consider histplot
# once the minimum seaborn version for this project is confirmed.
sns.distplot(y, bins = 10)
plt.xlabel('Fan Duel Points'); plt.title('Distribution of Fantasy Points');

## Linear Model

In [None]:
from sklearn.linear_model import LinearRegression 

# Ordinary least-squares baseline.
linear = LinearRegression()

# Fit on the training split.
linear.fit(x_train, y_train)

# Predict on the held-out features; y_test is only used for comparison below.
linear_predictions = linear.predict(x_test)

# Both arrays are (n, 1); flatten them so they can be compared element-wise.
predicted = np.ravel(linear_predictions)
actual = np.ravel(y_test)

# Signed miss. A point counts as a "good" prediction when the actual value is
# less than 4 below and at most 7 above the prediction (asymmetric band).
error = actual - predicted
good = (error > -4) & (error <= 7)
colors = np.where(good, 'green', 'red').tolist()

# One scatter call for the whole test set; calling plt.scatter once per point
# creates a separate artist for every sample and is needlessly slow.
plt.figure(figsize = (14,5))
plt.scatter(predicted, actual, s=10, c = colors)

# Mean squared error over the test split, rounded for the title.
loss = round(float(np.mean(error ** 2)), 3)
plt.title(f'Linear Model: Predictions vs. Actual | Mean Squared Error: {loss}')
plt.xlabel('Predictions'); plt.ylabel('Actual')
plt.savefig('LinearScatter.png')

In [None]:
# Feature names, in the same order as the columns of the design matrix.
input_variables = full_df[x_col].columns.to_list()

# The model was fit on a 2-D, single-column target, so coef_ is 2-D and
# row 0 holds the coefficients for that one target.
coeff = linear.coef_[0]

# One bar per input feature, saved next to the notebook.
fig, ax = plt.subplots(figsize = (12,5))
ax.bar(input_variables, coeff)
ax.set_title('Bar Plot Showing the Coefficients for Each Column')
fig.savefig('LinearCoefficients.png')

# Neural Networks

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.models import model_from_json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.wrappers.scikit_learn import KerasRegressor


# Training configuration for the dense network.
num_games = 2
epochs = 50
batches = 16

# Reload the data so this section does not depend on the linear-model cells.
full_df = pd.read_csv(f'../Data/CompleteMerge(2018-2020)[{num_games}].csv').dropna()
# Rows whose target is exactly zero are excluded from modelling.
full_df = full_df[full_df.FDP != 0]
col = ['MP', 'FG', 'FGA', '3P', '3PA',
        'FT', 'FTA', 'TRB', 'AST', 'STL', 'BLK',
        'PTS', 'FDP']
# Feature columns carry a "<num_games>_" prefix, e.g. "2_MP".
x_col = [f'{num_games}_{i}' for i in col]
X = np.array(full_df[x_col])
# Target is kept 2-D, shape (n, 1).
y = np.array(full_df[['FDP']])

# Fixed seed so the split and everything trained on it is reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, train_size = .8, random_state = 42)

min_max = MinMaxScaler()
standard= StandardScaler()  # created but not used in this cell
# Learn the min/max from the training split only, then apply that same mapping
# to the test split. Re-fitting on x_test would scale the two splits
# differently and let test-set statistics shape the evaluation inputs.
x_train = min_max.fit_transform(x_train)
x_test = min_max.transform(x_test)

### Sequential AAN

In [None]:
def sequential_AAN(x,y, metrics = ['accuracy', 'mae']):
    """Build and compile a fully connected regression network.

    Architecture: an input-width Dense layer, three Dense(128) layers, each
    with ReLU and followed by Dropout(0.1), then a single linear output unit.
    Compiled with mean-squared-error loss and the Adam optimizer.

    Parameters
    ----------
    x : array with at least two dimensions
        Only ``x.shape[1]`` is read; it sets both the input width and the
        width of the first Dense layer.
    y : any
        Accepted for call-site symmetry; not used here.
    metrics : list of str
        Passed straight to ``model.compile``.
        NOTE(review): 'accuracy' is of doubtful value for a continuous
        target -- confirm it is wanted.

    Returns
    -------
    The compiled (untrained) Keras ``Sequential`` model.
    """
    drop = .1
    n_features = x.shape[1]
    hidden_layers = 3

    model = Sequential()

    # Input block: as wide as the feature vector.
    model.add(Dense(n_features, input_dim = n_features, activation = 'relu'))
    model.add(Dropout(drop))  # regularization

    # Identical hidden blocks.
    for _ in range(hidden_layers):
        model.add(Dense(128, activation = 'relu'))
        model.add(Dropout(drop))

    # Single linear unit for the regression output.
    model.add(Dense(1, activation = 'linear'))
    model.compile(loss = 'mse', optimizer = 'adam', metrics = metrics)
    return model

In [None]:
# Build the dense network sized to the training features, then train it while
# tracking the same metrics on the test split via validation_data.
seq_ann = sequential_AAN(x_train, y_train)
ann_history = seq_ann.fit(
    x_train,
    y_train,
    validation_data = (x_test, y_test),
    batch_size = batches,
    epochs = epochs,
)
# Alternative to retraining -- rebuild the model from the files on disk:
# json = open('models/ANN_Model.json', 'r')
# ann_json = json.read()
# json.close()
# seq_ann = model_from_json(ann_json)
# seq_ann.load_weights('models/ANN_Model_Weights.h5')

In [None]:
# Create the output directory if it is missing; otherwise the writes below
# raise FileNotFoundError on a fresh checkout.
os.makedirs('models', exist_ok = True)

# Architecture only, serialized as JSON.
ann_json = seq_ann.to_json()
with open('models/ANN_Model.json', 'w') as file:
    file.write(ann_json)

# NOTE: save() stores the whole model in this HDF5 file, not only the weights,
# despite the file name.
seq_ann.save('models/ANN_Model_Weights.h5')

In [None]:
#Plotting the losses for ANN
print(ann_history.history.keys())
ann_df = pd.DataFrame(ann_history.history)
ann_df.tail(5)
fig, ax = plt.subplots(2, figsize = (10,5))
ax[0].plot(ann_df.index, ann_df.loss, label = 'Train')
ax[0].plot(ann_df.index, ann_df.val_loss, label = 'Test')
ax[0].legend()
ax[0].set_ylabel('Mean Squared Error')

ax[1].plot(ann_df.index, ann_df.mae, label = 'Train')
ax[1].plot(ann_df.index, ann_df.val_mae, label = 'Test')
ax[1].legend()
ax[1].set_ylabel('Mean Absolute Error')
plt.xlabel('Epochs')
plt.suptitle('Sequential ANN Accuracy')
plt.savefig('ANNLoss.png')

In [None]:
# Predict on the held-out features with the dense network; y_test is only used
# for comparison below.
ann_predictions = seq_ann.predict(x_test)

# Both arrays are (n, 1); flatten them so they can be compared element-wise.
predicted = np.ravel(ann_predictions)
actual = np.ravel(y_test)

# Signed miss. A point counts as a "good" prediction when the actual value is
# less than 4 below and at most 7 above the prediction (asymmetric band).
error = actual - predicted
good = (error > -4) & (error <= 7)
colors = np.where(good, 'green', 'red').tolist()

# One scatter call for the whole test set; calling plt.scatter once per point
# creates a separate artist for every sample and is needlessly slow.
plt.figure(figsize = (14,5))
plt.scatter(predicted, actual, s=10, c = colors)

# Mean squared error over the test split, rounded for the title.
loss = round(float(np.mean(error ** 2)), 3)
plt.title(f'Sequential ANN Model: Predictions vs. Actual | Mean Squared Error: {loss}')
plt.xlabel('Predictions'); plt.ylabel('Actual')
plt.savefig('ANNScatter.png')

### Multvariate RNN (LSTM)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.models import model_from_json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Training configuration for the LSTM.
num_games = 2
epochs = 50
batches = 32

# Reload the data so this section does not depend on earlier cells.
full_df = pd.read_csv(f'../Data/CompleteMerge(2018-2020)[{num_games}].csv').dropna()
# Rows whose target is exactly zero are excluded from modelling.
full_df = full_df[full_df.FDP != 0]
col = ['MP', 'FG', 'FGA', '3P', '3PA',
        'FT', 'FTA', 'TRB', 'AST', 'STL', 'BLK',
        'PTS', 'FDP']
# Prefixed per-stat columns plus two extra un-prefixed columns.
x_col = [f'{num_games}_{i}' for i in col] + ['FDS','FD_change']
min_max= MinMaxScaler()
standard = StandardScaler()  # created but not used in this cell

X = np.array(full_df[x_col])
# Target is kept 2-D, shape (n, 1).
y = np.array(full_df[['FDP']])


# Fixed seed so the split and everything trained on it is reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, train_size = .8, random_state = 42)

# Learn the min/max from the training split only, then apply that same mapping
# to the test split. Re-fitting on x_test would scale the two splits
# differently and let test-set statistics shape the evaluation inputs.
x_train = min_max.fit_transform(x_train)
x_test = min_max.transform(x_test)
# The LSTM expects 3-D input: (samples, timesteps, features), with one timestep here.
x_test = x_test.reshape((x_test.shape[0],1 , x_test.shape[1]))
x_train = x_train.reshape((x_train.shape[0], 1, x_train.shape[1]))


In [None]:
# Sanity check on the reshape above: expect (n_test_rows, 1, n_features).
print(x_test.shape)

In [None]:
def multivar_RNN(x,y, metrics = ['accuracy', 'mae']):
    """Build and compile a three-layer stacked LSTM regressor.

    Architecture: three LSTM layers of 32 units with ReLU activation, each
    followed by Dropout(0.1). The first two return full sequences so the next
    LSTM receives 3-D input; the last returns only its final output, which
    feeds a single Dense unit (no activation argument, i.e. Keras' default).
    Compiled with mean-squared-error loss and the Adam optimizer.

    Parameters
    ----------
    x : array with at least three dimensions
        Only ``x.shape[2]`` (the feature width) is read.
    y : any
        Accepted for call-site symmetry; not used here.
    metrics : list of str
        Passed straight to ``model.compile``.
        NOTE(review): 'accuracy' is of doubtful value for a continuous
        target -- confirm it is wanted.

    Returns
    -------
    The compiled (untrained) Keras ``Sequential`` model.
    """
    units = 32
    drop = .1

    model = Sequential()

    # Only the first layer declares the input width. The deeper LSTMs take
    # their input from the layer before them (width `units`), so passing
    # input_dim = x.shape[2] to them was ignored and misleading.
    model.add(LSTM(units, return_sequences = True, input_dim = x.shape[2], activation = 'relu'))
    model.add(Dropout(drop))
    model.add(LSTM(units = units, return_sequences = True, activation = 'relu'))
    model.add(Dropout(drop))
    model.add(LSTM(units = units, return_sequences = False, activation = 'relu'))
    model.add(Dropout(drop))

    # Single output unit for the regression target.
    model.add(Dense(1))

    model.compile(optimizer = 'adam', loss = 'mse', metrics = metrics)

    return model
        

In [None]:
# Build the stacked-LSTM regressor from the 3-D training tensor, then train it
# while tracking the same metrics on the test split via validation_data.
mult_rnn = multivar_RNN(x_train, y_train)
rnn_history = mult_rnn.fit(
    x_train,
    y_train,
    validation_data = (x_test, y_test),
    batch_size = batches,
    epochs = epochs,
    verbose = 1,
)

In [None]:
# Create the output directory if it is missing; otherwise the writes below
# raise FileNotFoundError on a fresh checkout.
os.makedirs('models', exist_ok = True)

# Architecture only, serialized as JSON.
rnn_json = mult_rnn.to_json()
with open('models/RNN_Model.json', 'w') as file:
    file.write(rnn_json)
# NOTE: save() stores the whole model in this HDF5 file, not only the weights,
# despite the file name.
mult_rnn.save('models/RNN_Model_weights.h5')

# Alternative to retraining -- rebuild the model from the files on disk.
# The weights file name must match the save above exactly (lower-case
# "weights"); a different capitalisation fails on case-sensitive filesystems.
# json = open('models/RNN_Model.json', 'r')
# rnn_json = json.read()
# json.close()
# mult_rnn = model_from_json(rnn_json)
# mult_rnn.load_weights('models/RNN_Model_weights.h5')

In [None]:
# Training curves for the LSTM: one panel per tracked quantity.
print(rnn_history.history.keys())
rnn_df = pd.DataFrame(rnn_history.history)
rnn_df.tail(5)  # result is discarded here: only a cell's last expression is displayed
fig, ax = plt.subplots(2, figsize = (10,5))

# (axis, training column, validation column, y-axis label)
panels = (
    (ax[0], 'loss', 'val_loss', 'Mean Squared Error'),
    (ax[1], 'mae', 'val_mae', 'Mean Absolute Error'),
)
for axis, train_key, test_key, y_label in panels:
    axis.plot(rnn_df.index, rnn_df[train_key], label = 'Train')
    axis.plot(rnn_df.index, rnn_df[test_key], label = 'Test')
    axis.legend()
    axis.set_ylabel(y_label)

# The x label belongs to the bottom panel, which is the current axes.
ax[1].set_xlabel('Epochs')
fig.suptitle('LSTM RNN Accuracy')
fig.savefig('RNNLoss.png')

In [None]:
# Predict on the held-out features with the LSTM; y_test is only used for
# comparison below.
rnn_predictions = mult_rnn.predict(x_test)

# Both arrays are (n, 1); flatten them so they can be compared element-wise.
predicted = np.ravel(rnn_predictions)
actual = np.ravel(y_test)

# A point counts as a "good" prediction when it misses by at most 7 in either
# direction. NOTE(review): the linear and ANN scatter cells use an asymmetric
# band instead -- confirm which rule is intended.
diff = predicted - actual
colors = np.where(np.abs(diff) <= 7, 'green', 'red').tolist()

# One scatter call for the whole test set; calling plt.scatter once per point
# creates a separate artist for every sample and is needlessly slow.
plt.figure(figsize = (14,5))
plt.scatter(predicted, actual, s=10, c = colors)

# Mean squared error over the test split, rounded for the title.
loss = round(float(np.mean(diff ** 2)), 3)
plt.title(f'LSTM RNN Model: Predictions vs. Actual | Mean Squared Error: {loss}')
plt.xlabel('Predictions'); plt.ylabel('Actual')
plt.savefig('RNNScatter.png')