In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dropout, Activation, Dense , LSTM, Flatten ,TimeDistributed
from keras.layers.convolutional import Conv1D , MaxPooling1D
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from tensorflow.keras import regularizers
from keras.regularizers import L1L2
import math

ModuleNotFoundError: No module named 'keras'

In [None]:
def scale_data_toLSTM(X, Y, x_scaler=None, y_scaler=None, fit=True):
    """Min-max scale features and target and reshape them for the CNN-LSTM model.

    Parameters
    ----------
    X : array-like or DataFrame, shape (n_samples, n_features)
        Feature matrix; every column must be castable to float32.
    Y : array-like or Series, shape (n_samples,)
        Target values (a single column).
    x_scaler, y_scaler : scaler objects, optional
        Scalers exposing ``fit_transform`` / ``transform``. When omitted, a
        fresh ``MinMaxScaler(feature_range=(0, 1))`` is created for each.
    fit : bool, default True
        If True the scalers are (re)fitted on ``X`` / ``Y``. Pass False
        together with scalers that were already fitted on the training split
        to apply the training statistics to another split.

    Returns
    -------
    tuple
        ``(X_scaled, Y_scaled, scaler)`` where ``X_scaled`` has shape
        ``(n_samples, 1, 1, n_features)``, ``Y_scaled`` has shape
        ``(n_samples,)`` and ``scaler`` is the scaler fitted on the target
        (the one needed to invert predictions).

    Raises
    ------
    ValueError
        If ``fit`` is False and either scaler is missing.
    """
    if not fit and (x_scaler is None or y_scaler is None):
        raise ValueError("fit=False requires already fitted x_scaler and y_scaler")

    # ensure all feature data is float
    features = pd.DataFrame(X).values.astype('float32')
    target = pd.DataFrame(Y).values

    # Separate scalers for features and target: a single shared object would be
    # refitted on Y and lose the feature statistics.
    if x_scaler is None:
        x_scaler = MinMaxScaler(feature_range=(0, 1))
    if y_scaler is None:
        y_scaler = MinMaxScaler(feature_range=(0, 1))

    # normalize features and target
    if fit:
        X_scaled = x_scaler.fit_transform(features)
        Y_scaled = y_scaler.fit_transform(target)
    else:
        X_scaled = x_scaler.transform(features)
        Y_scaled = y_scaler.transform(target)

    # reshape input to be 4D [samples, subsequences, timesteps, features] and target data to be [samples,]
    X_scaled = X_scaled.reshape(X_scaled.shape[0], 1, 1, X_scaled.shape[1])
    Y_scaled = Y_scaled.reshape((Y_scaled.shape[0],))

    return (X_scaled, Y_scaled, y_scaler)

In [None]:
# Load the final dataset (the closing quote of the raw-string path was missing,
# which made this cell a SyntaxError).
# NOTE(review): absolute, machine-specific path; adjust it when running elsewhere.
data = pd.read_csv(r'C:\Users\Asus\PycharmProjects\FPL Project\my_data\final_dataset.csv')

In [None]:
# discard every row that contains at least one missing value
data = data.dropna(axis=0, how='any')

In [None]:
# remove the rows whose position_name is 'GK'
gk_index = data.index[data['position_name'] == 'GK']
data_wo_gk = data.drop(index=gk_index)

In [None]:
# keep every column except the ones listed below
excluded_columns = {'player', 'kickoff_time', 'position',
                    'penalties_saved', 'saves', 'transfers_balance'}
kept_columns = [name for name in data_wo_gk.columns if name not in excluded_columns]
data_refined = data_wo_gk.loc[:, kept_columns]

In [None]:
# one-hot encode the categorical columns, dropping the first level of each
categorical_columns = ['position_name', 'gw', 'opponent_team', 'team', 'season']
data_refined = pd.get_dummies(data_refined, columns=categorical_columns, drop_first=True)

In [None]:
# features are every column except the target 'total_points'; hold out 20% for testing
is_feature = data_refined.columns != 'total_points'
features = data_refined.loc[:, is_feature]
target = data_refined['total_points']
X_train, X_test, Y_train, Y_test = train_test_split(
    features, target, test_size=0.2, random_state=0
)

In [None]:
# Fit the scaling statistics on the training split only and reuse them for the
# test split, so both splits live in the same scaled space and no test-set
# information leaks into preprocessing.
train_X, train_y, scaler = scale_data_toLSTM(X_train, Y_train)  # scaler holds the Y_train statistics

feature_scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train.values.astype('float32'))
test_X = feature_scaler.transform(X_test.values.astype('float32'))
test_X = test_X.reshape(test_X.shape[0], 1, 1, test_X.shape[1])  # same 4D layout as train_X
test_y = scaler.transform(Y_test.values.reshape(-1, 1)).reshape(-1)

In [None]:
# define model
# Take the feature count from the prepared data instead of hard-coding 123, so
# the model keeps working when the one-hot encoded column set changes.
n_features = train_X.shape[-1]

model = Sequential()
model.add(TimeDistributed(Conv1D(filters=15, kernel_size=1, activation='relu'),
                          input_shape=(1, 1, n_features)))
model.add(TimeDistributed(Dense(1, kernel_regularizer=regularizers.l2(0.01))))
model.add(TimeDistributed(MaxPooling1D(pool_size=1)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(10))
# Dropout goes before the output layer: applied after Dense(1) it would randomly
# zero the prediction itself during training.
model.add(Dropout(0.03))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()

In [None]:
# training hyper-parameters: mini-batch size and number of epochs
NO_BATCH_SIZE, NO_EPOCHS = 100, 30

In [None]:
# train the network, tracking the loss on the test split after every epoch
fit_options = dict(
    epochs=NO_EPOCHS,
    verbose=1,
    batch_size=NO_BATCH_SIZE,
    validation_data=(test_X, test_y),
)
history = model.fit(train_X, train_y, **fit_options)

In [None]:
# draw the training and validation loss recorded for each epoch
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[history_key], label=curve_label)
pyplot.legend()
pyplot.xlabel('# of epoch')
pyplot.ylabel('Loss')
pyplot.title("CNN-LSTM Model Training Curve")
pyplot.show()

In [None]:
# Rebuild the scaled arrays before prediction. The scaling statistics are fitted
# on the training split only and reused for the test split, so both splits live
# in the same scaled space.
train_X, train_y, scaler = scale_data_toLSTM(X_train, Y_train)  # scaler holds the Y_train statistics

feature_scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train.values.astype('float32'))
test_X = feature_scaler.transform(X_test.values.astype('float32'))
test_X = test_X.reshape(test_X.shape[0], 1, 1, test_X.shape[1])  # same 4D layout as train_X
test_y = scaler.transform(Y_test.values.reshape(-1, 1)).reshape(-1)

In [None]:
# predict on the 4D test input, then flatten it to 2D [samples, features]
yhat = model.predict(test_X)
n_rows, n_cols = test_X.shape[0], test_X.shape[3]
test_X = test_X.reshape((n_rows, n_cols))

In [None]:
# invert scaling for forecast: `scaler` was fitted on the single target column
# of the training split, so it is applied to the (n, 1) prediction array directly
# instead of to a padded copy of the feature matrix
inv_yhat = scaler.inverse_transform(yhat)[:, 0]
# the actual values are the untouched test targets, already in original units;
# round-tripping test_y through a scaler fitted on a different split would distort them
inv_y = Y_test.values
# calculate MSE
mse = mean_squared_error(inv_y, inv_yhat)
print('Test MSE: %.3f' % mse)

In [None]:
# median of the predictions after the scaling has been inverted
np.median(inv_yhat)

In [None]:
# wrap the predictions and the 2D test features in DataFrames
pred_df, test_df = pd.DataFrame(inv_yhat), pd.DataFrame(test_X)

In [None]:
# place the predictions next to the test features, column-wise
forecast_df = pd.concat((test_df, pred_df), axis='columns')

In [None]:
# print the full per-column summary of the prediction frame
pred_df.info(verbose = True)

In [None]:
# descriptive statistics of the predictions
pred_df.describe()