In [None]:
import sys
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from sklearn.preprocessing import PowerTransformer, FunctionTransformer, QuantileTransformer
from sklearn.metrics import mean_squared_error

sys.path.append('../Scripts')
from Data_Processing import DataProcessing

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

In [None]:
# Load the training CSV through the project-local DataProcessing helper
# (imported from ../Scripts); the result is used as a pandas DataFrame below.
df = DataProcessing('../Data/train.csv')

In [None]:
# Display the loaded frame for a visual sanity check.
df

In [None]:
# Optional filter, currently disabled: drop rows whose lap time is zero.
#df = df.loc[df['Lap_Time'] != 0]

# Target vector and feature frame.
y = df['Lap_Time']
X = df.drop(columns=['Lap_Time'])

# Numeric columns that are nevertheless handled as categorical features.
numeric_as_categorical = (
    'Lap_Improvement',
    'Lap_Number',
    'S1_Improvement',
    'S2_Improvement',
    'S3_Improvement',
)

# Categorical group: every object-dtype column followed by the columns named above.
obj_columns = [*X.select_dtypes(include=object).columns, *numeric_as_categorical]

# Numeric group: every numeric column except the ones named above.
# list.remove raises ValueError if one of them is not a numeric column of X.
num_columns = X.select_dtypes(include='number').columns.tolist()
for column_name in numeric_as_categorical:
    num_columns.remove(column_name)

# Scalers

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler, OrdinalEncoder
import joblib

In [None]:
# Disabled experiment: Yeo-Johnson power transform of the numeric columns.
#pt = PowerTransformer(method='yeo-johnson')
#X[num_columns] = pt.fit_transform(X[num_columns])
# Display the feature frame (no power transform applied).
X

In [None]:
# Standardize the numeric features and one-hot encode the categorical ones;
# any column listed in neither group is passed through unchanged.
# handle_unknown='ignore' makes the fitted encoder emit an all-zero block for a
# category it did not see during fit instead of raising at transform time, which
# matters because this transformer is dumped to disk for use on other data.
# The output of fit_transform on X itself is unaffected by this option.
column_transformer = ColumnTransformer(
    [('num', StandardScaler(), num_columns),
     ('obj', OneHotEncoder(handle_unknown='ignore'), obj_columns)],
    remainder='passthrough')

trans_X = column_transformer.fit_transform(X)

In [None]:
# Persist the fitted column transformer to disk.
joblib.dump(column_transformer, '../Models/Column_Transformer_NN.pkl')
# Disabled: would persist the power transformer if that experiment were enabled.
#joblib.dump(pt, '../Models/Power_Transformer.pkl')

In [None]:
# ColumnTransformer.fit_transform returns either a SciPy sparse matrix or a dense
# ndarray depending on the density of the stacked output, so densify only when the
# object actually offers .toarray(). This also keeps the cell safe to re-run: after
# the first run trans_X is already an ndarray, which has no .toarray() method.
if hasattr(trans_X, 'toarray'):
    trans_X = trans_X.toarray()
else:
    trans_X = np.asarray(trans_X)

# Target as a float NumPy array.
y = np.asarray(y).astype(float)

In [None]:
# Standardize the target with its own scaler; it is fed as a single column (n, 1).
y_column = y.reshape(-1, 1)
scaler = StandardScaler().fit(y_column)
y = scaler.transform(y_column)

# Persist the fitted target scaler to disk.
joblib.dump(scaler, '../Models/NN_Y_Scaler.pkl')

# Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Sequential hold-out: rows keep their order and the final 20% become the test split.
# random_state is kept for parity but has no effect while shuffle=False.
X_train, X_test, y_train, y_test = train_test_split(
    trans_X,
    y,
    test_size=0.2,
    shuffle=False,
    random_state=42,
)

In [None]:
# Shape of the training feature matrix.
X_train.shape

# Neural Network

In [None]:
from tensorflow import keras
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from keras import backend as K
from keras.callbacks import EarlyStopping
from datetime import datetime
from tensorflow import keras
from tensorflow.keras.losses import MeanSquaredLogarithmicError

In [None]:
def root_mean_squared_log_error(y_true, y_pred):
    """Return the root mean squared logarithmic error between two sets of values.

    Computed with NumPy as ``sqrt(mean((log1p(y_true) - log1p(y_pred)) ** 2))``.
    The previous body called ``mean_squared_log_error``, a name that is never
    imported in this notebook, so every call raised ``NameError``.

    Parameters
    ----------
    y_true, y_pred : array-like
        Non-negative values, 1-D or 2-D. A 1-D input is treated as a single
        column, so an ``(n,)`` vector and an ``(n, 1)`` column are compared
        element-wise instead of being broadcast to ``(n, n)``.

    Returns
    -------
    float
        The root mean squared logarithmic error over all elements.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in shape, or contain negative values.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Promote scalars / vectors to one column so both inputs share a layout.
    if actual.ndim < 2:
        actual = actual.reshape(-1, 1)
    if predicted.ndim < 2:
        predicted = predicted.reshape(-1, 1)

    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred have inconsistent shapes: {actual.shape} vs {predicted.shape}")
    if actual.size == 0:
        raise ValueError("y_true and y_pred must not be empty")
    # The logarithmic error is only defined for non-negative values.
    if (actual < 0).any() or (predicted < 0).any():
        raise ValueError(
            "Root mean squared logarithmic error cannot be used when values are negative")

    log_difference = np.log1p(actual) - np.log1p(predicted)
    return float(np.sqrt(np.mean(np.square(log_difference))))

In [None]:
# Save the model to disk whenever the validation loss reaches a new minimum.
mc = ModelCheckpoint('../Models/NN_model_test.h5', monitor='val_loss', mode='min', verbose=1, save_best_only=True)

# Stop once val_loss has not improved for 5 consecutive epochs and roll the model
# back to the weights of the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=0,
    mode="auto",
    baseline=None,
    restore_best_weights=True)

# Fully connected regressor.
# - The input width is read from the training matrix instead of being hard-coded,
#   so a change in the number of encoded columns does not break the model.
# - The first positional argument of LeakyReLU is its negative-slope coefficient,
#   not a unit count: LeakyReLU(500) added no neurons and, placed after a ReLU
#   (non-negative inputs), was an identity. The 500/800/200 layers are therefore
#   built as Dense layers, each followed by a LeakyReLU activation.
# - The output layer is linear: the target is standardized and therefore takes
#   negative values, which a ReLU output can never produce.
model = keras.Sequential([
    keras.layers.Dense(100, activation='relu', input_dim=X_train.shape[1]),
    keras.layers.Dense(500),
    keras.layers.LeakyReLU(),
    keras.layers.Dense(800),
    keras.layers.LeakyReLU(),
    keras.layers.Dense(200),
    keras.layers.LeakyReLU(),
    keras.layers.Dense(200, activation='relu'),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(1)
])

In [None]:
opt = keras.optimizers.Adam(learning_rate=0.0001)

# The targets fed to this model were standardized with StandardScaler, so they are
# centered on zero and include negative values. Mean squared logarithmic error is
# only meaningful for non-negative values, so the model is trained on mean squared
# error instead, with mean absolute error reported alongside it. Callbacks that
# monitor 'val_loss' keep working unchanged.
model.compile(optimizer=opt,
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

In [None]:
# Training schedule. Each "epoch" is capped at STEPS_PER_EPOCH batches of BATCH_SIZE
# rows; MAX_EPOCHS is only an upper bound because early stopping is among the callbacks.
BATCH_SIZE = 200
MAX_EPOCHS = 5000
STEPS_PER_EPOCH = 5

# NOTE(review): the held-out split doubles as the validation set that drives
# checkpointing and early stopping — confirm this is intended.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=MAX_EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    batch_size=BATCH_SIZE,
    shuffle=False,
    callbacks=[mc, early_stopping],
)

In [None]:
# RMSLE on the held-out split. This cell used `test_x` / `test_y`, names that are
# never defined in this notebook; the split variables are X_test / y_test.
# Both sides are mapped back through the target scaler first, because the
# standardized targets contain negative values, for which the logarithmic error
# is undefined.
y_predicted_test = scaler.inverse_transform(model.predict(X_test)).reshape(-1).astype(float)
y_actual_test = scaler.inverse_transform(y_test).reshape(-1).astype(float)
# Clamp at zero so a slightly negative prediction stays inside the metric's domain.
y_predicted_test = np.clip(y_predicted_test, 0.0, None)
root_mean_squared_log_error(y_actual_test, y_predicted_test)

In [None]:
# Predictions and targets mapped back from the standardized scale.
y_pred = scaler.inverse_transform(model.predict(X_test))
y_actual = scaler.inverse_transform(y_test)

# Both are passed through 1/v - 1 before being compared.
# NOTE(review): presumably this undoes a transform applied in DataProcessing — confirm.
results = pd.DataFrame({
    'Predicted': (1 / y_pred - 1).ravel(),
    'Actual': (1 / y_actual - 1).ravel(),
})
# Absolute error per row.
results['Difference'] = (results['Predicted'] - results['Actual']).abs()

In [None]:
# Test-set predictions mapped back through the target scaler's inverse transform.
scaler.inverse_transform(model.predict(X_test))

In [None]:
# Display the predicted / actual / absolute-difference table.
results

In [None]:
# Mean absolute difference between predicted and actual values.
results['Difference'].mean()

In [None]:
# Root mean squared error between actual and predicted values.
# The square root is taken explicitly: the `squared=False` keyword of
# mean_squared_error is deprecated and absent from newer scikit-learn releases,
# whereas this form works on every version.
np.sqrt(mean_squared_error(results['Actual'], results['Predicted']))

In [None]:
# Root mean squared logarithmic error between the actual and predicted columns of results.
root_mean_squared_log_error(results['Actual'], results['Predicted'])

In [None]:
# Histograms of the columns of df, drawn on a 20x20 inch figure.
df.hist(figsize=(20,20))

In [None]:
# Distribution of the Time_Minutes column using 50 bins.
df['Time_Minutes'].hist(bins=50, figsize=(10,10))

In [None]:
# Rows whose S3 value is below 1000, largest S3 first.
df[df['S3'] < 1000].sort_values(by='S3', ascending=False)

In [None]:
# Column groups kept for later use; nothing in the visible cells reads them.
# NOTE(review): the names suggest candidates for log and root transforms — confirm.
log_cols = ['Air_Temp', 'Lap_Number']

root_cols = ['Elapsed', 'Wind_Speed', 'Track_Temp']

In [None]:
# Display the frame again.
df

In [None]:
# Display the frame again (same expression as the previous cell).
df