# Evaluation
Below is a series of graphs used to evaluate the correctness of my dissertation.

In [2]:
import pickle
import random
from os import makedirs, path, remove

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
from sklearn.gaussian_process import GaussianProcessRegressor


from ModelUsage.get_model import get_trained_model, convert_to_input, convert_to_training_data

FileNotFoundError: [Errno 2] No such file or directory: 'Data\\testing_CSVs\\test_2016_09_18\\drivers.txt'

In [None]:
# --- Run configuration ---
# Folder holding a previously stored model; when None a model is trained instead.
model_location = None
# Folder the model and its normalisation constants are written to (None skips storing).
store_location = 'Data\\Model'
csv_location = 'Data\\testing_CSVs\\First52020.csv'

# Event date and qualifying session being evaluated.
event_date = '09_13'
qualifying_session = 'Q2'

# Row boundaries inside the CSV: the first `train_to` rows are kept for training and
# `predict_from` is used by a later cell to select the rows to predict on.
# Any session other than 'Q1' falls back to the Q2 boundaries.
train_to, predict_from = (453, 454) if qualifying_session == 'Q1' else (484, 485)

# Keep only the training rows and write them to a temporary CSV.
data = pd.read_csv(csv_location).iloc[:train_to]
data.to_csv('Data\\temp.csv', index=False)

In [None]:
# Reload the truncated training rows from the temporary CSV.
dataframe = pd.read_csv('Data\\temp.csv')

# Index labels of the rows belonging to the event/session under evaluation.
indices = dataframe.index[(dataframe['Date'] == event_date)
 & (dataframe['SessionName'] == qualifying_session)].tolist()

# Move a random half of those rows out of `dataframe` and into `X_kernel`.
# NOTE(review): the sample is unseeded, so the split differs on every run.
held_out_indices = random.sample(indices, len(indices)//2)
X_kernel = dataframe.loc[held_out_indices]
dataframe = dataframe.drop(index=held_out_indices)

X_train_kernel, y_train_kernel = convert_to_training_data(X_kernel)
X_train, y_train = convert_to_training_data(dataframe)
if model_location is not None:
    # Model folder supplied: load the pickled model and its normalisation constants.
    # NOTE: pickle.load can execute arbitrary code; only load files you trust.
    # The context manager closes the file handle, which the bare open() call leaked.
    with open(f'{model_location}\\model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
    data = pd.read_csv(f'{model_location}\\normalisation_constants.csv', index_col=0)
    normalisation_constants = data.to_dict(orient='index')
else:
    # Model not supplied, train a model on the given CSV file
    kernel = RBF(length_scale=1)
    model, normalisation_constants = get_trained_model(kernel, 'Data\\temp.csv', return_constants=True)

# The temporary CSV is no longer needed once the model is available.
remove('Data\\temp.csv')

In [None]:
if store_location is not None:
    # Store folder supplied: create it if needed (no error when it already exists).
    makedirs(store_location, exist_ok=True)
    # The context manager guarantees the pickle is flushed and the handle closed.
    with open(f'{store_location}\\model.pkl', 'wb') as model_file:
        pickle.dump(model, model_file)
    # Persist the normalisation constants next to the model, one row per top-level key.
    constant_dataframe = pd.DataFrame.from_dict(normalisation_constants, orient='index')
    constant_dataframe.to_csv(f'{store_location}\\normalisation_constants.csv', index=True)

In [None]:
# Display the model's `kernel_` attribute (for a fitted scikit-learn
# GaussianProcessRegressor this is the kernel with its fitted hyperparameters;
# presumably `model` is one - confirm against get_trained_model).
model.kernel_

In [None]:
# Restrict the full CSV to the laps of the event/session being evaluated.
data = pd.read_csv(csv_location)
session_mask = (data['Date'] == event_date) & (data['SessionName'] == qualifying_session)
data = data[session_mask]

# Fastest lap of every driver, sorted ascending.
grouped_by_driver = data.groupby('DriverName')
driver_fastest_laps = grouped_by_driver['LapTime'].min().sort_values()

# Number of drivers that progress: 15 for Q1, 10 for Q2, 5 for anything else.
drivers_through = {'Q1': 15, 'Q2': 10}.get(qualifying_session, 5)

# `driver_fastest_laps` is already sorted, so positional indexing is enough.
# NOTE(review): this reads index `drivers_through`, while the simulation cells compare
# against `times_sorted[drivers_through - 1]` - confirm which position is intended.
cut_off_time = driver_fastest_laps.to_numpy()[drivers_through]

# (fastest lap, driver name) tuples in ascending lap-time order.
order = [(lap_time, driver) for driver, lap_time in driver_fastest_laps.items()]

In [None]:
# Session rows from index label `predict_from - 1` onwards (label-based slice).
first_prediction_label = predict_from - 1
prediction = data.loc[first_prediction_label:]

input_to_model = convert_to_input(prediction, normalisation_constants)

# Reference values to compare predictions against: LapTime minus ExpectedTime.
output = prediction['LapTime'].sub(prediction['ExpectedTime']).to_numpy()

In [None]:
# Predict once and reuse the result for both the raw print and the residuals.
baseline_predictions = model.predict(input_to_model)
print(baseline_predictions)
print(output)

# Signed difference between prediction and reference values.
print(baseline_predictions - output)

In [None]:
from Model.train_hyperparameters import mean_squared_error

# Error of the initial model on the prediction rows, as reported by the project helper.
baseline_mse = mean_squared_error(model, input_to_model, output)
print(baseline_mse)

In [None]:
from Model.train_hyperparameters import random_search as train_hyperparameters

# Candidate kernel whose hyperparameters are searched.
kernel = ConstantKernel() * RBF() + RBF()

# presumably the (lower, upper) range the search samples from - confirm in random_search
search_bounds = (1e-10, 1e10)

new_kernel, loss = train_hyperparameters(
    GaussianProcessRegressor, kernel, search_bounds,
    X_train, y_train, X_train_kernel, y_train_kernel,
    size=3, loops=10,
)

print(new_kernel)

In [None]:
# Fit a regressor with the searched kernel; optimizer=None keeps its hyperparameters fixed.
trained_model = GaussianProcessRegressor(kernel=new_kernel, optimizer=None)
trained_model.fit(X_train, y_train)

print(prediction)

# Repeat the first prediction row once per lap count and vary only LapsCompleted.
n_laps = 70
test = pd.concat([prediction[:1]] * n_laps)
test['LapsCompleted'] = np.arange(n_laps)

vals = trained_model.predict(convert_to_input(test, normalisation_constants))

# Model output as a function of LapsCompleted, all other inputs held fixed.
plt.plot(np.arange(n_laps), vals, linestyle='dotted')
plt.xlabel('LapsCompleted')
plt.ylabel('Model prediction')
plt.show()




In [None]:
# Predictive mean and standard deviation for the prediction rows.
mean, std = trained_model.predict(input_to_model, return_std=True)

# Absolute error of the predictive mean against the reference values.
print(np.abs(mean - output))

In [None]:
# Error of the refitted model on the same prediction rows and reference values
# used for the initial model, as reported by the project's mean_squared_error helper.
print(mean_squared_error(trained_model, input_to_model, output))

In [None]:
# Show the (fastest lap, driver name) tuples for the session.
print(order)

In [None]:
driver_name = 'Lewis Hamilton'

# `order` holds (fastest lap, driver name) tuples; the 1-based index of the first
# tuple naming the driver is taken as the driver's position.
# Falls back to 1 when the driver does not appear in `order`.
driver_position = next(
    (position for position, (_, name) in enumerate(order, start=1) if name == driver_name),
    1,
)

driver_position

In [None]:
drivers_to_knock_out = (drivers_through - driver_position) + 1

# The simulated lap is one past the latest lap seen in either data split.
# Only the LapsCompleted column is reduced; taking the max of the whole frame would
# also reduce unrelated (possibly non-numeric) columns just to read this one value.
last_training_lap = dataframe.iloc[-1]['LapsCompleted']
last_held_out_lap = X_kernel['LapsCompleted'].max()
starting_lap = max(last_training_lap, last_held_out_lap) + 1
print(starting_lap)

In [None]:
# First recorded session row of every driver, in fastest-lap order.
# The rows are collected in a list and concatenated once, instead of growing two
# DataFrames inside the loop and filtering `data` twice per driver.
# Assumes the session contains at least one driver (pd.concat rejects an empty list).
first_laps = [data[data['DriverName'] == driver].iloc[:1]
              for driver in driver_fastest_laps.index.values]
drivers_to_check = pd.concat(first_laps).reset_index(drop=True)

# Expected time per driver, row-aligned with `drivers_to_check`. Kept as a
# one-column DataFrame; the column is labelled 'ExpectedTime'.
expected_time = drivers_to_check[['ExpectedTime']].copy()

# Every driver is simulated on the same tyre settings.
drivers_to_check['TyreCompound'] = 'Soft'
drivers_to_check['TyreUsage'] = 1

## Without changing laps

In [None]:
# Every driver sets a lap at the same lap count.
check = drivers_to_check.copy()
check['LapsCompleted'] = starting_lap
means, stds = trained_model.predict(convert_to_input(check, normalisation_constants), return_std=True)

# Add each driver's expected time to the model's predicted mean.
means = means + expected_time.to_numpy().squeeze()

# Draw `samples` normally distributed values per driver (one row per driver).
samples = 10000
values = np.vstack([np.random.normal(mean, std, samples) for mean, std in zip(means, stds)])

# Sort each column so that row k holds the (k+1)-th smallest draw of that column.
# assumes the number of drivers equals drivers_through + 5 - TODO confirm
times_sorted = np.sort(np.reshape(values, (drivers_through + 5, -1)), axis=0)

In [None]:
print(cut_off_time)

# Simulated values at sorted position `drivers_through` (1-based).
simulated_cut_off = times_sorted[drivers_through - 1]
mean = np.mean(simulated_cut_off)
std = np.std(simulated_cut_off)

# mean +/- 1.96 standard deviations (central 95% of a normal distribution).
confidence_interval = mean - 1.96*std, mean + 1.96*std

# Report the simulated mean, then its offset and the interval's offsets from cut_off_time.
print(mean)
print(mean - cut_off_time)
print(confidence_interval[0] - cut_off_time, confidence_interval[1] - cut_off_time)

In [None]:
# Number of independently shuffled lap assignments to generate.
repeat_factor = 5000

# Lap offsets 0 .. drivers_through + 4, stacked as one identical row per repetition.
laps = np.arange(drivers_through + 5, dtype=int)
total_laps = np.tile(laps, (repeat_factor, 1))

# Shuffle every row independently, then shift by the starting lap.
rng = np.random.default_rng()
permuted_laps = starting_lap + rng.permuted(total_laps, axis=1)

In [None]:
# One copy of the driver rows per repetition, each row given its permuted lap count
# (flattened row by row, so repetition r supplies the laps for the r-th copy).
duplicated_data = (
    pd.concat([drivers_to_check] * repeat_factor)
    .assign(LapsCompleted=permuted_laps.ravel())
)

In [None]:
# Display the model input built from the duplicated rows (inspection only; the
# result is not stored).
convert_to_input(duplicated_data, normalisation_constants)

In [None]:
# Model prediction for every (repetition, driver) row.
permuted_input = convert_to_input(duplicated_data, normalisation_constants)
predictions = trained_model.predict(permuted_input)

# Expected time per driver, repeated once per repetition to line up with `predictions`.
per_driver_expected = expected_time.to_numpy().squeeze()
expected_times = np.tile(per_driver_expected, repeat_factor)

predicted_time = predictions + expected_times

In [None]:
# `predicted_time` is flat and ordered repetition by repetition: all drivers of
# repetition 0, then all drivers of repetition 1, and so on. Reshape to
# (repetitions, drivers) and transpose so that each COLUMN is one repetition with one
# entry per driver. Reshaping straight to (drivers, -1) would mix values from
# different repetitions (and repeat the same drivers) within a column.
# assumes the number of drivers equals drivers_through + 5 - TODO confirm
times_sorted = np.sort(np.reshape(predicted_time, (-1, drivers_through + 5)).T, axis=0)
times_sorted

In [None]:
print(cut_off_time)

# Simulated values at sorted position `drivers_through` (1-based).
simulated_cut_off = times_sorted[drivers_through - 1]
mean = np.mean(simulated_cut_off)
std = np.std(simulated_cut_off)

# mean +/- 1.96 standard deviations (central 95% of a normal distribution).
confidence_interval = mean - 1.96*std, mean + 1.96*std

# Report the simulated mean, then its offset and the interval's offsets from cut_off_time.
print(mean)
print(mean - cut_off_time)
print(confidence_interval[0] - cut_off_time, confidence_interval[1] - cut_off_time)

## Faster version

In [None]:
# Lap offsets are built locally, so this cell neither depends on nor mutates the
# shared `laps` array and gives the same table every time it is re-run.
lap_offsets = np.arange(drivers_through + 5)

# One copy of the driver rows per lap offset: copy k has LapsCompleted = starting_lap + k.
check = drivers_to_check.copy()
check['LapsCompleted'] = starting_lap
check = pd.concat([check] * len(lap_offsets)).reset_index()
check['LapsCompleted'] += lap_offsets.repeat(len(drivers_to_check))

# Predict every (driver, lap offset) combination in a single call.
model_input = convert_to_input(check, normalisation_constants)
means, stds = trained_model.predict(model_input, return_std=True)

# driver -> {lap offset: (expected time + predicted mean, predicted std)}
expected_by_driver = expected_time.to_numpy().squeeze()
driver_names = check['DriverName'].unique()
driver_probs = dict()
for count, driver in enumerate(driver_names):
    driver_rows = check[check['DriverName'] == driver].index.tolist()
    predictions_for_driver = expected_by_driver[count] + means[driver_rows]
    stds_for_driver = stds[driver_rows]
    driver_probs[driver] = {lap: (predictions_for_driver[lap], stds_for_driver[lap])
                            for lap in range(len(predictions_for_driver))}

samples = 100
orders = 1000
cutoff_samples = []
# The loop variable is deliberately NOT called `order`: that name holds the
# (fastest lap, driver) list used by earlier cells and must not be overwritten.
for simulation in range(orders):
    # Fresh random assignment of lap offsets to drivers; returns a shuffled copy.
    lap_assignment = np.random.permutation(lap_offsets)
    values = np.vstack([
        np.random.normal(*driver_probs[driver][lap_assignment[count]], samples)
        for count, driver in enumerate(driver_names)
    ])
    times_sorted = np.sort(values, axis=0)
    cutoff_samples.append(times_sorted[drivers_through-1])

# Concatenate once instead of re-allocating the array on every iteration.
cutoff_times = np.concatenate(cutoff_samples)

mean = np.mean(cutoff_times)
std = np.std(cutoff_times)

# mean +/- 1.96 standard deviations (central 95% of a normal distribution).
confidence_interval = mean - 1.96*std, mean + 1.96*std
print(mean, cut_off_time)
print(mean - cut_off_time)
print(confidence_interval[0] - cut_off_time, confidence_interval[1] - cut_off_time)