# Test Field

In [24]:
import datetime
import re
from typing import List

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from scipy.stats import ttest_ind
from scipy.stats import mannwhitneyu
from sklearn.utils import resample

In [25]:
import importlib

import final_func as fn

# Re-import final_func so edits made to the module are picked up without
# restarting the kernel. The trailing semicolon suppresses the cell's echo of
# the module repr, which would otherwise store an absolute local filesystem
# path in the saved notebook output.
importlib.reload(fn);

<module 'final_func' from '/Users/andrewmo/Documents/Docs - Jupiter/Projects/Class Individuals/2022Spring_Finals/final_func.py'>

In [26]:
# Read the four raw CSV tables from the relative data/ directory.
pit, results, status, lap = map(
    pd.read_csv,
    ('data/pit_stops.csv', 'data/results.csv', 'data/status.csv', "data/lap_times.csv"),
)

In [27]:
# Combine the pit-stop, results and status tables and post-process the merged
# frame with the final_func helpers, then build the pit-stop grouping from it.
merge_df = fn.process_data(fn.merge_data([pit, results, status]))
df_dict = fn.pit_stop_group(merge_df)

In [28]:
# Split merge_df into the two groups (unpacked as front / back) that
# comparison_plot indexes position by position.
# NOTE(review): top_num=5 presumably marks the cut-off between the higher- and
# lower-ranked groups — confirm in final_func.front_back_division.
df_front, df_back = fn.front_back_division(merge_df, top_num=5)

In [31]:
def comparison_plot(list_1: List[pd.DataFrame], list_2: List[pd.DataFrame], select_col='lap_prop',
                    show_mean=True, show_description=True, show_divide=True, non_para=False, save_fig=False):
    """Print a two-sample significance test for each pit-stop group.

    ``list_1`` and ``list_2`` are matched position by position, and each
    position is labelled with a ``(total pit stops, pit stop number)`` pair:
    (1, 1), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3). For every pair, the
    ``select_col`` column of the ``list_2`` frame is resampled with replacement
    to the length of the ``list_1`` column, and the two samples are compared.

    Parameters
    ----------
    list_1 : list of pd.DataFrame
        Frames of the first ("front") group, one per labelled position.
    list_2 : list of pd.DataFrame
        Frames of the second ("back") group, in the same order as ``list_1``.
    select_col : str
        Name of the column that is compared.
    show_description : bool
        When False, nothing is computed or printed.
    non_para : bool
        When True, use ``mannwhitneyu``; otherwise use ``ttest_ind``.
    show_mean, show_divide, save_fig
        Accepted for interface compatibility; this version of the function
        draws no figure and does not use them.

    Returns
    -------
    None
        Results are reported via ``print`` only. Positions beyond the six
        labelled ones, or without a counterpart in the other list, are ignored.
    """
    if not show_description:
        # The p-values are only ever reported through print(), so there is
        # nothing left to do when the description is switched off.
        return

    # (total pit stops, pit stop number) label for each position in the lists.
    plot_index = [(1, 1), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)]

    # The choice of test does not depend on the group, so pick it once.
    two_sample_test = mannwhitneyu if non_para else ttest_ind

    for (total, pit), front_df, back_df in zip(plot_index, list_1, list_2):
        front = front_df[select_col]
        # NOTE(review): both tests accept samples of unequal size, so resampling
        # the back group to the front group's length is not required by the
        # tests and affects the reported p-value. Kept as-is (fixed seed) so
        # existing outputs stay reproducible — confirm whether it is intended.
        back = resample(back_df[select_col],
                        replace=True, n_samples=len(front), random_state=123)

        p_value = two_sample_test(front, back).pvalue
        print(f'Total Pits: {total}, no.{pit} pit, p value={p_value}')

In [32]:
# Run the per-pit-stop significance tests with the default settings.
comparison_plot(df_front, df_back)

Total Pits: 1, no.1 pit, p value=0.6120208311353951
Total Pits: 2, no.1 pit, p value=0.06260995721012505
Total Pits: 2, no.2 pit, p value=0.2175452218698461
Total Pits: 3, no.1 pit, p value=0.045088810955493594
Total Pits: 3, no.2 pit, p value=0.005040876100088418
Total Pits: 3, no.3 pit, p value=0.0004646569255695769


In [42]:
def avg_deviation_plot(list_1: List[pd.DataFrame], list_2: List[pd.DataFrame], save_fig=False,
                       sig_level=0.05):
    """Print a Mann-Whitney U comparison of 'abs_deviation_mean' per group.

    ``list_1`` and ``list_2`` are matched by position; position ``i`` is
    reported as "Total Pit Stops = i + 1". For each position, the
    'abs_deviation_mean' column of the ``list_2`` frame is resampled with
    replacement to the length of the ``list_1`` column before the test is run.
    When the p-value is below ``sig_level``, the means of both samples
    (rounded to three decimals) are printed as well.

    Parameters
    ----------
    list_1 : list of pd.DataFrame
        Frames reported as "Higher Ranking".
    list_2 : list of pd.DataFrame
        Frames reported as "Lower Ranking"; needs an entry for every position
        of ``list_1``.
    save_fig
        Accepted for interface compatibility; this version of the function
        draws no figure and does not use it.
    sig_level : float
        Significance threshold for printing the group means (default 0.05).

    Returns
    -------
    None
        Results are reported via ``print`` only.
    """
    for i, front_df in enumerate(list_1):
        front = front_df['abs_deviation_mean']
        # Fixed seed keeps the resampled back group, and hence the printed
        # p-values, reproducible between runs.
        back = resample(list_2[i]['abs_deviation_mean'],
                        replace=True, n_samples=len(front), random_state=123)

        p_value = mannwhitneyu(front, back).pvalue
        print(f'Total Pit Stops = {i + 1}')
        print(f'     Mann-Whitney U rank test p value={p_value}')

        if p_value < sig_level:
            # The means are only reported for significant differences, so they
            # are only computed here.
            front_mean = round(front.mean(), ndigits=3)
            back_mean = round(back.mean(), ndigits=3)
            print('     Means of Average Deviation - ')
            print(f'        Higher Ranking: {front_mean}, Lower Ranking: {back_mean}')

In [43]:
# Split merge_df again, this time passing select_col='abs_deviation_mean'.
# NOTE: this rebinds df_front / df_back, replacing the values produced by the
# earlier front_back_division call, so any cell that uses these names depends
# on the order in which the cells were executed.
df_front, df_back = fn.front_back_division(merge_df, select_col='abs_deviation_mean', top_num=5)

In [44]:
# Report the Mann-Whitney U comparison of 'abs_deviation_mean' between the two groups.
avg_deviation_plot(df_front, df_back)

Total Pit Stops = 1
     Mann-Whitney U rank test p value=0.0026914155012888615
     Means of Average Deviation - 
        Higher Ranking: 0.122, Lower Ranking: 0.147
Total Pit Stops = 2
     Mann-Whitney U rank test p value=0.006222420075629702
     Means of Average Deviation - 
        Higher Ranking: 0.103, Lower Ranking: 0.118
Total Pit Stops = 3
     Mann-Whitney U rank test p value=0.02260220339545907
     Means of Average Deviation - 
        Higher Ranking: 0.101, Lower Ranking: 0.121
