In [2]:
import pandas as pd
# Load the raw dataset from the working directory. Later cells expect it to
# contain at least the INVALID_LAP and SECTION_TIME_MS columns.
df = pd.read_csv(filepath_or_buffer='data_new_2.csv')

In [3]:
# Column-name prefixes, one per key moment: six lettered codes followed by
# seven numeric codes. The IQR cell selects every column whose name starts
# with one of these prefixes.
# NOTE(review): the numeric codes are spaced 27 apart and look like distance
# markers -- confirm against the data dictionary.
key_moments = [
    'FB', 'MB', 'FS', 'MS', 'FT', 'MT',
    '360', '387', '414', '441', '468', '495', '522',
]

# Nested mapping filled in by the IQR cell:
#   prefix -> column name -> {'lower_bound': ..., 'upper_bound': ...}
iqr_bounds = dict()

In [4]:
def _iqr_fences(values):
    """Return the 1.5 * IQR outlier bounds of a numeric Series.

    The lower bound is Q1 - 1.5 * IQR and the upper bound is Q3 + 1.5 * IQR,
    where Q1/Q3 are the 25th/75th percentiles and IQR = Q3 - Q1.
    """
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    spread = third_quartile - first_quartile
    return {
        'lower_bound': first_quartile - 1.5 * spread,
        'upper_bound': third_quartile + 1.5 * spread,
    }


# Only laps whose INVALID_LAP flag equals False contribute to the bounds.
is_valid_lap = df['INVALID_LAP'] == False
valid_laps_df = df[is_valid_lap]

# For each key-moment prefix, compute bounds for every column whose name
# starts with that prefix. Re-running the cell overwrites the prefix's entry.
for km in key_moments:
    iqr_bounds[km] = {
        col: _iqr_fences(valid_laps_df[col])
        for col in valid_laps_df.columns
        if col.startswith(km)
    }

# Flatten the nested dict into a frame indexed by (prefix, column) tuples,
# with one column per bound.
flat_bounds = {
    (km, col): bounds
    for km, per_column in iqr_bounds.items()
    for col, bounds in per_column.items()
}
iqr_bounds_df = pd.DataFrame.from_dict(flat_bounds, orient='index')

print(iqr_bounds_df)

                             lower_bound  upper_bound
FB  FB_SPEED                  282.500000   350.500000
    FB_LAP_DIST               232.497254   300.501648
    FB_BRAKE                   -0.375943     1.470731
    FB_LAP_DIST_FROM_APEX_1   237.867703   305.175230
MB  MB_LAP_DIST               197.206512   399.893768
    MB_BRAKE                    0.977798     1.013321
    MB_LAP_DIST_FROM_APEX_1   203.524211   402.708009
FS  FS_DIST_FROM_LEFT           0.398082     5.392626
    FS_SPEED                  141.500000   273.500000
    FS_LAP_DIST               309.084450   351.253456
    FS_BRAKE                    0.366760     1.379944
    FS_STEERING                 0.056866     0.117388
    FS_LAP_DIST_FROM_APEX_1   313.496385   354.335157
MS  MS_DIST_FROM_LEFT          -1.036827    22.279684
    MS_SPEED                   94.375000   197.375000
    MS_LAP_DIST               337.841557   433.863254
    MS_THROTTLE                -0.751244     1.252073
    MS_BRAKE                

In [5]:
import pandas as pd
import statsmodels.api as sm
from sklearn.model_selection import train_test_split

# Explanatory variables: the *_LAP_DIST column of six key moments and the
# *_DIST_FROM_LEFT column of four of them.
predictors = [
    'FB_LAP_DIST',
    'MB_LAP_DIST',
    'FS_LAP_DIST',
    'MS_LAP_DIST',
    'FT_LAP_DIST',
    'MT_LAP_DIST',
    'FS_DIST_FROM_LEFT',
    'MS_DIST_FROM_LEFT',
    'FT_DIST_FROM_LEFT',
    'MT_DIST_FROM_LEFT',
]

# Same valid-lap filter as the IQR cell, rebuilt here from `df`.
valid_laps_df = df[df['INVALID_LAP'] == False]

# Design matrix with an intercept column, and the response (section time).
# The constant is added before splitting so both partitions carry it.
X = sm.add_constant(valid_laps_df[predictors])
y = valid_laps_df['SECTION_TIME_MS']

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Ordinary least squares fitted on the training partition only.
ols_spec = sm.OLS(endog=y_train, exog=X_train)
model = ols_spec.fit()

print(model.summary())



                            OLS Regression Results                            
Dep. Variable:        SECTION_TIME_MS   R-squared:                       0.631
Model:                            OLS   Adj. R-squared:                  0.616
Method:                 Least Squares   F-statistic:                     43.18
Date:                Tue, 05 Nov 2024   Prob (F-statistic):           3.65e-49
Time:                        10:43:08   Log-Likelihood:                -2273.5
No. Observations:                 264   AIC:                             4569.
Df Residuals:                     253   BIC:                             4608.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const              1.551e+04   1532.92