In [14]:
import pandas as pd
# Input CSV; a relative path, so it is resolved against the kernel's
# current working directory.
DATA_PATH = 'data_new_2.csv'
df = pd.read_csv(DATA_PATH)

In [15]:
# Column-name prefixes used to group the columns of `df`.
# Two-letter tags first (presumably braking / steering / throttle events --
# TODO confirm against the data dictionary) ...
event_prefixes = ['FB', 'MB', 'FS', 'MS', 'FT', 'MT']
# ... followed by the numeric tags, which are evenly spaced: 360, 387, ..., 522.
marker_prefixes = [str(marker) for marker in range(360, 523, 27)]
key_moments = event_prefixes + marker_prefixes

# Filled in later as {prefix: {column: {'lower_bound': ..., 'upper_bound': ...}}}.
iqr_bounds = dict()

In [16]:
def _tukey_fences(values):
    """Return the 1.5 * IQR outlier fences of a Series.

    The result is a dict with keys 'lower_bound' (Q1 - 1.5 * IQR) and
    'upper_bound' (Q3 + 1.5 * IQR), where Q1 / Q3 are the 25th / 75th
    percentiles of `values`.
    """
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    spread = q3 - q1
    return {'lower_bound': q1 - 1.5 * spread, 'upper_bound': q3 + 1.5 * spread}


# For every key-moment prefix, compute the fences of each column whose name
# starts with that prefix. The entry for a prefix is rebuilt from scratch, so
# re-running the cell does not accumulate stale columns.
for moment in key_moments:
    matching = [name for name in df.columns if name.startswith(moment)]
    iqr_bounds[moment] = {name: _tukey_fences(df[name]) for name in matching}

# Flatten the nested dict into (prefix, column) keys so the resulting frame is
# indexed by both levels, with one row per column.
flat_bounds = {
    (moment, name): fences
    for moment, per_column in iqr_bounds.items()
    for name, fences in per_column.items()
}
iqr_bounds_df = pd.DataFrame.from_dict(flat_bounds, orient='index')

print(iqr_bounds_df)

                             lower_bound  upper_bound
FB  FB_SPEED                  254.000000   366.000000
    FB_LAP_DIST               217.618446   316.419777
    FB_BRAKE                   -0.692996     1.463086
    FB_LAP_DIST_FROM_APEX_1   222.047970   321.525017
MB  MB_LAP_DIST               202.176788   409.886505
    MB_BRAKE                    0.417455     1.349527
    MB_LAP_DIST_FROM_APEX_1   212.501229   400.427997
FS  FS_DIST_FROM_LEFT          -0.316969     6.802655
    FS_SPEED                  124.000000   300.000000
    FS_LAP_DIST               303.268677   355.638794
    FS_BRAKE                   -0.549454     1.929673
    FS_STEERING                 0.055500     0.119081
    FS_LAP_DIST_FROM_APEX_1   306.655903   359.485407
MS  MS_DIST_FROM_LEFT          -6.917424    25.192909
    MS_SPEED                   84.500000   208.500000
    MS_LAP_DIST               338.137024   444.072083
    MS_THROTTLE                -0.669078     1.115129
    MS_BRAKE                

In [17]:
import pandas as pd
import statsmodels.api as sm
from sklearn.model_selection import train_test_split

# Keep only the rows whose INVALID_LAP value compares equal to False.
is_valid_lap = df['INVALID_LAP'].eq(False)
valid_laps_df = df.loc[is_valid_lap]

# Explanatory variables: the *_LAP_DIST columns of the six two-letter key
# moments, then the *_DIST_FROM_LEFT columns for FS / MS / FT / MT.
predictors = [
    'FB_LAP_DIST', 'MB_LAP_DIST', 'FS_LAP_DIST', 'MS_LAP_DIST',
    'FT_LAP_DIST', 'MT_LAP_DIST',
    'FS_DIST_FROM_LEFT', 'MS_DIST_FROM_LEFT',
    'FT_DIST_FROM_LEFT', 'MT_DIST_FROM_LEFT',
]

# OLS does not add an intercept on its own, so a constant column is added
# to the design matrix before splitting.
X = sm.add_constant(valid_laps_df[predictors])
y = valid_laps_df['SECTION_TIME_MS']

# 80 / 20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit on the training partition only; the test partition is left untouched here.
model = sm.OLS(endog=y_train, exog=X_train).fit()

print(model.summary())



                            OLS Regression Results                            
Dep. Variable:        SECTION_TIME_MS   R-squared:                       0.631
Model:                            OLS   Adj. R-squared:                  0.616
Method:                 Least Squares   F-statistic:                     43.18
Date:                Sun, 03 Nov 2024   Prob (F-statistic):           3.65e-49
Time:                        21:08:39   Log-Likelihood:                -2273.5
No. Observations:                 264   AIC:                             4569.
Df Residuals:                     253   BIC:                             4608.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const              1.551e+04   1532.92