In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import root_mean_squared_error
import matplotlib.pyplot as plt
from pygam import LinearGAM, s, f
from gam_reg import GAMRegressor
import numpy as np

# Load the processed lap table and re-express every lap time as its offset
# from the row's "racemean" value, then discard columns not used for modelling.
df = pd.read_csv("./correlation-data/r22-24_processed.csv")
df["LapTime"] = df["LapTime"].sub(df["racemean"])
df = df.drop(columns=["racemean", "SpL", "CornerForce", "TempRatio"])

# Dry subset: rows whose Rainfall flag equals 0.0. The flag is constant within
# the subset, so it is removed before building the design matrix.
df_dry = df.loc[df["Rainfall"] == 0.0].drop(columns=["Rainfall"])
X_dry, y_dry = df_dry.drop(columns=['LapTime']), df_dry['LapTime']

# Wet subset: rows whose Rainfall flag equals 1.0, handled the same way.
df_wet = df.loc[df["Rainfall"] == 1.0].drop(columns=["Rainfall"])
X_wet, y_wet = df_wet.drop(columns=['LapTime']), df_wet['LapTime']

# Combined frame (all rows) without the Rainfall flag.
df = df.drop(columns=["Rainfall"])
X, y = df.drop(columns=['LapTime']), df['LapTime']

# Columns whose name begins with one of the categorical prefixes are treated
# as categorical; every remaining column of X is treated as numerical.
categorical_feats = [col for col in X.columns if col.startswith(('Compound', "Rainfall"))]
numerical_features = [col for col in X.columns if col not in categorical_feats]

# preprocessor_dry = ColumnTransformer(transformers=[
#     ("scaler", StandardScaler(), numerical_features),
# ],remainder='passthrough')

def make_preprocessor(num_feats : list[str]):
    """Build the column preprocessor used ahead of the GAM.

    Parameters
    ----------
    num_feats
        Names of the columns to standardise with ``StandardScaler``.

    Returns
    -------
    ColumnTransformer
        Unfitted transformer with a single "scaler" step applied to
        ``num_feats``; all other columns are passed through unchanged.
    """
    scaling_step = ("scaler", StandardScaler(), num_feats)
    return ColumnTransformer(transformers=[scaling_step], remainder='passthrough')

def create_gam_pipeline(all_feats : list[str], numerical_feats : list[str], lam=None):
    """Build an unfitted "scale, then GAM" pipeline with one spline term per column.

    Parameters
    ----------
    all_feats
        Names of every column of the design matrix. Only its length is used
        (one ``s(i)`` term is created per column), so any sized sequence such
        as a list or a pandas ``Index`` is accepted.
    numerical_feats
        Columns forwarded to ``make_preprocessor`` for standard scaling.
    lam
        Value forwarded as ``lam`` to ``GAMRegressor``. When omitted, the grid
        ``np.linspace(0.001, 0.01, 20)`` is used.
        NOTE(review): presumably GAMRegressor treats an array as a search
        grid over smoothing penalties -- confirm against gam_reg.

    Returns
    -------
    Pipeline
        Unfitted pipeline with the steps "preprocessor" and "gam".
    """
    if lam is None:
        lam = np.linspace(0.001, 0.01, 20)

    # NOTE(review): ColumnTransformer emits the scaled columns first and the
    # passthrough columns after them, so term index i does not necessarily
    # refer to all_feats[i]. This is harmless while every term is s(i), but it
    # must be accounted for before giving categorical columns a different term
    # type (e.g. f(i)).
    n_feats = len(all_feats)
    # With no features, terms stays None and is handed to GAMRegressor as-is.
    terms = s(0) if n_feats else None
    for idx in range(1, n_feats):
        terms += s(idx)

    return Pipeline(steps=[
        ("preprocessor", make_preprocessor(numerical_feats)),
        ("gam", GAMRegressor(terms=terms, lam=lam)),
    ])

In [2]:
# Hold out 20% of each weather subset for evaluation. Each split uses its own
# fixed seed so the partitions are the same on every run.
X_dry_train, X_dry_test, y_dry_train, y_dry_test = train_test_split(
    X_dry,
    y_dry,
    test_size=0.2,
    random_state=57372,
)
X_wet_train, X_wet_test, y_wet_train, y_wet_test = train_test_split(
    X_wet,
    y_wet,
    test_size=0.2,
    random_state=235246245,
)

In [4]:
# Build one unfitted pipeline per weather subset; both share the same list of
# numerical columns.
dry_pipeline = create_gam_pipeline(
    all_feats=X_dry.columns,
    numerical_feats=numerical_features,
)
wet_pipeline = create_gam_pipeline(
    all_feats=X_wet.columns,
    numerical_feats=numerical_features,
)

# Only the dry pipeline is fitted in this cell.
dry_pipeline.fit(X_dry_train, y_dry_train)

# Keep a handle on the object stored in the "gam" step's `gam_` attribute.
dry_gam = dry_pipeline.named_steps['gam'].gam_

[38;2;0;255;0m100%[39m [38;2;0;255;0m(20 of 20)[39m |########################| Elapsed Time: 0:02:04 Time:  0:02:040611


In [5]:
# Print the fitted dry model's summary (fit statistics and per-term table).
dry_gam.summary()

LinearGAM                                                                                                 
Distribution:                        NormalDist Effective DoF:                                    241.0876
Link Function:                     IdentityLink Log Likelihood:                                -70682.2223
Number of Samples:                        42911 AIC:                                           141848.6199
                                                AICc:                                          141851.3783
                                                GCV:                                                1.4957
                                                Scale:                                              1.4806
                                                Pseudo R-Squared:                                   0.4764
Feature Function                  Lambda               Rank         EDoF         P > x        Sig. Code   
s(0)                              [0.

 
Please do not make inferences based on these values! 

Collaborate on a solution, and stay up to date at: 
github.com/dswah/pyGAM/issues/163 

  dry_gam.summary()


In [6]:

# Score the fitted dry pipeline on the held-out dry split using RMSE.
dry_test_predictions = dry_pipeline.predict(X_dry_test)
score = root_mean_squared_error(y_dry_test, dry_test_predictions)
print(f"Dry GAM RMSE: {score}")

Dry GAM RMSE: 4.036027532028268
