The file `car-stopping-distances.csv` contains data on stopping distances (in meters) for cars traveling at various speeds (in kilometers per hour).

Would a linear or quadratic model provide a better fit for this dataset?

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score 

# Regression model classes (thin wrappers around scikit-learn estimators)
class LinearModel:
    """Straight-line regression ``y = slope * x + intercept``.

    ``fit`` stores three attributes on the instance: ``slope``,
    ``intercept`` and ``rsquared``. ``predict`` and ``print_model_info``
    read them, so ``fit`` has to be called first.
    """

    def __init__(self, model_name=''):
        # Free-form label; none of the methods in this class read it.
        self.model_name = model_name

    def fit(self, x, y):
        """Fit an ordinary least-squares line to ``x`` and ``y``.

        ``x`` is wrapped in a ``DataFrame`` because scikit-learn expects a
        2-D feature matrix. Only the coefficient of the first feature
        column is kept as ``slope``. ``rsquared`` is computed on the same
        data the model was fitted on.
        """
        features = pd.DataFrame(x)
        regressor = LinearRegression()
        regressor.fit(features, y)
        fitted_values = regressor.predict(features)

        self.slope = regressor.coef_[0]
        self.intercept = regressor.intercept_
        self.rsquared = r2_score(y, fitted_values)

    def predict(self, x):
        """Return ``slope * x + intercept`` using the fitted parameters."""
        scaled = self.slope * x
        return scaled + self.intercept

    def print_model_info(self):
        """Print the R² score stored by ``fit``, rounded to three decimals."""
        message = f'The goodness of the linear model is : {self.rsquared:.3f}.'
        print(message)

class QuadraticModel:
    """Quadratic regression ``y = a * x**2 + b * x + c``.

    ``fit`` stores ``a``, ``b``, ``c`` and ``rsquared`` on the instance.
    ``predict`` and ``print_model_info`` read them, so ``fit`` has to be
    called first.
    """

    def __init__(self, model_name=''):
        # Free-form label; none of the methods in this class read it.
        self.model_name = model_name

    def fit(self, x, y):
        """Fit a degree-2 polynomial to ``x`` and ``y`` by least squares.

        ``x`` is wrapped in a ``DataFrame`` because scikit-learn expects a
        2-D feature matrix. ``rsquared`` is computed on the same data the
        model was fitted on.
        """
        x = pd.DataFrame(x)
        quadratic = PolynomialFeatures(degree=2)
        quad_features = quadratic.fit_transform(x)
        quad_model = LinearRegression().fit(quad_features, y)
        y_pred = quad_model.predict(quad_features)
        # For a single input column the expanded features are [1, x, x**2],
        # so index 1 is the linear term and index 2 the quadratic term. The
        # constant term is taken from the fitted intercept, not coef_[0].
        self.a = quad_model.coef_[2]
        self.b = quad_model.coef_[1]
        self.c = quad_model.intercept_
        self.rsquared = r2_score(y, y_pred)

    def predict(self, x):
        """Return ``a * x**2 + b * x + c`` using the fitted parameters."""
        return self.a*x**2 + self.b*x + self.c

    def print_model_info(self):
        """Print the R² score stored by ``fit``, rounded to three decimals."""
        # The message used to say "linear model" (copied from LinearModel),
        # which made the two models' output lines indistinguishable.
        print(f'The goodness of the quadratic model is : {self.rsquared:.3f}.')

In [None]:
# Load the three CSV files and stack them into a single DataFrame.
# NOTE(review): the notebook prompt mentions `car-stopping-distances.csv`,
# but the files read here are e-bike datasets -- confirm the intended inputs.
df0_1 = pd.read_csv('ebike-data-low-speed.csv')
df0_2 = pd.read_csv('ebike-stopping-distances.csv')
df0_3 = pd.read_csv('ebike-data-high-speed.csv')
df_concat = pd.concat([df0_1, df0_2])
# ignore_index=True renumbers the rows 0..n-1 directly; the previous
# reset_index().drop(columns='index') round trip produced the same frame
# but raised if the data already contained a column named 'index'.
df = pd.concat([df_concat, df0_3], ignore_index=True)

# Fit the linear model on speed -> distance and report its R².
linear_test = LinearModel('test')
linear_test.fit(x=df['speed'], y=df['distance'])
linear_test.print_model_info()

# Fit the quadratic model on the same data and report its R².
quad_test = QuadraticModel('test2')
quad_test.fit(x=df['speed'], y=df['distance'])
quad_test.print_model_info()


The goodness of the linear model is : 0.952.
The goodness of the linear model is : 0.986.
