In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import scipy

In [2]:
df = pd.DataFrame([[2,3,4,5,6,1.5,5,7,8,10],[50,70,90,100,110,40,110,130,145,180]]).transpose()
df.columns = ["Hours", "Money"]
df

Unnamed: 0,Hours,Money
0,2.0,50.0
1,3.0,70.0
2,4.0,90.0
3,5.0,100.0
4,6.0,110.0
5,1.5,40.0
6,5.0,110.0
7,7.0,130.0
8,8.0,145.0
9,10.0,180.0


In [3]:
X = df.drop('Money', axis=1)
y = df['Money']

In [4]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler

poly_converter = PolynomialFeatures(degree=3, include_bias=False)
poly_features = poly_converter.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(poly_features, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
scaler.fit(X_train)
scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [6]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_val_score, cross_validate

alphas = [10, 100]

for alpha in alphas:
    ridge_model = Ridge(alpha=alpha)
    scores = cross_val_score(ridge_model, scaled_X_train, y_train, scoring='neg_mean_absolute_error', cv=5)
    print(f'alpha: {alpha} mean metric: {abs(scores.mean())}')

alpha: 10 mean metric: 12.707338437589433
alpha: 100 mean metric: 18.17224787992074


In [17]:
alphas = [1, 10, 100]

for alpha in alphas:
    ridge_model = Ridge(alpha=alpha)
    scores = cross_validate(ridge_model, scaled_X_train, y_train, 
                            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error'], cv=5)
    
    scores_df = pd.DataFrame(scores)
    print(f'alpha: {alpha} - Alpha DF:')
    print(scores_df)
    print()
    print(f'Mean MSE: {abs(scores_df["test_neg_mean_squared_error"].mean())}')
    print(f'Mean MAE: {abs(scores_df["test_neg_mean_absolute_error"].mean())}')
    
    print()
    print()

alpha: 1 - Alpha DF:
   fit_time  score_time  test_neg_mean_squared_error  \
0  0.001970    0.001216                  -486.127873   
1  0.012721    0.001616                   -17.569516   
2  0.000926    0.000484                    -9.073494   
3  0.000549    0.000520                    -1.577107   
4  0.000530    0.000430                  -179.466880   

   test_neg_mean_absolute_error  
0                    -17.524291  
1                     -4.181589  
2                     -3.012224  
3                     -1.255829  
4                    -13.396525  

Mean MSE: 138.76297405622068
Mean MAE: 7.874091720400399


alpha: 10 - Alpha DF:
   fit_time  score_time  test_neg_mean_squared_error  \
0  0.000664    0.000490                  -989.163859   
1  0.000497    0.000413                 -1122.047680   
2  0.000474    0.000404                    -0.183496   
3  0.000480    0.000402                    -2.766638   
4  0.000766    0.000583                  -104.348722   

   test_neg_mean_ab