In [40]:
# set up working catalog
import sys
from pathlib import Path
sys.path.append(str(Path("..")))

# imports
from common.consts import DATA_PATH
from common.utils import get_numeric_data
from common.custom_linear_regression import CustomLinearRegression

import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [41]:
data = get_numeric_data()

X = data.drop("Admission grade", axis=1)
y = data["Admission grade"]

X = StandardScaler().fit_transform(X) 

In [42]:
kfold = KFold(n_splits=3, shuffle=True, random_state=6)
print("Numer of splits:", kfold.get_n_splits(X))

Numer of splits: 3


In [43]:
# best = (None, 0)
model = CustomLinearRegression()
close_form_rows = []
gradient_descent_rows = []

for i, (train_index, test_index) in enumerate(kfold.split(X, y), 1):
    X_train = X[train_index]
    y_train = y.iloc[train_index]
    
    X_val = X[test_index]
    y_val = y.iloc[test_index]
    
    model.fit_closed_form(X_train, y_train)
    y_pred = model.predict(X_val)
    
    mse = mean_squared_error(y_val, y_pred)
    mae = mean_absolute_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)
    
    close_form_rows.append([mse, mae, r2])
    
    print(f"\t===== FOLD {i} =====")
    
    print("Close form")
    print(f"\tMSE: {mse}")
    print(f"\tMAE: {mae}")
    print(f"\tR2: {r2}")
    
    model.fit_gradient_descent(X_train, y_train)
    y_pred = model.predict(X_val)
    
    mse = mean_squared_error(y_val, y_pred)
    mae = mean_absolute_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)
    
    gradient_descent_rows.append([mse, mae, r2])
    
    print("Gradient descent")
    print(f"\tMSE: {mse}")
    print(f"\tMAE: {mae}")
    print(f"\tR2: {r2}")
    print()

	===== FOLD 1 =====
Close form
	MSE: 136.0730718554674
	MAE: 8.250195483548268
	R2: 0.3343825961488184
Gradient descent
	MSE: 135.8775963840628
	MAE: 8.240171649313002
	R2: 0.3353387873629853

	===== FOLD 2 =====
Close form
	MSE: 142.7706339001175
	MAE: 8.43368280123414
	R2: 0.3540815586161785
Gradient descent
	MSE: 142.90765833117825
	MAE: 8.436312488947074
	R2: 0.35346163696615496

	===== FOLD 3 =====
Close form
	MSE: 127.49775931360539
	MAE: 7.97309608742398
	R2: 0.3722983204666459
Gradient descent
	MSE: 127.23944573169494
	MAE: 7.967027986731054
	R2: 0.3735700594374669



In [44]:
df_close_form = pd.DataFrame(close_form_rows, columns=["MSE", "MAE", "R2"])
df_close_form

Unnamed: 0,MSE,MAE,R2
0,136.073072,8.250195,0.334383
1,142.770634,8.433683,0.354082
2,127.497759,7.973096,0.372298


In [46]:
df_gradeint_descent = pd.DataFrame(gradient_descent_rows, columns=["MSE", "MAE", "R2"])
df_gradeint_descent

Unnamed: 0,MSE,MAE,R2
0,135.877596,8.240172,0.335339
1,142.907658,8.436312,0.353462
2,127.239446,7.967028,0.37357
