In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets, tree
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Load scikit-learn's bundled diabetes dataset and pull out the pieces
# that the modelling cells below rely on.
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target
feature_names = diabetes.feature_names

In [None]:
def fit_and_error(X, y, seed, model, model_name):
    """Fit ``model`` on a random train split and print its hold-out RMSE and R-squared.

    Parameters
    ----------
    X, y : feature matrix and target values, passed straight to ``train_test_split``.
    seed : ``random_state`` for the split; reusing a seed reproduces the same split.
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
    model_name : label inserted into the two printed lines.

    Returns
    -------
    None. Both metrics are only printed.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Take the square root explicitly instead of passing ``squared=False``:
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6, while
    # sqrt(MSE) yields the same RMSE for a single target on every version.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print("RMSE ({}): {}".format(model_name, rmse))
    r2 = r2_score(y_test, y_pred)
    print("R-squared score ({}) : {}".format(model_name, r2))

# Linear regression models

In [None]:
seed = 3012

# (estimator, label) pairs: plain least squares, then Ridge and Lasso, each
# at a small and a large regularisation strength.
linear_candidates = [
    (LinearRegression(), "linear regression"),
    (Ridge(alpha=0.01), "small Ridge"),
    (Ridge(alpha=100), "big Ridge"),
    (Lasso(alpha=0.01), "small Lasso"),
    (Lasso(alpha=10), "big Lasso"),
]

# Same seed for every call, so all candidates are scored on the same split.
for estimator, label in linear_candidates:
    fit_and_error(X, y, seed, estimator, label)

RMSE (linear regression): 58.01703303432585
R-squared score (linear regression) : 0.5295657911827079
RMSE (small Ridge): 58.34626135065782
R-squared score (small Ridge) : 0.5242115114945769
RMSE (big Ridge): 87.22003521044822
R-squared score (big Ridge) : -0.06321324831254205
RMSE (small Lasso): 58.26101896634339
R-squared score (small Lasso) : 0.525600725392204
RMSE (big Lasso): 88.0562185057193
R-squared score (big Lasso) : -0.08369713825220626


# Decision tree regressor

In [None]:
seed = 3012
# The seed is used twice: for the train/test split and for the tree itself.
fit_and_error(
    X,
    y,
    seed=seed,
    model=tree.DecisionTreeRegressor(random_state=seed),
    model_name="decision tree regressor",
)

RMSE (decision tree regressor): 86.38683166249753
R-squared score (decision tree regressor) : -0.042996752780275926


# KNN regressor

In [None]:
seed = 3012
# Single hold-out evaluation of a 30-neighbour model, reported in the same
# format as the other models in this notebook.
fit_and_error(X, y, seed, KNeighborsRegressor(n_neighbors=30), "KNN regressor")

# Cross-validated evaluation. KFold() and KNeighborsRegressor() are both built
# with their library defaults here.
# NOTE(review): the hold-out run above uses n_neighbors=30 while this loop uses
# the default neighbour count - confirm the difference is intentional.
kf = KFold()

kNN = KNeighborsRegressor()

# enumerate(start=1) numbers the folds for the printed labels, replacing a
# hand-maintained counter.
for fold_number, (train_ind, test_ind) in enumerate(kf.split(X, y), start=1):
    X_train, y_train = X[train_ind], y[train_ind]
    X_test, y_test = X[test_ind], y[test_ind]
    kNN.fit(X_train, y_train)
    y_pred = kNN.predict(X_test)

    model_name = "Fold " + str(fold_number)

    # sqrt(MSE) instead of ``squared=False``: that keyword was deprecated in
    # scikit-learn 1.4 and removed in 1.6.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print("RMSE ({}): {}".format(model_name, rmse))
    r2 = r2_score(y_test, y_pred)
    print("R-squared score ({}) : {}".format(model_name, r2))

RMSE (KNN regressor): 65.4395337894654
R-squared score (KNN regressor) : 0.4014943459791206
RMSE (Fold 1): 56.27781010409307
R-squared score (Fold 1) : 0.3500881704895432
RMSE (Fold 2): 63.42066440961733
R-squared score (Fold 2) : 0.36602995137367067
RMSE (Fold 3): 59.63695849204433
R-squared score (Fold 3) : 0.43172984693193817
RMSE (Fold 4): 59.59086644475279
R-squared score (Fold 4) : 0.32312430274373505
RMSE (Fold 5): 61.70551028879026
R-squared score (Fold 5) : 0.41156845751616766


# Random forest regressor

In [None]:
seed = 3012
# The seed is used twice: for the train/test split and for the forest itself.
fit_and_error(
    X,
    y,
    seed=seed,
    model=RandomForestRegressor(random_state=seed),
    model_name="Random forest regressor",
)

RMSE (Random forest regressor): 62.0870605399103
R-squared score (Random forest regressor) : 0.46124655245531554


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=5161fece-8ef6-4825-9ea6-16ebba884483' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>