In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso
from sklearn import neighbors
from sklearn.datasets import fetch_openml


In [None]:
# Load the prostate dataset from a tab-separated text file (path is relative
# to the notebook's working directory).
raw_data = pd.read_csv("datasets/prostate_dataset.txt", sep="\t")


In [None]:
# Preview the first five rows to check that the file was parsed as expected.
raw_data.head(n=5)


In [None]:
# Positional (non-shuffled) split: the first 60 rows are used for training,
# every remaining row for testing.
train_rows = slice(None, 60)
test_rows = slice(60, None)

# Features: column 1 up to (but excluding) the third-from-last column.
# Target: the second-to-last column.
# NOTE(review): with these slices the columns at positions -3 and -1 are used
# neither as a feature nor as the target -- confirm this is intentional.
feature_cols = slice(1, -3)
target_col = -2

X_train, y_train = (
    raw_data.iloc[train_rows, feature_cols],
    raw_data.iloc[train_rows, target_col],
)
X_test, y_test = (
    raw_data.iloc[test_rows, feature_cols],
    raw_data.iloc[test_rows, target_col],
)


In [None]:
# Create a plain linear regression model and fit it on the training data.
lr = LinearRegression().fit(X_train, y_train)

# Mean squared error on the test set; kept as the baseline that the
# regularized models below are compared against.
baseline_residuals = lr.predict(X_test) - y_test
baseline_error = np.mean(baseline_residuals ** 2)

print(baseline_error)


In [None]:
# Grid of regularization strengths: 200 values evenly spaced on a log scale
# between 1e-5 and 1e5.
n_alphas = 200
alphas = np.logspace(start=-5, stop=5, num=n_alphas)


In [None]:
# Sweep the Ridge regularization strength over `alphas`, re-fitting the same
# estimator each time and recording, per alpha:
#   - coefs:  the fitted coefficient vector
#   - errors: [baseline_error, test-set MSE of this Ridge fit]
# The baseline is repeated in every row so that plotting `errors` draws it as
# a flat reference line next to the Ridge curve.
ridge = Ridge()

coefs, errors = [], []
for alpha in alphas:
    ridge.set_params(alpha=alpha).fit(X_train, y_train)
    coefs.append(ridge.coef_)

    ridge_mse = np.mean((ridge.predict(X_test) - y_test) ** 2)
    errors.append([baseline_error, ridge_mse])


In [None]:
# Regularization path of the Ridge fits: `coefs` holds one coefficient vector
# per alpha, so this draws one curve per coefficient against alpha (log x-axis).
ax = plt.gca()

ax.plot(alphas, coefs)
ax.set(
    xscale="log",
    xlabel="alpha",
    ylabel="weights",
    title="Ridge coefficients as a function of the regularization",
)
ax.axis("tight")
plt.show()


In [None]:
# Test-set error as a function of alpha (log x-axis).
# Each row of `errors` is [baseline_error, ridge_error], so plotting it draws
# exactly two curves: the flat baseline and the Ridge error. They are labelled
# explicitly so the figure can be read on its own.
# NOTE: `errors` and `alphas` are rebound by the Lasso cell further down; this
# cell must be run right after the Ridge sweep for the labels to be correct.
ax = plt.gca()

baseline_line, ridge_line = ax.plot(alphas, errors)
ax.set_xscale("log")
ax.legend(
    [baseline_line, ridge_line],
    ["baseline (linear regression)", "ridge"],
)
plt.xlabel("alpha")
plt.ylabel("error")
plt.title("Test error as a function of the regularization")
plt.axis("tight")
plt.show()


In [None]:
# Smallest test-set MSE reached over the alpha grid. Each row of `errors` is
# [baseline_error, model_error]; only the second entry varies between rows.
min(model_mse for _, model_mse in errors)


# Lasso

In [None]:
# Sweep the Lasso regularization strength over 300 values, log-spaced between
# 1e-5 and 1e1, recording for each alpha the fitted coefficients and
# [baseline_error, test-set MSE of this Lasso fit].
#
# NOTE: this cell rebinds `n_alphas`, `alphas`, `coefs` and `errors`, which the
# Ridge cells above also use -- re-running the Ridge plots after this cell
# would show Lasso data.
# NOTE(review): the Lasso is built with fit_intercept=False, whereas the
# LinearRegression baseline and the Ridge model are constructed with default
# arguments -- confirm the comparison against `baseline_error` is intended.
n_alphas = 300
alphas = np.logspace(start=-5, stop=1, num=n_alphas)
lasso = Lasso(fit_intercept=False)

coefs, errors = [], []
for alpha in alphas:
    lasso.set_params(alpha=alpha).fit(X_train, y_train)
    coefs.append(lasso.coef_)

    lasso_mse = np.mean((lasso.predict(X_test) - y_test) ** 2)
    errors.append([baseline_error, lasso_mse])


In [None]:
# Regularization path of the Lasso fits: `coefs` holds one coefficient vector
# per alpha, so this draws one curve per coefficient against alpha (log x-axis).
# A title is set so the figure is self-describing, mirroring the Ridge plot.
ax = plt.gca()

ax.plot(alphas, coefs)
ax.set_xscale("log")
plt.xlabel("alpha")
plt.ylabel("weights")
plt.title("Lasso coefficients as a function of the regularization")
plt.axis("tight")
plt.show()


In [None]:
# Smallest test-set MSE reached over the alpha grid. Each row of `errors` is
# [baseline_error, model_error]; only the second entry varies between rows.
min(model_mse for _, model_mse in errors)
