In [3]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression as LR, Ridge as R, Lasso as LA 

In [40]:
def load_usps(file):
    with open(file,"r") as f:
        f.readline()
        data = [[float(x) for x in l.split()] for l in f if len(l.split())>2]
    tmp = np.array(data)
    n = tmp.shape[0]
    tmp[:, 1:] = (tmp[:, 1:] - tmp[:, 1:].min(axis=1).reshape(n, 1)) / tmp[:, 1:].max(axis=1).reshape(n, 1)
    return tmp[:, 1:], tmp[:, 0].astype(int)

x_train, y_train = load_usps("USPS/USPS_train.txt")
x_test,  y_test  = load_usps("USPS/USPS_test.txt")

#On centre et on réduit pour les calculs futurs
x_train = (x_train - x_train.mean()) / x_train.std()
x_test  = (x_test  - x_test.mean()) / x_test.std()

In [50]:
#Régression Linéaire
lin = LR()
lin.fit(x_train,y_train)
nb_zeros_lin = sum(np.array(lin.coef_) == 0)
mean_lin = np.abs(np.array(lin.coef_).mean())
score_lin = lin.score(x_test,y_test)

print("nb_zeros_lin : {0}".format(nb_zeros_lin))
print("mean_lin : {0}".format(mean_lin))
print("score_lin : {0}".format(score_lin))
print("\n")

nb_zeros_lin : 0
mean_lin : 0.008772592549280289
score_lin : 0.5890606133281702




In [51]:
#Régression Ridge 

#Pour alpha = 0.25
ridge25 = R(alpha=0.25)
ridge25.fit(x_train,y_train)
nb_zeros_ridge25 = sum(np.array(ridge25.coef_) == 0)
mean_ridge25 = np.abs(np.array(ridge25.coef_).mean())
score_ridge25 = ridge25.score(x_test,y_test)

print("nb_zeros_ridge25 : {0}".format(nb_zeros_ridge25))
print("mean_ridge25 : {0}".format(mean_ridge25))
print("score_ridge25 : {0}".format(score_ridge25))
print("\n")

#Pour alpha = 0.5
ridge5 = R(alpha=0.5)
ridge5.fit(x_train,y_train)
nb_zeros_ridge5 = sum(np.array(ridge5.coef_) == 0)
mean_ridge5 = np.abs(np.array(ridge5.coef_).mean())
score_ridge5 = ridge5.score(x_test,y_test)

print("nb_zeros_ridge5 : {0}".format(nb_zeros_ridge5))
print("mean_ridge5 : {0}".format(mean_ridge5))
print("score_ridge5 : {0}".format(score_ridge5))
print("\n")

#Pour alpha = 0.75
ridge75 = R(alpha=0.75)
ridge75.fit(x_train,y_train)
nb_zeros_ridge75 = sum(np.array(ridge75.coef_) == 0)
mean_ridge75 = np.abs(np.array(ridge75.coef_).mean())
score_ridge75 = ridge75.score(x_test,y_test)

print("nb_zeros_ridge75 : {0}".format(nb_zeros_ridge75))
print("mean_ridge75 : {0}".format(mean_ridge75))
print("score_ridge75 : {0}".format(score_ridge75))
print("\n")

#Pour alpha = 1.00
ridge100 = R()
ridge100.fit(x_train,y_train)
nb_zeros_ridge100 = sum(np.array(ridge100.coef_) == 0)
mean_ridge100 = np.abs(np.array(ridge100.coef_).mean())
score_ridge100 = ridge100.score(x_test,y_test)

print("nb_zeros_ridge100 : {0}".format(nb_zeros_ridge100))
print("mean_ridge100 : {0}".format(mean_ridge100))
print("score_ridge100 : {0}".format(score_ridge100))
print("\n")

nb_zeros_ridge25 : 0
mean_ridge25 : 0.00868729697664616
score_ridge25 : 0.5890910028129361


nb_zeros_ridge5 : 0
mean_ridge5 : 0.008605691595768614
score_ridge5 : 0.5891208931344325


nb_zeros_ridge75 : 0
mean_ridge75 : 0.008527520678511927
score_ridge75 : 0.5891503252908986


nb_zeros_ridge100 : 0
mean_ridge100 : 0.008452551730036745
score_ridge100 : 0.5891793348140035




In [54]:
#Régression LASSO 

#Pour alpha = 0.25
ridgeL25 = LA(alpha=0.25)
ridgeL25.fit(x_train,y_train)
nb_zeros_ridgeL25 = sum(np.array(ridgeL25.coef_) == 0)
mean_ridgeL25 = np.abs(np.array(ridgeL25.coef_).mean())
score_ridgeL25 = ridgeL25.score(x_test,y_test)

print("nb_zeros_ridgeL25 : {0}".format(nb_zeros_ridgeL25))
print("mean_ridgeL25 : {0}".format(mean_ridgeL25))
print("score_ridgeL25 : {0}".format(score_ridgeL25))
print("\n")

#Pour alpha = 0.5
ridgeL5 = LA(alpha=0.5)
ridgeL5.fit(x_train,y_train)
nb_zeros_ridgeL5 = sum(np.array(ridgeL5.coef_) == 0)
mean_ridgeL5 = np.abs(np.array(ridgeL5.coef_).mean())
score_ridgeL5 = ridgeL5.score(x_test,y_test)

print("nb_zeros_ridgeL5 : {0}".format(nb_zeros_ridgeL5))
print("mean_ridgeL5 : {0}".format(mean_ridgeL5))
print("score_ridgeL5 : {0}".format(score_ridgeL5))
print("\n")

#Pour alpha = 0.75
ridgeL75 = LA(alpha=0.75)
ridgeL75.fit(x_train,y_train)
nb_zeros_ridgeL75 = sum(np.array(ridgeL75.coef_) == 0)
mean_ridgeL75 = np.abs(np.array(ridgeL75.coef_).mean())
score_ridgeL75 = ridgeL75.score(x_test,y_test)

print("nb_zeros_ridgeL75 : {0}".format(nb_zeros_ridgeL75))
print("mean_ridgeL75 : {0}".format(mean_ridgeL75))
print("score_ridgeL75 : {0}".format(score_ridgeL75))
print("\n")

#Pour alpha = 1.00
ridgeL100 = LA()
ridgeL100.fit(x_train,y_train)
nb_zeros_ridgeL100 = sum(np.array(ridgeL100.coef_) == 0)
mean_ridgeL100 = np.abs(np.array(ridgeL100.coef_).mean())
score_ridgeL100 = ridgeL100.score(x_test,y_test)

print("nb_zeros_ridgeL100 : {0}".format(nb_zeros_ridgeL100))
print("mean_ridgeL100 : {0}".format(mean_ridgeL100))
print("score_ridgeL100 : {0}".format(score_ridgeL100))
print("\n")

#print(ridgeL100.coef_.shape)

nb_zeros_ridgeL25 : 228
mean_ridgeL25 : 0.0022371190663526075
score_ridgeL25 : 0.49173662360426335


nb_zeros_ridgeL5 : 241
mean_ridgeL5 : 0.002633647358145477
score_ridgeL5 : 0.3945953158410368


nb_zeros_ridgeL75 : 248
mean_ridgeL75 : 0.002059619367114527
score_ridgeL75 : 0.2907511425941136


nb_zeros_ridgeL100 : 252
mean_ridgeL100 : 0.0011849242014583814
score_ridgeL100 : 0.19001898995000144


(256,)
