# Práctica Dos

Grupo 14:
* Joaquín Ibáñez Penalva
* Aurora Zuoris

Para la realización de esta práctica se usarán las librerías numpy, pandas, matplotlib y sklearn.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# Load 'tiempos.csv'; the file uses ';' as the field separator.
df = pd.read_csv('tiempos.csv', sep=';')
df.head()  # preview the first rows of the loaded table

In [None]:
def hhmmss_to_mins(x):
    """Convert a colon-separated ``H:M:S`` string into minutes.

    The three fields are parsed as integers. Hours are scaled by 60 and the
    seconds contribute the fractional part, e.g. ``"01:30:30"`` -> ``90.5``.
    """
    hours, minutes, seconds = (int(field) for field in x.split(':'))
    return hours * 60 + minutes + seconds / 60

# Convert every cell of the table to minutes with hhmmss_to_mins.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map, so use the new name when this pandas version provides it
# and fall back to applymap on older versions.
if hasattr(pd.DataFrame, 'map'):
    df = df.map(hhmmss_to_mins)
else:
    df = df.applymap(hhmmss_to_mins)
df.head()

In [None]:
# Export the table to a plain NumPy array; the cells below slice it by column.
data = df.to_numpy()
data.shape  # (rows, columns) of the array

## Ejercicio 1

In [None]:
# Hold out 30% of the rows for testing. The seed is fixed (same value used in
# Ejercicio 2) so the fitted weights and the reported error are reproducible.
train, test = train_test_split(data, test_size=0.3, random_state=42)
x = train[:, :-1]  # all rows, every column except the last one
y = train[:, -1]  # all rows, last column only

# Linear regression with an estimated intercept (w0).
reg = LinearRegression().fit(x, y)
print('R^2 (train):', reg.score(x, y))  # was computed and discarded before
pesos = reg.coef_  # coefficients
intercepcion = reg.intercept_  # intercept
prediccion = reg.predict(test[:, :-1])  # predictions for the held-out rows
print(pesos)
print(intercepcion)

# Same model with the intercept forced to 0.
reg2 = LinearRegression(fit_intercept=False).fit(x, y)
print('R^2 (train, w0 = 0):', reg2.score(x, y))
pesos2 = reg2.coef_
print(pesos2)

In [None]:
# Bar chart comparing the weights of the two fits from Ejercicio 1:
# with an estimated intercept (w0 != 0) and with the intercept forced to 0.
ax = plt.subplot(111)
positions = np.arange(len(pesos) + 1)  # one slot per weight plus one for w0
# Tick labels are derived from the number of weights instead of being
# hard-coded for exactly six features.
tick_labels = [f'P{i}' for i in range(1, len(pesos) + 1)] + ['w0']

# Weights followed by the intercept; labels are attached to each series so
# the legend does not depend on the drawing order.
ax.bar(positions, np.append(pesos, intercepcion), width=0.3, label='w0 != 0')
# Weights of the intercept-free fit, shifted left so both series are visible.
ax.bar(np.arange(len(pesos2)) - 0.3, pesos2, width=0.3, label='w0 = 0')
ax.set_xlabel('X')
ax.set_ylabel('Y')
# Separate calls keep this working on matplotlib versions where set_xticks
# does not accept the labels as a second argument.
ax.set_xticks(positions)
ax.set_xticklabels(tick_labels)
ax.legend()
ax.set_title('Gráfico de Barras')
ax.grid()
plt.show()

Gráfico de los pesos cuando la intercepción (w0) se estima y cuando se fija a 0. Se puede apreciar cómo el peso de P6 suele ser prácticamente igual en ambos casos, mientras que los de P1 o P2 suelen variar más.

In [None]:
# Root mean squared error (RMSE) on the held-out rows. The arguments follow
# the documented (y_true, y_pred) order, and the square root is taken
# explicitly because the `squared` keyword is deprecated in recent
# scikit-learn releases.
np.sqrt(mean_squared_error(test[:, -1], prediccion))

## Ejercicio 2

Se dividen los datos para entrenar y testear.

In [None]:
# Features: every column but the last. Target: the last column.
X = data[:, :-1]
y = data[:, -1]

# 70/30 train/test split with a fixed seed.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print(*(part.shape for part in (train_X, test_X, train_y, test_y)))

## Ridge

Para esta parte, se entrena un modelo Ridge varias veces con los mismos datos, cambiando el valor de lambda en cada entrenamiento.

In [None]:
# Regularisation strengths to try.
lambdas = [0.1, 0.2, 0.5, 1, 2, 5, 10]

# One Ridge fit per strength, with an estimated intercept...
ridge_models = [
    Ridge(alpha=strength, fit_intercept=True).fit(train_X, train_y)
    for strength in lambdas
]
# ...and with the intercept disabled.
ridge_models_uncentered = [
    Ridge(alpha=strength, fit_intercept=False).fit(train_X, train_y)
    for strength in lambdas
]

In [None]:
# Test-set error of every Ridge model, for both intercept settings.
# The reported value is the root mean squared error, so it is labelled RMSE
# (it was previously mislabelled as MSE). The square root is taken explicitly
# because the `squared` keyword is deprecated in recent scikit-learn releases.
for header, fitted in (
    ('--- centered ---', ridge_models),
    ('--- uncentered ---', ridge_models_uncentered),
):
    print(header)
    for lam, model in zip(lambdas, fitted):
        rmse = np.sqrt(mean_squared_error(test_y, model.predict(test_X)))
        print('RMSE for lambda = {:>4.1f}: {:.5f}'.format(lam, rmse))

In [None]:
# Print the intercept and coefficients of every Ridge model, first for the
# fits with an intercept and then for the fits without one.
for header, fitted in (
    ('--- centered ---', ridge_models),
    ('--- uncentered ---', ridge_models_uncentered),
):
    print(header)
    for lam, model in zip(lambdas, fitted):
        print(f"lambda = {lam}, intercept = {model.intercept_}, coef = {model.coef_}")

In [None]:
# 2x2 grid of bar charts: the top row shows Ridge fits with an intercept, the
# bottom row the fits without one.
fig, axs = plt.subplots(2, 2, figsize=(8, 4.5))

# Only the models at positions 1 and 2 of each list are plotted.
ridge_models_reduced = ridge_models[1:3]
ridge_models_uncentered_reduced = ridge_models_uncentered[1:3]

models = [ridge_models_reduced, ridge_models_uncentered_reduced]

for row, ms in enumerate(models):
    for col, m in enumerate(ms):
        panel = axs[row, col]
        # Intercept first, followed by the coefficients.
        weights = np.insert(m.coef_, 0, m.intercept_)
        # Bar positions follow the actual number of weights instead of a
        # hard-coded count.
        positions = np.arange(len(weights))
        panel.set_ylabel('y')
        c_text = 'centered' if m.fit_intercept else 'uncentered'
        panel.set_title('{}, lambda = {}'.format(c_text, m.alpha))
        panel.bar(positions, weights)
        # Name each bar so the chart can be read on its own.
        panel.set_xticks(positions)
        panel.set_xticklabels(['w0'] + [f'P{i}' for i in range(1, len(weights))])

# Compute the layout last, once every title and label exists, so the padding
# accounts for them.
fig.tight_layout(pad=3)

## Lasso

Para esta parte, se entrena un modelo Lasso varias veces con los mismos datos, cambiando el valor de lambda en cada entrenamiento.

In [None]:
# Regularisation strengths to try.
lambdas = [0.1, 0.2, 0.5, 1, 2, 5, 10]

# One Lasso fit per strength, with an estimated intercept...
lasso_models = [
    Lasso(alpha=strength, fit_intercept=True, max_iter=50_000).fit(train_X, train_y)
    for strength in lambdas
]
# ...and with the intercept disabled.
lasso_models_uncentered = [
    Lasso(alpha=strength, fit_intercept=False, max_iter=50_000).fit(train_X, train_y)
    for strength in lambdas
]

In [None]:
# Test-set error of every Lasso model, for both intercept settings.
# The reported value is the root mean squared error, so it is labelled RMSE
# (it was previously mislabelled as MSE). The square root is taken explicitly
# because the `squared` keyword is deprecated in recent scikit-learn releases.
for header, fitted in (
    ('--- centered ---', lasso_models),
    ('--- uncentered ---', lasso_models_uncentered),
):
    print(header)
    for lam, model in zip(lambdas, fitted):
        rmse = np.sqrt(mean_squared_error(test_y, model.predict(test_X)))
        print('RMSE for lambda = {:>4.1f}: {:.5f}'.format(lam, rmse))

In [None]:
# 2x2 grid of bar charts: the top row shows Lasso fits with an intercept, the
# bottom row the fits without one.
fig, axs = plt.subplots(2, 2, figsize=(16, 9))

# Only the models at positions 1 and 2 of each list are plotted.
lasso_models_reduced = lasso_models[1:3]
lasso_models_uncentered_reduced = lasso_models_uncentered[1:3]

models = [lasso_models_reduced, lasso_models_uncentered_reduced]

for row, ms in enumerate(models):
    for col, m in enumerate(ms):
        panel = axs[row, col]
        # Intercept first, followed by the coefficients.
        weights = np.insert(m.coef_, 0, m.intercept_)
        # Bar positions follow the actual number of weights instead of a
        # hard-coded count.
        positions = np.arange(len(weights))
        panel.set_ylabel('y')
        c_text = 'centered' if m.fit_intercept else 'uncentered'
        panel.set_title('{}, lambda = {}'.format(c_text, m.alpha))
        panel.bar(positions, weights)
        # Name each bar so the chart can be read on its own.
        panel.set_xticks(positions)
        panel.set_xticklabels(['w0'] + [f'P{i}' for i in range(1, len(weights))])

# Compute the layout last, once every title and label exists, so the padding
# accounts for them.
fig.tight_layout(pad=3)