# Regresión con Múltiples Predictores

**Regression with a Crab Age Dataset**
1. Link del dataset: https://www.kaggle.com/datasets/sidhus/crab-age-prediction
2. Predictores: Length, Diameter, Height, Weight, Shucked Weight, Viscera Weight, Shell Weight
3. Target: Edad (Age)

**Problemática:** Predecir la edad de los cangrejos a partir de sus características físicas.


In [None]:
#importar paquetes
%matplotlib inline

import pandas as pd
import numpy as np
from sklearn.preprocessing import scale, PolynomialFeatures, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, Lasso, LassoCV
from sklearn.metrics import mean_squared_error
import random

In [None]:
# Suppress all warnings so they do not clutter the cell outputs.
# NOTE(review): this blanket "ignore" filter also hides every warning raised by
# the libraries used later in the notebook; consider narrowing it to specific
# warning categories.
import warnings
warnings.filterwarnings("ignore")

## Entrenamiento de modelos de Regresión

In [None]:
import pandas as pd

In [None]:
# Location of the crab-age CSV file (an absolute path; adjust it when the file
# lives somewhere else).
DATA_PATH = "/content/CrabAgePrediction.csv"
datos = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded dataset.
datos.head()

Unnamed: 0,Sex,Length,Diameter,Height,Weight,Shucked Weight,Viscera Weight,Shell Weight,Age
0,F,1.4375,1.175,0.4125,24.635715,12.332033,5.584852,6.747181,9
1,M,0.8875,0.65,0.2125,5.40058,2.29631,1.374951,1.559222,6
2,I,1.0375,0.775,0.25,7.952035,3.231843,1.601747,2.764076,6
3,F,1.175,0.8875,0.25,13.480187,4.748541,2.282135,5.244657,10
4,I,0.8875,0.6625,0.2125,6.903103,3.458639,1.488349,1.70097,6


In [None]:
# Target: the crab's age.
y = datos["Age"]
# Predictors: every remaining column except the categorical "Sex" column,
# which is dropped together with the target.
columnas_excluidas = ["Age", "Sex"]
X = datos.drop(columns=columnas_excluidas)

In [None]:
# Preview the first values of the target (Age).
y.head()

0     9
1     6
2     6
3    10
4     6
Name: Age, dtype: int64

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

In [None]:
# Standardize the predictors. The scaler is fitted on the training split only
# and the same fitted transformation is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

***
***
# Regularización Polinomial

In [None]:
# Candidate regularization strengths: 100 values, evenly spaced on a log10
# scale, running from 1e10 down to 1e-2.
exponentes = np.linspace(10, -2, num=100)
lambdas = 10 ** exponentes
lambdas

array([1.00000000e+10, 7.56463328e+09, 5.72236766e+09, 4.32876128e+09,
       3.27454916e+09, 2.47707636e+09, 1.87381742e+09, 1.41747416e+09,
       1.07226722e+09, 8.11130831e+08, 6.13590727e+08, 4.64158883e+08,
       3.51119173e+08, 2.65608778e+08, 2.00923300e+08, 1.51991108e+08,
       1.14975700e+08, 8.69749003e+07, 6.57933225e+07, 4.97702356e+07,
       3.76493581e+07, 2.84803587e+07, 2.15443469e+07, 1.62975083e+07,
       1.23284674e+07, 9.32603347e+06, 7.05480231e+06, 5.33669923e+06,
       4.03701726e+06, 3.05385551e+06, 2.31012970e+06, 1.74752840e+06,
       1.32194115e+06, 1.00000000e+06, 7.56463328e+05, 5.72236766e+05,
       4.32876128e+05, 3.27454916e+05, 2.47707636e+05, 1.87381742e+05,
       1.41747416e+05, 1.07226722e+05, 8.11130831e+04, 6.13590727e+04,
       4.64158883e+04, 3.51119173e+04, 2.65608778e+04, 2.00923300e+04,
       1.51991108e+04, 1.14975700e+04, 8.69749003e+03, 6.57933225e+03,
       4.97702356e+03, 3.76493581e+03, 2.84803587e+03, 2.15443469e+03,
      

***
## 1. Ridge

In [None]:
# Pick the Ridge penalty by cross-validation over the `lambdas` grid,
# using the training split only, and display the selected value.
ridgecv = RidgeCV(alphas=lambdas).fit(X_train, y_train)
ridgecv.alpha_

0.37649358067924715

In [None]:
# Refit Ridge with the cross-validated penalty and evaluate it on the test split.
ridge = Ridge(alpha=ridgecv.alpha_).fit(X_train, y_train)
pred = ridge.predict(X_test)

# One coefficient per predictor, labelled with the original column names.
coeficientes_ridge = pd.Series(ridge.coef_, index=X.columns)
print(coeficientes_ridge)
print("MSE = ", mean_squared_error(y_test, pred))
# NOTE: `score` is the coefficient of determination (R^2), even though the
# printed label reads "R".
print("R = ", ridge.score(X_test, y_test))

Length           -0.373072
Diameter          1.516476
Height            0.446814
Weight            4.502929
Shucked Weight   -4.358391
Viscera Weight   -1.120952
Shell Weight      1.175421
dtype: float64
MSE =  5.096685289007378
R =  0.5250247099101417


***
## 2. Lasso


In [None]:
# Select the Lasso penalty by cross-validation on the standardized TRAINING
# split. Fitting on the raw, full (X, y) would tune alpha on unscaled features
# (a different scale from the one the final Lasso model is trained on) and
# would let the held-out test rows influence the choice of alpha.
lassocv = LassoCV()
lassocv.fit(X_train, y_train)
print("El mejor valor de Lambda es:", lassocv.alpha_)

El mejor valor de Lambda es: 0.02410225542263126


In [None]:
# Refit Lasso with the cross-validated penalty and evaluate it on the test split.
lasso = Lasso(alpha = lassocv.alpha_)
lasso.fit(X_train, y_train)
# Predict with the Lasso model itself (not the earlier `ridge` estimator), so
# the MSE printed below belongs to the same model as the coefficients and score.
pred = lasso.predict(X_test)
print(pd.Series(lasso.coef_, index = X.columns))
print("MSE = ", mean_squared_error(y_test, pred))
# NOTE: `score` is the coefficient of determination (R^2), even though the
# printed label reads "R".
print("R = ",lasso.score(X_test, y_test))

Length            0.000000
Diameter          1.030072
Height            0.434654
Weight            1.096416
Shucked Weight   -2.824044
Viscera Weight   -0.040175
Shell Weight      2.187616
dtype: float64
MSE =  5.096685289007378
R =  0.5123288496095932


***
***
# Regularización polinomial de grado n

In [None]:
# Degree of the polynomial expansion. The number of generated columns grows
# quickly with the degree, so high values make the later fits expensive.
degree = 9
# Learn the expansion from the training split and apply it to both splits.
poly = PolynomialFeatures(degree=degree).fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

***
## Ridge

In [None]:
# Cross-validated choice of the Ridge penalty on the polynomial features,
# searching the same `lambdas` grid; the selected value is displayed.
ridgecv = RidgeCV(alphas=lambdas).fit(X_train_poly, y_train)
ridgecv.alpha_

2477076355.991714

In [None]:
# Refit Ridge on the polynomial features with the selected penalty and
# report its test-set error.
ridge = Ridge(alpha=ridgecv.alpha_).fit(X_train_poly, y_train)
pred = ridge.predict(X_test_poly)
print("MSE = ", mean_squared_error(y_test, pred))
# NOTE: `score` is the coefficient of determination (R^2), even though the
# printed label reads "R".
print("R = ", ridge.score(X_test_poly, y_test))

MSE =  295538.2540626737
R =  -27541.090593435005


***
## Lasso

In [None]:
# Cross-validated choice of the Lasso penalty on the polynomial features,
# using LassoCV's default (automatically generated) grid of alphas.
lassocv = LassoCV().fit(X_train_poly, y_train)
print("El mejor valor de Lambda es:", lassocv.alpha_)

El mejor valor de Lambda es: 1286338.5403859841


In [None]:
# Refit Lasso on the polynomial features with the cross-validated penalty and
# report its test-set error.
lasso = Lasso(alpha = lassocv.alpha_)
lasso.fit(X_train_poly, y_train)
# Predict with the Lasso model itself (not the earlier `ridge` estimator), so
# the MSE printed below belongs to the same model as the score.
pred = lasso.predict(X_test_poly)
print("MSE = ", mean_squared_error(y_test, pred))
# NOTE: `score` is the coefficient of determination (R^2), even though the
# printed label reads "R".
print("R = ",lasso.score(X_test_poly, y_test))

MSE =  6.773973457662571
R =  0.24663977562581807


***
***

## Para mejores visualizaciones xd

In [None]:
from google.colab import widgets

In [None]:
# Compare Ridge and Lasso on polynomial expansions of degree 1 through 6,
# writing each degree's report into its own Colab tab.
orders = range(1,7)
tb = widgets.TabBar(list(map(str, orders)))

for order in orders:
  # `select` is true only for the first tab (degree 1).
  with tb.output_to(str(order), select=(order == 1)):
    print("\n Polinomio de orden", order)

    # Expand the standardized predictors into polynomial terms of this degree;
    # the expansion is learned from the training split and applied to both.
    poly = PolynomialFeatures(degree=int(order)).fit(X_train)
    X_train_poly = poly.transform(X_train)
    X_test_poly = poly.transform(X_test)

    print('Utilizando Ridge')

    # Cross-validate the Ridge penalty over `lambdas`, then refit with it.
    ridgecv = RidgeCV(alphas=lambdas).fit(X_train_poly, y_train)
    print("Alpha Optimo = ", ridgecv.alpha_)
    ridge = Ridge(alpha=ridgecv.alpha_).fit(X_train_poly, y_train)
    pred_train = ridge.predict(X_train_poly)
    pred_test = ridge.predict(X_test_poly)
    print("MSE_train = ", mean_squared_error(y_train, pred_train))
    print("MSE_test = ", mean_squared_error(y_test, pred_test))
    # NOTE: `score` is the coefficient of determination (R^2), although the label reads "R".
    print("R = ", ridge.score(X_test_poly, y_test))

    print('Utilizando Lasso')

    # Same procedure for Lasso, searching the same `lambdas` grid.
    lassocv = LassoCV(alphas=lambdas).fit(X_train_poly, y_train)
    print("Lambda Optimo:", lassocv.alpha_)
    lasso = Lasso(alpha=lassocv.alpha_).fit(X_train_poly, y_train)

    pred_train = lasso.predict(X_train_poly)
    pred_test = lasso.predict(X_test_poly)
    print("MSE_train = ", mean_squared_error(y_train, pred_train))
    print("MSE_test = ", mean_squared_error(y_test, pred_test))
    print("R = ", lasso.score(X_test_poly, y_test), "\n\n---\n")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 Polinomio de orden 1
Utilizando Ridge
Alpha Optimo =  0.37649358067924715
MSE_train =  4.874681139051995
MSE_test =  5.096685289007377
R =  0.5250247099101419
Utilizando Lasso
Lambda Optimo: 0.01
MSE_train =  4.90053487759001
MSE_test =  5.109728317743257
R =  0.5238091911943137 

---



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 Polinomio de orden 2
Utilizando Ridge
Alpha Optimo =  0.2848035868435805
MSE_train =  4.330924129939249
MSE_test =  4.744086464343695
R =  0.5578844451170988
Utilizando Lasso
Lambda Optimo: 0.030538555088334123
MSE_train =  4.68398759879774
MSE_test =  4.930850075602393
R =  0.5404793876325497 

---



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 Polinomio de orden 3
Utilizando Ridge
Alpha Optimo =  57.223676593502205
MSE_train =  4.5280670499713755
MSE_test =  4.9581141178294805
R =  0.5379385702911761
Utilizando Lasso
Lambda Optimo: 0.030538555088334123
MSE_train =  4.689703875317611
MSE_test =  5.093901884833339
R =  0.5252841036396021 

---



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 Polinomio de orden 4
Utilizando Ridge
Alpha Optimo =  705.4802310718645
MSE_train =  5.18970972631588
MSE_test =  6.773973457662571
R =  0.36871322719223487
Utilizando Lasso
Lambda Optimo: 2.009233002565046
MSE_train =  7.932404521151794
MSE_test =  8.257795150200606
R =  0.2304314619094232 

---



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 Polinomio de orden 5
Utilizando Ridge
Alpha Optimo =  10000000000.0
MSE_train =  10.220534183055191
MSE_test =  10.655635844337874
R =  0.006969542111559868
Utilizando Lasso
Lambda Optimo: 6.135907273413163
MSE_train =  8.788279851469083
MSE_test =  9.333093604733168
R =  0.13022119456638093 

---



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


 Polinomio de orden 6
Utilizando Ridge
Alpha Optimo =  151991108.2952933
MSE_train =  8.78815764844623
MSE_test =  9.604049970023148
R =  0.10496996344114762
Utilizando Lasso
Lambda Optimo: 6.135907273413163
MSE_train =  8.620346457813135
MSE_test =  9.117472388830905
R =  0.15031557822267527 

---



<IPython.core.display.Javascript object>