<a href="https://colab.research.google.com/github/Nico-GP/Coding-Dojo_Track3/blob/main/Week9/Abalone_Pipe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **CODING DOJO**: Abalon Pipe
## Creado por: Nicolas Gonzalez
### Descripcion: Ejercicio de prediccion con KNN de edad y sexo de abalon con pipeline
* Creado: 27/02/22
* Ultima Actualizacion: 27/02/22

In [75]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [76]:
import pandas as pd
ab = pd.read_csv('/content/drive/My Drive/CodingDojo/Week9/abalone.csv')
ab.head()

Unnamed: 0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
0,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
1,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
2,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
3,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


In [77]:
ab.shape

(4176, 9)

Se nombran las columnas de la base de datos

In [78]:
ab.columns =['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings']
ab.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
1,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
2,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
3,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


In [79]:
ab.shape

(4176, 9)

In [80]:
import numpy as np

Se crea la columna de edad en años ("Age" =  "Rings" + 1.5)

In [81]:
ab.apply(lambda row: row.Rings + 1.5, axis=1)
ab['Age'] = ab.apply(lambda row: row.Rings + 1.5, axis=1)
ab.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings,Age
0,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,8.5
1,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,10.5
2,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,11.5
3,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,8.5
4,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8,9.5


In [82]:
ab.isnull().sum()

Sex               0
Length            0
Diameter          0
Height            0
Whole weight      0
Shucked weight    0
Viscera weight    0
Shell weight      0
Rings             0
Age               0
dtype: int64

In [83]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [84]:
from sklearn.metrics import r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder

#Prediccion de edad

Se seleccionan las mediciones deseadas

In [85]:
ab_med = ab.loc[:, ['Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight']]
ab_med.head()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight
0,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07
1,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21
2,0.44,0.365,0.125,0.516,0.2155,0.114,0.155
3,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055
4,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12


In [86]:
medic = ['Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight']
ab_med.loc[:, medic].head()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight
0,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07
1,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21
2,0.44,0.365,0.125,0.516,0.2155,0.114,0.155
3,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055
4,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12


Se crea matriz de mediciones

In [87]:
X = ab_med.loc[:, medic].to_numpy()

In [88]:
X.shape

(4176, 7)

Se crea vector objetivo

In [89]:
y = ab.loc[:, 'Age'].values

In [90]:
y.shape

(4176,)

Se dividen los datos en entrenamiento y testeo

In [91]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 3)

In [92]:
pipe = make_pipeline(StandardScaler(), KNeighborsRegressor())
pipe.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('kneighborsregressor', KNeighborsRegressor())])

Coeficiente de determinacion entrenamiento

In [93]:
pipe.score(X_train, y_train)

0.6555512962081227

Coeficiente de determinacion testeo

In [94]:
pipe.score(X_test, y_test)

0.4714544304886764

#Prediccion de sexo

Se seleccionan las caracteristicas deseadas

In [95]:
ab_car = ab.loc[:, ['Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings', 'Age']]
ab_car.head()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings,Age
0,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,8.5
1,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,10.5
2,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,11.5
3,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,8.5
4,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8,9.5


In [96]:
caract = ['Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight', 'Viscera weight', 'Shell weight', 'Rings', 'Age']
ab_car.loc[:, caract].head()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings,Age
0,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,8.5
1,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,10.5
2,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,11.5
3,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,8.5
4,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8,9.5


Se crea matriz de caracteristicas

In [97]:
X = ab_car.loc[:, caract].to_numpy()

In [98]:
X.shape

(4176, 9)

Se crea vector objetivo

In [99]:
le = LabelEncoder()
y = le.fit_transform(ab.loc[:, 'Sex'].values)

In [100]:
y.shape

(4176,)

Se dividen los datos en entrenamiento y testeo

In [101]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 3)

In [102]:
pipe = make_pipeline(StandardScaler(), KNeighborsClassifier())
pipe.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('kneighborsclassifier', KNeighborsClassifier())])

Coeficiente de determinacion entrenamiento

In [103]:
pipe.score(X_train, y_train)

0.6819923371647509

Coeficiente de determinacion testeo

In [104]:
pipe.score(X_test, y_test)

0.539272030651341