# Regresión Lineal con clases

In [20]:
#Importar librerías
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

## Clase de Regresión

In [28]:
#Clase AnalisisRegresion
class AnalisisRegresion:
    """
    Fit and evaluate a linear regression on a chosen subset of predictor columns.

    The instance holds the full predictor table and the target. Each call to
    ``ejecutar_regresion`` selects columns, splits the data into train and test
    sets, fits a ``LinearRegression`` and reports R^2 and RMSE on the test set.

    Attributes:
        x_data: Predictor table; it is indexed with a list of column names.
        y_data: Target values, split together with ``x_data``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.
        modelo: Most recently fitted ``LinearRegression``; ``None`` before the first run.
        r2: R^2 of the most recent run on its test split; ``None`` before the first run.
        rmse: RMSE of the most recent run on its test split; ``None`` before the first run.
    """

    # Constructor
    def __init__(self, x_data, y_data, test_size, random_state=45):
        """
        Store the data and the split configuration.

        Args:
            x_data: Predictor table that supports selection by a list of column names.
            y_data: Target values.
            test_size: Value passed to ``train_test_split(test_size=...)``.
            random_state: Seed passed to ``train_test_split``; defaults to 45,
                the value previously hard-coded in ``ejecutar_regresion``.
        """
        self.x_data = x_data
        self.y_data = y_data
        self.test_size = test_size
        self.random_state = random_state

        # Defined up front so the attributes exist (as None) before any run.
        self.modelo = None
        self.r2 = None
        self.rmse = None

    # Evaluate the regression
    def ejecutar_regresion(self, variables_predictoras):
        """
        Fit a linear regression on the given predictors and print its test metrics.

        The fitted model and both metrics are kept on the instance
        (``modelo``, ``r2``, ``rmse``) and are overwritten by each call.

        Args:
            variables_predictoras: List of column names of ``x_data`` to use as predictors.

        Returns:
            None. Results are printed and stored on the instance.
        """
        # 1. Keep only the requested predictor columns
        x_filtrado = self.x_data[variables_predictoras]

        # 2. Build the train and test sets
        X_train, X_test, y_train, y_test = train_test_split(
            x_filtrado,
            self.y_data,
            test_size=self.test_size,
            random_state=self.random_state,
        )

        # 3. Train the model
        self.modelo = LinearRegression()
        self.modelo.fit(X_train, y_train)

        # 4. Predict on the held-out test set
        y_pred = self.modelo.predict(X_test)

        # 5. Compute the metrics and keep them for later inspection
        self.r2 = r2_score(y_test, y_pred)
        self.rmse = np.sqrt(mean_squared_error(y_test, y_pred))

        print ('Resultados para las variables '+ str(variables_predictoras))
        print('El r-2 del modelo es '+str(self.r2))
        print('El RMSE del modelo es '+str(self.rmse))



## Regresión aplicada a Alturas de personas

In [4]:
# Load the heights dataset; the relative path is resolved against the working directory.
alturas = pd.read_excel('alturas.xlsx')


In [5]:
# Display the loaded heights table to inspect its columns.
alturas

Unnamed: 0,Peso,Mujer,CaloriasAlDia,AlturaMama,AlturaPapa,AlturaPersona
0,70,0,2726,145,160,158
1,62,0,2072,158,160,160
2,70,0,2994,166,160,167
3,90,0,2923,172,160,160
4,82,0,2620,147,161,162
...,...,...,...,...,...,...
134,76,1,2800,151,188,186
135,90,1,2716,168,188,186
136,67,1,2129,149,189,186
137,63,1,2149,152,189,186


In [7]:
# Predictor table: every column except the target 'AlturaPersona'.
altura_x = alturas.drop(columns=['AlturaPersona'])
altura_x

Unnamed: 0,Peso,Mujer,CaloriasAlDia,AlturaMama,AlturaPapa
0,70,0,2726,145,160
1,62,0,2072,158,160
2,70,0,2994,166,160
3,90,0,2923,172,160
4,82,0,2620,147,161
...,...,...,...,...,...
134,76,1,2800,151,188
135,90,1,2716,168,188
136,67,1,2129,149,189
137,63,1,2149,152,189


In [32]:
# Target column; selecting a single column label already yields a Series.
altura_y = alturas['AlturaPersona']

# Hold out 20% of the rows as the test set.
analisis_regresion = AnalisisRegresion(altura_x, altura_y, 0.2)

# Predictor sets to compare, evaluated in order.
combinaciones_de_variables = [
    ['Peso', 'AlturaMama', 'AlturaPapa'],
    ['Mujer', 'AlturaMama', 'AlturaPapa'],
    ['Mujer', 'AlturaMama', 'AlturaPapa', 'CaloriasAlDia', 'Peso'],
]
for variables in combinaciones_de_variables:
    analisis_regresion.ejecutar_regresion(variables)

Resultados para las variables ['Peso', 'AlturaMama', 'AlturaPapa']
El r-2 del modelo es 0.4451441834845081
El RMSE del modelo es 5.990067782817965
Resultados para las variables ['Mujer', 'AlturaMama', 'AlturaPapa']
El r-2 del modelo es 0.7366192664982241
El RMSE del modelo es 4.126992377190184
Resultados para las variables ['Mujer', 'AlturaMama', 'AlturaPapa', 'CaloriasAlDia', 'Peso']
El r-2 del modelo es 0.7507523230746693
El RMSE del modelo es 4.014738150094367


## Diabetes

In [33]:
from sklearn.datasets import load_diabetes

In [34]:
# Load the diabetes dataset through scikit-learn's load_diabetes helper.
diabetes_data = load_diabetes()

In [36]:
# Inspect the raw dataset object; its repr shows the 'data' and 'target' arrays.
diabetes_data

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [43]:
# Convert to pandas so predictors can be selected by column name.
diabetes_x = pd.DataFrame(data=diabetes_data.data, columns=diabetes_data.feature_names)
diabetes_y = pd.Series(diabetes_data.target)

# Only the last expression of a cell is rendered, so a bare `diabetes_x`
# placed before this line displayed nothing and has been removed.
diabetes_y

0      151.0
1       75.0
2      141.0
3      206.0
4      135.0
       ...  
437    178.0
438    104.0
439    132.0
440    220.0
441     57.0
Length: 442, dtype: float64

In [44]:
# Analysis object for the diabetes data, holding out 20% of the rows as the test set.
analisis_regresion_diabetes = AnalisisRegresion(diabetes_x, diabetes_y, 0.2)


In [45]:
# Display the predictor table built from the diabetes dataset.
diabetes_x

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641
...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930


In [49]:
# Baseline run using all ten predictor columns.
variables_completas = ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
analisis_regresion_diabetes.ejecutar_regresion(variables_completas)

Resultados para las variables ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
El r-2 del modelo es 0.5188113124539246
El RMSE del modelo es 48.727137609532534


In [50]:
# Same run with 'sex' left out, for comparison against the full predictor set.
variables_sin_sex = ['age', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
analisis_regresion_diabetes.ejecutar_regresion(variables_sin_sex)

Resultados para las variables ['age', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
El r-2 del modelo es 0.5012198039939113
El RMSE del modelo es 49.60983660457265
