*Creado por:*

*Isabel Maniega*

# Regresión Lineal Múltiple

In [None]:
# pip install scikit-learn

In [1]:
import numpy as np
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
dataset = datasets.load_diabetes()
print(dataset.DESCR)
# Crear un DataFrame con los datos
data = pd.DataFrame(dataset.data, columns=dataset.feature_names)
data['level'] = dataset.target

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

:Number of Instances: 442

:Number of Attributes: First 10 columns are numeric predictive values

:Target: Column 11 is a quantitative measure of disease progression one year after baseline

:Attribute Information:
    - age     age in years
    - sex
    - bmi     body mass index
    - bp      average blood pressure
    - s1      tc, total serum cholesterol
    - s2      ldl, low-density lipoproteins
    - s3      hdl, high-density lipoproteins
    - s4      tch, total cholesterol / HDL
    - s5      ltg, possibly log of serum triglycerides level
    - s6      glu, blood sugar level

Note: Each of these 10 feature variables have bee

In [3]:
data

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,level
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [4]:
# Load the diabetes dataset
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)

In [5]:
from sklearn.model_selection import train_test_split
# Separo los datos de "train" entrenamiento y "test" prueba para probar los algoritmos

X_train, X_test, y_train, y_test = train_test_split(diabetes_X, diabetes_y, test_size=0.2)

In [6]:
lr_multiple = linear_model.LinearRegression()

In [7]:
lr_multiple.fit(X_train, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [8]:
y_pred = lr_multiple.predict(X_test)
y_pred

array([ 69.89539793, 111.3956397 , 222.09049192, 187.78702915,
       147.91766319, 153.48589202, 189.69568986,  88.30502131,
       263.70051781,  90.30471166, 110.13815517, 195.93187358,
       131.58379584,  93.11914626, 117.1383787 , 187.37929332,
       196.92826   , 156.77500897,  57.37662792, 149.24980365,
       272.31142886, 112.7688847 , 101.57930354,  69.01030977,
        82.89170322, 123.01143542, 188.69956361, 117.29388231,
       156.66863729, 180.44945559, 223.74625971, 270.59784514,
       130.74801946, 210.05257874,  74.6296211 , 161.87608841,
        59.16341353, 176.93115934, 225.94321479, 154.27106013,
        81.36548693, 180.84752225, 197.29043982, 195.54257411,
       176.12989844, 144.98181296, 166.33335655, 104.26068736,
        85.3348765 , 235.96034916, 101.31173506, 144.48949596,
       188.14339677, 237.17399765, 156.90510172, 153.09478569,
        94.49064876, 294.33093511, 205.49674258, 170.91562665,
       171.01984749, 164.52266091, 132.01252944, 187.42

In [9]:
y_test

array([158.,  96., 310., 292., 141., 182., 222.,  54., 263., 115., 107.,
       233., 148.,  88.,  60., 122., 241.,  94.,  63.,  90., 243.,  87.,
        69., 143.,  42., 139., 178., 177.,  95., 200., 173., 303., 140.,
       265.,  80., 216.,  65., 180., 192., 259.,  65., 257., 281.,  78.,
       109., 168., 190.,  90.,  53., 152., 125., 214., 167., 246., 210.,
       134., 118., 270., 197., 244., 174., 206., 170., 229., 280.,  78.,
        97., 129., 275., 136.,  93.,  71., 219., 242., 259.,  99.,  53.,
       171., 111., 132.,  70., 281.,  25., 232., 156., 145., 261., 101.,
        77.])

In [10]:
print('DATOS DEL MODELO REGRESIÓN LINEAL MULTIPLE')
print()
print('Valor de las pendientes o coeficientes "a":')
print(lr_multiple.coef_)
print('Valor de la intersección o coeficiente "b":')
print(lr_multiple.intercept_)

DATOS DEL MODELO REGRESIÓN LINEAL MULTIPLE

Valor de las pendientes o coeficientes "a":
[   8.06756295 -257.71078601  566.85449184  257.6379091  -371.21950691
   98.15047744  -91.79427087   94.76616109  640.32613394   85.91753089]
Valor de la intersección o coeficiente "b":
152.02845179710067


In [11]:
print("Precisión del modelo:")
print(lr_multiple.score(X_train, y_train))

Precisión del modelo:
0.51551642553972


*Creado por:*

*Isabel Maniega*