In [1]:
# Import libraries.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score



In [2]:
# Read in CSV file diabetes.csv.
df = pd.read_csv('diabetes.csv')
df.head()



Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,Progression
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [3]:
# Progression is the dependent variable.
# Age, sex, bmi, blood pressure and the six blood serum measurements are all independent variables.
y = df.iloc[:,-1].values
X = df.iloc[:,0:9].values


In [4]:
y = y.reshape(-1,1)
diabetes_model = LinearRegression()
diabetes_model.fit(X,y)



In [5]:
# Generate training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size= 0.2)

In [6]:
# Tranform training set x using MinMaxScaler.
scaler = MinMaxScaler().fit(X_train)
X_train_scaledMM = scaler.transform(X_train)
X_train_scaledMM


array([[0.01666667, 1.        , 0.25619835, ..., 0.30263158, 0.14104372,
        0.09989925],
       [0.35      , 1.        , 0.45454545, ..., 0.17105263, 0.10860367,
        0.30136402],
       [0.33333333, 1.        , 0.3677686 , ..., 0.34210526, 0.14104372,
        0.23897543],
       ...,
       [0.15      , 0.        , 0.51239669, ..., 0.59210526, 0.14104372,
        0.31674804],
       [0.6       , 1.        , 0.20247934, ..., 0.25      , 0.14104372,
        0.30136402],
       [0.45      , 1.        , 0.23966942, ..., 0.26315789, 0.42313117,
        0.71285748]])

In [7]:
# Transform test set x using MinMaxScaler.
X_test_scaledMM = scaler.transform(X_test)
X_test_scaledMM

array([[ 0.61666667,  1.        ,  0.41322314,  0.32394366,  0.42857143,
         0.51394422,  0.13157895,  0.56417489,  0.02212664],
       [ 0.61666667,  0.        ,  0.37190083,  0.6056338 ,  0.73891626,
         0.59262948,  0.42105263,  0.42313117,  0.60497559],
       [ 0.8       ,  0.        ,  0.11157025,  0.29577465,  0.35960591,
         0.28984064,  0.48684211,  0.14104372,  0.19336588],
       [ 0.75      ,  1.        ,  0.42975207,  0.69014085,  0.42857143,
         0.4252988 ,  0.25      ,  0.28208745,  0.3315508 ],
       [ 0.28333333,  0.        ,  0.40495868,  0.15492958,  0.27586207,
         0.312749  ,  0.26315789,  0.28208745, -0.01158645],
       [ 0.66666667,  1.        ,  0.25206612,  0.47887324,  0.35960591,
         0.28386454,  0.42105263,  0.14104372,  0.36406262],
       [ 0.51666667,  0.        ,  0.57438017,  0.23      ,  0.54187192,
         0.53585657,  0.21052632,  0.48660085,  0.4141285 ],
       [ 0.45      ,  1.        ,  0.35123967,  0.45070423,  0

In [8]:
# Transform training set x using StandardScaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_train_scaled

array([[-2.1697685 ,  1.07351713, -0.50538266, ..., -0.3428147 ,
        -0.83373784, -1.67137395],
       [-0.65006953,  1.07351713,  0.54906923, ..., -1.12785259,
        -1.01330432, -0.66760578],
       [-0.72605448,  1.07351713,  0.08774653, ..., -0.10730334,
        -0.83373784, -0.97844763],
       ...,
       [-1.56188891, -0.93151751,  0.8566177 , ...,  1.38426865,
        -0.83373784, -0.5909572 ],
       [ 0.4897047 ,  1.07351713, -0.79096338, ..., -0.65682986,
        -0.83373784, -0.66760578],
       [-0.19415984,  1.07351713, -0.59325365, ..., -0.57832607,
         0.72770979,  1.38259891]])

In [9]:
# Transform test set x using StandardScaler.
X_test_scaled = scaler.transform(X_test)
X_test_scaled

array([[ 5.65689645e-01,  1.07351713e+00,  3.29391754e-01,
        -6.86787328e-01, -1.04830543e-01,  1.02766332e+00,
        -1.36336395e+00,  1.50843361e+00, -2.05886437e+00],
       [ 5.65689645e-01, -9.31517508e-01,  1.09714276e-01,
         7.46616720e-01,  1.73375498e+00,  1.55608060e+00,
         3.63719394e-01,  7.27709795e-01,  8.45093505e-01],
       [ 1.40152408e+00, -9.31517508e-01, -1.27425384e+00,
        -8.30127733e-01, -5.13405104e-01, -4.77322604e-01,
         7.56238338e-01, -8.33737839e-01, -1.20569039e+00],
       [ 1.17356923e+00,  1.07351713e+00,  4.17262745e-01,
         1.17663793e+00, -1.04830543e-01,  4.32357775e-01,
        -6.56829857e-01, -5.30140224e-02, -5.17204665e-01],
       [-9.54009325e-01, -9.31517508e-01,  2.85456258e-01,
        -1.54682976e+00, -1.00953136e+00, -3.23479599e-01,
        -5.78326069e-01, -5.30140224e-02, -2.22683481e+00],
       [ 7.93644490e-01,  1.07351713e+00, -5.27350411e-01,
         1.01584898e-01, -5.13405104e-01, -5.174555

In [10]:
# Reshape training set.
y_train = y_train.reshape(-1,1)

In [11]:
# Generate Multiple linear regression model on training set model.
multiple_model = LinearRegression()
multiple_model.fit(X_train,y_train)


In [12]:
# Print intercept and coefficient of training set model.
print("The intercept for the training set: ", multiple_model.intercept_)
print("The coefficient for the training set: ", multiple_model.coef_)

The intercept for the training set:  [150.35836326]
The coefficient for the training set:  [[   8.35116759 -237.76034593  502.2492559   354.48270117 -964.46458669
   625.13355377   99.36874359  127.86359837  872.43108593]]


In [13]:
# Predictions of y values using the model.
y_pred= multiple_model.predict(X_test)
y_pred[:5]

array([[ 89.57880674],
       [186.04547915],
       [ 76.01703594],
       [163.65394604],
       [ 84.80623558]])

In [14]:
r2_score(y_test,y_pred)
# The r2 score is very low and thus indicates that the model does
# not predict the values accurately.

0.46507031610221083