### SCIKIT LEARN LIBRARY MODELS FOR REGRESSION

#### Libraries

In [37]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

### Diabetes Dataset

In [15]:
# Load the diabetes dataset as pandas objects, then build a single frame
# holding the feature columns followed by the regression target.
diabetes = load_diabetes(as_frame=True)
df = diabetes.data.assign(target=diabetes.target)
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


### Train Test Split Dataset

In [17]:
# Separate the response column from the predictors.
y = df['target']
X = df.drop(columns="target")


In [20]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641


In [19]:
# Preview the first five target values.
y.head(n=5)

0    151.0
1     75.0
2    141.0
3    206.0
4    135.0
Name: target, dtype: float64

In [21]:
# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# split, and every metric computed from it, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=30)

# Sanity check: the two partitions together must cover every row exactly once.
assert len(X_train) + len(X_test) == len(X)

X_train.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
110,0.041708,0.05068,-0.032073,-0.022885,-0.049727,-0.040144,0.030232,-0.039493,-0.126097,0.015491
415,-0.005515,-0.044642,0.008883,-0.050427,0.02595,0.047224,-0.043401,0.07121,0.014821,0.003064
426,0.030811,0.05068,-0.034229,0.043666,0.057597,0.068831,-0.032356,0.057557,0.035459,0.085907
332,0.030811,-0.044642,0.104809,0.076958,-0.011201,-0.011335,-0.058127,0.034309,0.057108,0.036201
13,0.005383,0.05068,-0.001895,0.008101,-0.004321,-0.015719,-0.002903,-0.002592,0.038394,-0.013504


In [22]:
# Preview the first five rows of the held-out feature matrix.
X_test.head(n=5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
148,-0.060003,0.05068,0.01535,-0.019442,0.036958,0.048164,0.019187,-0.002592,-0.030748,-0.001078
408,0.063504,-0.044642,-0.050396,0.107944,0.031454,0.019354,-0.017629,0.023608,0.058038,0.040343
214,0.030811,-0.044642,-0.033151,-0.022885,-0.046975,-0.081167,0.103865,-0.076395,-0.039809,-0.054925
67,0.041708,0.05068,-0.014828,-0.017135,-0.005697,0.008394,-0.013948,-0.001854,-0.011897,0.003064
241,0.030811,0.05068,-0.008362,0.004658,0.014942,0.027496,0.008142,-0.008127,-0.029526,0.056912


In [23]:
# Preview the first five training targets.
y_train.head(n=5)

110    104.0
415    174.0
426    120.0
332    270.0
13     185.0
Name: target, dtype: float64

In [24]:
# Preview the first five held-out targets.
y_test.head(n=5)

148     96.0
408    189.0
214     65.0
67      97.0
241    177.0
Name: target, dtype: float64

### Model Selection

### Linear Regression

In [25]:
# Fit a linear regression on the training split. fit() returns the
# estimator itself, so construction and training are chained.
lr_model = LinearRegression().fit(X_train, y_train)

# Report the learned coefficients and the bias term.
print("Weights :", lr_model.coef_)
print("Intercept: ", lr_model.intercept_)

Weights : [   8.00062763 -241.90797765  519.81946304  364.35047673 -970.04612474
  610.41491496  159.71934694   85.59991741  865.75126761   54.57428586]
Intercept:  152.76892487267367


### Linear Regression Evaluation

In [29]:
# Score the linear model on the held-out split.
y_predicts = lr_model.predict(X_test)

# Error metrics in the units of the target (MAE, RMSE) and squared units (MSE).
MAE = mean_absolute_error(y_true=y_test, y_pred=y_predicts)
MSE = mean_squared_error(y_true=y_test, y_pred=y_predicts)
RMSE = np.sqrt(MSE)  # root of the MSE, back on the target's scale

print("MAE:", MAE)
print("MSE: ", MSE)
print("RMSE: ", RMSE)

MAE: 44.48706927404859
MSE:  3287.479348826673
RMSE:  57.336544618826416


In [31]:
# Coefficient of determination of the linear model's predictions on the
# held-out split.
R2 = r2_score(y_true=y_test, y_pred=y_predicts)
print("R2 Score: ", R2)

R2 Score:  0.4653044632644133


In [34]:
# Adjusted R² corrects the plain R² for the number of predictors:
#     R²_adj = 1 - (1 - R²) * (n - 1) / (n - m - 1)
# n (evaluated samples) and m (features) are read from X_test rather than
# hard-coding m = 10, so the result stays correct if the feature set changes.
n, m = X_test.shape
r2_adj = 1 - ((1-R2)*(n-1)/(n-m-1))
print("R2 Adjusted: ", r2_adj)


R2 Adjusted:  0.39675375342651753


### SGDRegressor

In [36]:
# Linear model trained with stochastic gradient descent: squared-error loss,
# elastic-net regularisation (strength alpha) and a constant learning rate.
# random_state is fixed because SGD is a stochastic optimiser; without a seed
# the learned weights and every downstream score change from run to run.
sgd_model = SGDRegressor(
    loss='squared_error',
    penalty='elasticnet',
    learning_rate='constant',
    alpha=0.001,
    random_state=30,
)
sgd_model.fit(X_train, y_train)

# Report the learned coefficients and the bias term.
print("Weights :", sgd_model.coef_)
print("Intercept: ", sgd_model.intercept_)

Weights : [  54.23453239  -60.33251427  279.72019549  200.85056988   20.43631226
  -18.20474023 -145.4354685   112.01109474  256.43514069  128.82322395]
Intercept:  [155.65120309]


### SGDRegressor Evaluation

In [38]:
# Score the SGD model on the held-out split and report its adjusted R²:
#     R²_adj = 1 - (1 - R²) * (n - 1) / (n - m - 1)
# NOTE(review): this rebinds R2, n, m and r2_adj, which the linear-regression
# cells also assign. Re-running the earlier adjusted-R² cell after this one
# would silently use the SGD score.
sgd_predict = sgd_model.predict(X_test)
R2 = r2_score(y_test, sgd_predict)

# Take the sample and feature counts from the data instead of hard-coding
# m = 10, so the correction tracks the actual number of predictors.
n, m = X_test.shape
r2_adj = 1 - ((1-R2)*(n-1)/(n-m-1))
print("R2 Adjusted: ", r2_adj)

R2 Adjusted:  0.38135121035797137
