# Part 1: Keras vs SKLearn's Linear Regression on Boston Housing

In [13]:

import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [14]:
# Show every compute device TensorFlow reports for this machine.
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 17131076743790566984
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 8217475731201738393
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 18172444646721177174
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15884438733
locality {
  bus_id: 1
  links {
  }
}
incarnation: 3378199873200811827
physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"
]


In [15]:
# Ask the Keras TensorFlow backend which GPU devices it can use.
# NOTE(review): `_get_available_gpus` is underscore-prefixed (private), so it
# may not exist in other Keras versions — confirm before upgrading.
from keras import backend as K

available_gpus = K.tensorflow_backend._get_available_gpus()
available_gpus

['/job:localhost/replica:0/task:0/device:GPU:0']

In [16]:
from sklearn.datasets import load_boston

# Load the Boston housing data and gather features plus target in one frame.
boston = load_boston()

# Name the feature columns at construction time, then append the target as 'MEDV'.
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df['MEDV'] = boston.target
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [17]:
from sklearn.model_selection import train_test_split

X = df
Y = boston.target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = .25)

print("X:", X_train.shape, X_test.shape)
print("Y:", Y_train.shape, Y_test.shape)

X: (379, 14) (127, 14)
Y: (379,) (127,)


In [18]:
model = Sequential()
model.add(Dense(14, input_dim = 14, kernel_initializer = 'normal', activation = 'relu'))
model.add(Dense(1, kernel_initializer = 'normal'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics = ['mse'])
model


<keras.engine.sequential.Sequential at 0x7ff4801fdcf8>

In [19]:
# Train with per-sample updates (batch size 1) for 50 passes over the training split.
model.fit(x = X_train, y = Y_train, epochs = 50, batch_size = 1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7ff4b0877b38>

In [20]:
from sklearn.metrics import r2_score

# Predict on the held-out split and score with the coefficient of determination (R²).
Y_pred = model.predict(X_test)
r2_score(y_true = Y_test, y_pred = Y_pred)

0.9819219905098774

Comparison with scikit-learn's linear regression (notebook linked below):

https://github.com/RaymondDashWu/DS-2.1-Machine-Learning/blob/master/Boston_Housing_Prices_Linear_Regression.ipynb

1. <img src="https://i.imgur.com/KlQqTEH.png" alt="SKLearn linear regression result">

# Part 2: Logistic Regression on Diabetes Dataset

In [21]:
from sklearn.datasets import load_diabetes

# Read the diabetes data from the local CSV rather than from the sklearn loader
# imported above; the preview below shows a 0/1 'Outcome' column in this file.
diabetes_csv_path = '../input/diabetes.csv'
df_diab = pd.read_csv(diabetes_csv_path)
df_diab.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [22]:
# Number of columns in the diabetes frame (features plus the label column).
df_diab.shape[1]

9

In [23]:
X = np.array(df_diab)
Y = df_diab['Outcome']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = .25)

print("X:", X_train.shape, X_test.shape)
print("Y:", Y_train.shape, Y_test.shape)

X: (576, 9) (192, 9)
Y: (576,) (192,)


In [24]:
model = Sequential()
model.add(Dense(9, input_dim = 9, kernel_initializer = 'normal', activation = 'relu'))
model.add(Dense(1, kernel_initializer = 'normal'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics = ['accuracy'])
model.fit(X_train, Y_train, batch_size = 1, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ff46de4f0f0>