# Homework 1
1. Build a Keras Model for linear regression (check: https://keras.io/activations/). Use Boston Housing Dataset to train and test your model.
2. Build a Keras Model for logistic regression. Use diabetes.csv to train and test your model.

### Comments
1. Build the simplest model for linear regression with Keras and compare your model performance with  
`from sklearn.linear_model import LinearRegression`
2. Build the simplest model for logistic regression with Keras and compare your model performance with  
`from sklearn.linear_model import LogisticRegression`
3. Add more complexity to your models in (1) and (2) and compare with previous results

## Imports

In [161]:
import warnings
warnings.filterwarnings('ignore')

from sklearn.datasets import load_boston
# from sklearn.datasets import load_breast_cancer

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense

In [129]:
keras.backend.clear_session()

## Simple Linear Regression in Keras

In [126]:
bos = load_boston()
X, y = bos.data, bos.target

sc = StandardScaler()
X = sc.fit_transform(X)
y = sc.fit_transform(np.array(y).reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [130]:
boston_model = Sequential()
boston_model.add(Dense(1, input_dim=13, activation="linear"))

boston_model.compile(loss="mse", optimizer="adam", metrics=["mse"])
boston_model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=0)

In [133]:
boston_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1)                 14        
Total params: 14
Trainable params: 14
Non-trainable params: 0
_________________________________________________________________


### Compare with `sk-learn`

In [134]:
loss, keras_mse = boston_model.evaluate(X_test, y_test, verbose=0)

In [135]:
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
sklearn_mse = mean_squared_error(y_test, y_pred)

In [136]:
print("MSE Keras NN\t", keras_mse)
print("MSE Sklearn LR\t", sklearn_mse)

MSE Keras NN	 0.32360557427531794
MSE Sklearn LR	 0.32215243724539483


<hr>

## Simple Logistic Regression in Keras

In [181]:
diabetes = pd.read_csv("datasets/diabetes.csv")
feature_columns = ['Pregnancies', 'Insulin', 'BMI', 'Age']

X = diabetes[feature_columns]
y = diabetes["Outcome"]

sc = StandardScaler()
X = sc.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [186]:
diabetes_model = Sequential()
diabetes_model.add(Dense(1, input_dim=4, activation="sigmoid"))

diabetes_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
diabetes_model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=0)

<keras.callbacks.History at 0x13777f150>

In [187]:
diabetes_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 1)                 5         
Total params: 5
Trainable params: 5
Non-trainable params: 0
_________________________________________________________________


### Compare with `sk-learn`

In [192]:
loss, keras_accuracy = diabetes_model.evaluate(X_test, y_test, verbose=0)

In [190]:
log = LogisticRegression()
log.fit(X_train, y_train)
y_pred = log.predict(X_test)
confusion = confusion_matrix(y_test, y_pred)
sklearn_accuracy = (confusion[0,0] + confusion[1,1]) / sum(sum(confusion))

In [194]:
print("Accuracy Keras NN\t", keras_accuracy)
print("Accuracy Sklearn LR\t", sklearn_accuracy)

Accuracy Keras NN	 0.6883116890857746
Accuracy Sklearn LR	 0.683982683982684


https://missinglink.ai/guides/neural-network-concepts/complete-guide-artificial-neural-networks/

## Complex NN Boston

In [196]:
bos = load_boston()
X, y = bos.data, bos.target

sc = StandardScaler()
X = sc.fit_transform(X)
y = sc.fit_transform(np.array(y).reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [259]:
complex_boston_model = Sequential()

# https://machinelearningmastery.com/regression-tutorial-keras-deep-learning-library-python/
complex_boston_model.add(Dense(13, input_dim=13, activation="relu"))
complex_boston_model.add(Dense(6, activation="relu"))
complex_boston_model.add(Dense(1))


complex_boston_model.compile(loss="mse", optimizer="adam", metrics=["mse"])
complex_boston_model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=0)

<keras.callbacks.History at 0x137fb0690>

In [260]:
complex_boston_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_30 (Dense)             (None, 20)                280       
_________________________________________________________________
dense_31 (Dense)             (None, 1)                 21        
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________


### Compare with `sk-learn`

In [261]:
loss, keras_mse = complex_boston_model.evaluate(X_test, y_test, verbose=0)

In [262]:
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
sklearn_mse = mean_squared_error(y_test, y_pred)

In [263]:
print("MSE Keras NN\t", keras_mse)
print("MSE Sklearn LR\t", sklearn_mse)

MSE Keras NN	 0.18614121212771065
MSE Sklearn LR	 0.32215243724539483


## Complex NN Diabetes

In [264]:
diabetes = pd.read_csv("datasets/diabetes.csv")
feature_columns = ['Pregnancies', 'Insulin', 'BMI', 'Age']

X = diabetes[feature_columns]
y = diabetes["Outcome"]

sc = StandardScaler()
X = sc.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [307]:
diabetes_model = Sequential()
diabetes_model.add(Dense(16, input_dim=4, activation="relu"))
diabetes_model.add(Dense(4, activation="relu"))
diabetes_model.add(Dense(1, activation="sigmoid"))

diabetes_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
diabetes_model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=0)

<keras.callbacks.History at 0x139259c50>

In [308]:
diabetes_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_57 (Dense)             (None, 16)                80        
_________________________________________________________________
dense_58 (Dense)             (None, 4)                 68        
_________________________________________________________________
dense_59 (Dense)             (None, 1)                 5         
Total params: 153
Trainable params: 153
Non-trainable params: 0
_________________________________________________________________


### Compare with `sk-learn`

In [309]:
loss, keras_accuracy = complex_diabetes_model.evaluate(X_test, y_test, verbose=0)

In [310]:
log = LogisticRegression()
log.fit(X_train, y_train)
y_pred = log.predict(X_test)
confusion = confusion_matrix(y_test, y_pred)
sklearn_accuracy = (confusion[0,0] + confusion[1,1]) / sum(sum(confusion))

In [311]:
print("Accuracy Keras NN\t", keras_accuracy)
print("Accuracy Sklearn LR\t", sklearn_accuracy)

Accuracy Keras NN	 0.7099567107307962
Accuracy Sklearn LR	 0.683982683982684
