In [89]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Imports
import matplotlib.pyplot as plt #This package is for plotting
%matplotlib inline  
import numpy as np
import pandas as pd

# Keras Imports
import keras
from keras.models import Sequential
from keras.layers import Dense, Input, Activation
from keras.optimizers import SGD
from keras.initializers import RandomNormal
from keras.models import load_model

# Sklearn Imports
from sklearn.datasets import load_boston, load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score

# Homework 1 - Andrey Novichkov
## Using Keras for linear/logistic regression on 2 datasets

First, we want to build the simplest Keras NN and compare it to sklearn's output.  
Then, we want to improve and optimize our Keras and Sklearn implementations to see how good we can make the models

## Linear Regression w/ simple Keras

In [75]:
# Define our input data
boston = load_boston()
X, y = boston.data, boston.target

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=0)

In [77]:
# Build model
model = Sequential()
# input layer w/ 1 Neuron
model.add(Dense(1, input_shape=(X_train.shape[1],)))
# output layer with linear activation
model.add(Dense(1, activation='linear'))
# Compile model with MSE loss and adam optimizer
model.compile(loss='mse', optimizer='adam', metrics=['mean_squared_error'])
# Fit the model with 100 epochs and batch size of 1, because dataset very small
model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=0)
loss, mse = model.evaluate(X_test, y_test)
print(f'mse: {round(mse, 2)}')

mse: 39.55


## Linear Regression w/ Sklearn
We are going to use the same X_train, X_test and y_train and y_test

In [78]:
lr = LinearRegression()
lr = lg.fit(X_train, y_train)
y_pred = lr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'mse: {round(mse, 2)}')

mse: 29.78


**As we can see, sklearn's linear regression model performed better than the keras one with the parameters that I used for building the Keras NN**

## Logistic regression w/ simple Keras
Going to use diabetes.csv dataset

In [81]:
# Get the data
df = pd.read_csv('diabetes.csv')
y = df['Outcome'].to_numpy()
X = df.drop('Outcome', axis=1).to_numpy()

# Split data
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=.25, random_state=0)

In [69]:
# Build model
model = Sequential()

model.add(Dense(1, input_shape=(X_train.shape[1],)))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=200, batch_size=5, verbose=0)

<keras.callbacks.History at 0x136379e90>

In [72]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'accuracy: {round(accuracy*100, 2)}%')

accuracy: 75.52%


## Logistic Regression w/ Sklearn
Going to use same training data as with Keras

In [92]:
lg = LogisticRegression()
lg = lg.fit(X_train, y_train)
y_pred = lg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'accuracy: {round(accuracy*100, 2)}%')

accuracy: 79.17%


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


**As we can see, logistic regression was also better with sklearn without any optimizations done on either the simple Keras NN and sklearn models**

# Let's optimize both linear and logistic regression for Keras and Sklearn

## Optimized Linear Regression for Keras

In [93]:
# Define our input data
boston = load_boston()
X, y = boston.data, boston.target

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=0)

In [106]:
model = Sequential()
model.add(Dense(64, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='adam', loss='mse', metrics=['mean_squared_error'])
model.fit(X_train, y_train, epochs=100, batch_size=1, verbose=0)
loss, mse = model.evaluate(X_test, y_test)
print(f'mse: {round(mse, 2)}')

mse: 25.23
