# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Column labels applied to the CSV through `names=`; the last entry is the
# target column. The strings are used verbatim as DataFrame column names.
columns = [
    'Number of times pregnant',
    ' Plasma glucose concentration a 2 hours in an oral glucose tolerance test',
    ' Diastolic blood pressure',
    'Triceps skin fold thickness',
    '2-Hour serum insulin',
    'Body mass index',
    ' Diabetes pedigree function',
    ' Age',
    'Class variable',
]

pima_data = pd.read_csv(
    '/content/sample_data/pima-diabetes.csv',
    names=columns,
)

# Structure overview. info() writes its report itself and returns None, so
# this print also emits a trailing "None" line.
print(pima_data.info())

# First rows, last rows, dimensions and per-column dtypes, in that order.
for overview in (
    pima_data.head(),
    pima_data.tail(),
    pima_data.shape,
    pima_data.dtypes,
):
    print(overview)

# Per-attribute summary statistics (count, mean, std, min, quartiles, max).
attribute_stats = pima_data.describe()
print(attribute_stats)

# Normalize every attribute except the target, first with MinMaxScaler and
# then with StandardScaler, printing each scaled array under its banner.
# After the loop, `scaler` and `df_scaled` hold the StandardScaler results.
# NOTE(review): the scaled arrays are only printed here; `pima_data` itself is
# left unscaled.
for banner, scaler_class in (
    ("MINMAX SCALER", MinMaxScaler),
    ("STANDARD SCALER", StandardScaler),
):
    print(banner)

    # Input attributes only; the target column is kept separately in A.
    pima_data_norm = pima_data.drop(columns='Class variable')
    A = pima_data['Class variable']

    scaler = scaler_class()
    df_scaled = scaler.fit_transform(pima_data_norm)

    print(df_scaled)

# Separate the target from the inputs. pop() removes 'Class variable' from
# pima_data in place and returns it, so the frame that remains (X) holds only
# the input attributes.
Y = pima_data.pop('Class variable')
X = pima_data

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# Show that the split is correctly done. Each pass walks the pieces in the
# order X_train, Y_train, X_test, Y_test.
split_parts = (X_train, Y_train, X_test, Y_test)

# Full contents of each piece.
for part in split_parts:
    print(part)

# Shape of each piece.
for part in split_parts:
    print(part.shape)

# First 5 rows of each piece.
for part in split_parts:
    print(part.head())

# Baseline network: a single sigmoid unit fed directly by the input
# attributes.
model = Sequential()

# In the following, use sigmoid activation function and define input_dim.
# input_dim is read from the training frame instead of being hard-coded to 8,
# so the layer always matches the number of feature columns.
model.add(Dense(1, input_dim=X_train.shape[1], activation='sigmoid'))

# 1) use 'adam' optimizer, 2) loss function is binary_crossentropy
# 3) metrics = accuracy
# `metrics` is passed as a list: Keras documents the argument as a list of
# metrics, and Keras 3 raises a ValueError when given a bare string.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# change epoch values
model.fit(X_train, Y_train, batch_size=20, epochs=40, validation_data=(X_test, Y_test))

# With one compiled metric, evaluate() returns [loss, accuracy]; keep and
# print the values instead of discarding the return value.
train_loss, train_accuracy = model.evaluate(X_train, Y_train)
print('train loss: ', train_loss, ' train accuracy: ', train_accuracy)

# predict() yields one sigmoid output per test row; flatten() turns the
# (rows, 1) array into a 1-D vector for printing next to the true labels.
Y_predictions = model.predict(X_test)

print('prediction: ', Y_predictions.flatten())
print('Y_test: ', Y_test)

# 2) Change epochs, batch_size, and see the changes in performance. Try at least FIVE different combinations
#
# Every combination is trained on a freshly built model. Calling fit() again
# on an already-trained model resumes from its current weights, so reusing a
# single model would make each run inherit all the epochs of the runs before
# it, and the combinations could not be compared with each other.
#
# Each entry is (banner printed before the run, batch_size, epochs).
experiments = [
    (" 1st time changing epochs, batch_size", 25, 45),
    (" 2nd time changing epochs, batch_size", 35, 50),
    (" 3rd changing epochs, batch_size", 40, 55),
    (" 4th changing epochs, batch_size", 45, 60),
    (" 5th changing epochs, batch_size", 50, 65),
]

for banner, batch_size, epochs in experiments:
    print(banner)

    # Same architecture and compile settings for every run: one sigmoid unit,
    # adam optimizer, binary cross-entropy loss. `metrics` must be a list
    # (Keras 3 rejects a bare string).
    model = Sequential()
    model.add(Dense(1, input_dim=X_train.shape[1], activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, Y_test))

    # evaluate() returns [loss, accuracy]. Report the held-out set next to
    # the training set so the runs can be compared on unseen data.
    train_loss, train_accuracy = model.evaluate(X_train, Y_train)
    test_loss, test_accuracy = model.evaluate(X_test, Y_test)
    print('batch_size: ', batch_size, ' epochs: ', epochs)
    print('train loss: ', train_loss, ' train accuracy: ', train_accuracy)
    print('test loss: ', test_loss, ' test accuracy: ', test_accuracy)

    Y_predictions = model.predict(X_test)

    print('prediction: ', Y_predictions.flatten())
    print('Y_test: ', Y_test)


# 3) Change error function to mean squared error, and explain the difference in performance
print('changing error funtion to mean squared error loss = mse ')

# Start from a new, untrained model so the MSE run does not inherit weights
# from any earlier training.
model = Sequential()

# input_dim follows the number of feature columns in the training frame
# rather than a hard-coded 8.
model.add(Dense(1, input_dim=X_train.shape[1], activation='sigmoid'))

# Same optimizer and metric as the cross-entropy model; only the loss differs.
# `metrics` is passed as a list: Keras documents the argument as a list of
# metrics, and Keras 3 raises a ValueError when given a bare string.
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=30, epochs=50, validation_data=(X_test, Y_test))

# With one compiled metric, evaluate() returns [loss, accuracy]; keep and
# print the values instead of discarding the return value.
train_loss, train_accuracy = model.evaluate(X_train, Y_train)
print('train loss: ', train_loss, ' train accuracy: ', train_accuracy)

Y_predictions = model.predict(X_test)

print('prediction: ', Y_predictions.flatten())
print('Y_test: ', Y_test)
