In [70]:
# Imports

import os, shutil
import pandas as pd

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model
from tensorflow.keras.utils import image_dataset_from_directory
from keras.metrics import AUC


import numpy as np
import matplotlib.pyplot as plt

In [71]:
def data_build(loc=None, image_h=500, image_w=500, batch_size=16):
    '''
    Load the image folder as a shuffled training / validation dataset pair.

    The images on disk have different dimensions, so every image is brought
    to one common size of (image_h, image_w) via the image_size argument of
    image_dataset_from_directory. Note that image_size resizes each image
    directly to the target size: it does not pad, so this is NOT equivalent
    to tf.image.resize_with_pad and the aspect ratio of an image may change.

    Parameters
    ----------
    loc : str, optional
        Directory that contains one sub-directory per class. When omitted,
        the original hard-coded course directory is used.
    image_h, image_w : int
        Height and width every image is resized to (default 500 x 500).
    batch_size : int
        Images per batch. The default of 16 is what the author found to fit
        on the GPU; 32 ran into saturation / memory issues.

    Returns
    -------
    (train_ds, val_ds)
        The 80% training subset and the 20% validation subset.
    '''
    if loc is None:
        # NOTE(review): absolute, machine-specific default kept only for
        # backward compatibility; pass loc explicitly on other machines.
        loc = 'C:/Users/btb51/Documents/GitHub/DeepLearning_DAAN570/DAAN570_Instructor_Sample_Codes/Lesson_08_Code/Assignment2_ZooClassifier/Zoo Classifier project - images/images'

    # subset='both' makes the call return the (train, validation) pair at once;
    # the fixed seed keeps the 80/20 split identical between runs.
    train_ds, val_ds = image_dataset_from_directory(loc,
                                  batch_size=batch_size,
                                  image_size=(image_h, image_w),
                                  shuffle=True,
                                  seed=570,
                                  validation_split=0.2,
                                  subset='both')

    return train_ds, val_ds

In [72]:
# Build the two datasets once; later cells reuse them for every experiment.
train_ds, val_ds = data_build()

# The training dataset exposes the list of label names; its length is the
# number of output units each classifier needs.
class_names = train_ds.class_names
num_classes = len(class_names)

print(class_names, num_classes)

Found 3000 files belonging to 3 classes.
Using 2400 files for training.
Using 600 files for validation.
['cats', 'dogs', 'panda'] 3


In [73]:
def working_cnn(num_classes):
    """Build the baseline CNN classifier.

    Architecture: pixel rescaling to [0, 1], three Conv2D + MaxPool2D stages
    with 16, 32 and 64 filters, then Flatten, a 128-unit ReLU Dense layer and
    a final Dense layer with one unit per class.

    The final layer has no activation, so the model outputs raw scores
    (logits) for each of the num_classes classes.
    """
    layers = tf.keras.layers

    net = Sequential()
    net.add(layers.Rescaling(1./255, input_shape=(500, 500, 3)))

    # Three identical feature stages that differ only in the filter count.
    for filters in (16, 32, 64):
        net.add(layers.Conv2D(filters, 3, padding='same', activation='relu'))
        net.add(layers.MaxPool2D())

    net.add(layers.Flatten())
    net.add(layers.Dense(128, activation='relu'))
    net.add(layers.Dense(num_classes))

    return net

In [83]:
# Baseline setup: the network outputs logits, so the loss is told to expect
# logits (from_logits=True).
working_net = working_cnn(num_classes)

lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = ['accuracy']

working_net.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=metrics,
)

In [84]:
# Print the layer table (layer type, output shape, parameter count) of the baseline model.
working_net.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_9 (Rescaling)     (None, 500, 500, 3)       0         
                                                                 
 conv2d_27 (Conv2D)          (None, 500, 500, 16)      448       
                                                                 
 max_pooling2d_27 (MaxPoolin  (None, 250, 250, 16)     0         
 g2D)                                                            
                                                                 
 conv2d_28 (Conv2D)          (None, 250, 250, 32)      4640      
                                                                 
 max_pooling2d_28 (MaxPoolin  (None, 125, 125, 32)     0         
 g2D)                                                            
                                                                 
 conv2d_29 (Conv2D)          (None, 125, 125, 64)     

In [86]:
# Number of passes over the training data for the baseline model.
# NOTE(review): the recorded output of this cell shows "Epoch 1/2", i.e. it was
# produced with epochs = 2; re-run the cell so code and output agree.
epochs = 1

history_working = working_net.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)

Epoch 1/2
Epoch 2/2


# Other setups

In [12]:
def softmax_cnn(num_classes):
    """Build the CNN variant that uses softmax on the hidden Dense layer.

    Layout: rescaling to [0, 1], three Conv2D + MaxPool2D stages (16, 32, 64
    filters), Flatten, a 128-unit Dense layer with *softmax* activation, and
    a final Dense layer with num_classes units and no activation (so the
    model output is raw scores, not probabilities).

    NOTE(review): softmax on a hidden layer is unusual; this looks like a
    deliberate "other setup" experiment, so it is documented rather than
    changed.
    """
    L = tf.keras.layers

    stack = [L.Rescaling(1./255, input_shape=(500, 500, 3))]
    for n_filters in (16, 32, 64):
        stack.append(L.Conv2D(n_filters, 3, padding='same', activation='relu'))
        stack.append(L.MaxPool2D())
    stack.append(L.Flatten())
    stack.append(L.Dense(128, activation='softmax'))
    stack.append(L.Dense(num_classes))

    return Sequential(stack)

In [13]:
# Experiment: softmax on the hidden layer, no activation on the output layer.
softmax_net = softmax_cnn(num_classes)

lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# from_logits=False is used here although the output layer of softmax_cnn has
# no activation (it emits logits). The author's own note says this should be
# True; it is left as written because this cell records that experiment.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
metrics = ['accuracy']

softmax_net.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=metrics,
)

In [14]:
# Print the layer table (layer type, output shape, parameter count) of softmax_net.
softmax_net.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_1 (Rescaling)     (None, 500, 500, 3)       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 500, 500, 16)      448       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 250, 250, 16)     0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 250, 250, 32)      4640      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 125, 125, 32)     0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 125, 125, 64)     

In [15]:
# Train the hidden-softmax variant for ten passes over the training data,
# evaluating on the validation split after every epoch.
epochs = 10

history_softmax = softmax_net.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
def softmax2_cnn(num_classes):
    """Build the CNN variant with softmax on both Dense layers.

    Layout: rescaling to [0, 1], three Conv2D + MaxPool2D stages (16, 32, 64
    filters), Flatten, a 128-unit Dense layer with softmax activation, and a
    final num_classes-unit Dense layer that also uses softmax, so the model
    outputs class probabilities.
    """
    keras_layers = tf.keras.layers
    conv_options = dict(padding='same', activation='relu')

    net = Sequential([
        keras_layers.Rescaling(1./255, input_shape=(500, 500, 3)),
        keras_layers.Conv2D(16, 3, **conv_options),
        keras_layers.MaxPool2D(),
        keras_layers.Conv2D(32, 3, **conv_options),
        keras_layers.MaxPool2D(),
        keras_layers.Conv2D(64, 3, **conv_options),
        keras_layers.MaxPool2D(),
        keras_layers.Flatten(),
        keras_layers.Dense(128, activation='softmax'),
        keras_layers.Dense(num_classes, activation='softmax'),
    ])

    return net

In [17]:
# Experiment: softmax on the hidden layer AND on the output layer.
# Fixed: this cell previously called softmax_cnn(), so the "softmax2" run was
# actually training the same architecture as softmax_net and softmax2_cnn()
# was never used.
softmax2_net = softmax2_cnn(num_classes)
lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# The output layer of softmax2_cnn applies softmax, so the loss receives
# probabilities rather than logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
metrics = ['accuracy']

softmax2_net.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)

In [18]:
# Print the layer table (layer type, output shape, parameter count) of softmax2_net.
softmax2_net.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_2 (Rescaling)     (None, 500, 500, 3)       0         
                                                                 
 conv2d_6 (Conv2D)           (None, 500, 500, 16)      448       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 250, 250, 16)     0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 250, 250, 32)      4640      
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 125, 125, 32)     0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 125, 125, 64)     

In [19]:
# Train softmax2_net for five passes over the training data, evaluating on
# the validation split after every epoch.
epochs = 5

history_softmax2 = softmax2_net.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [59]:
def softmax_end_cnn(num_classes):
    """Build the CNN variant with softmax only on the output layer.

    Layout: rescaling to [0, 1], three Conv2D + MaxPool2D stages (16, 32, 64
    filters), Flatten, a 128-unit ReLU Dense layer, and a final
    num_classes-unit Dense layer with softmax, so the model outputs class
    probabilities.
    """
    def conv_stage(filters):
        # One feature stage: 3x3 same-padded ReLU convolution, then pooling.
        return [
            tf.keras.layers.Conv2D(filters, 3, padding='same', activation='relu'),
            tf.keras.layers.MaxPool2D(),
        ]

    rescale = tf.keras.layers.Rescaling(1./255, input_shape=(500, 500, 3))
    feature_stages = conv_stage(16) + conv_stage(32) + conv_stage(64)
    head = [
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ]

    return Sequential([rescale] + feature_stages + head)

In [67]:
# Experiment: ReLU hidden layer, softmax only on the output layer.
# Fixed: this cell previously called softmax_cnn(), so softmax_end_cnn() was
# never used and the run trained the hidden-softmax architecture instead.
softmax_end_net = softmax_end_cnn(num_classes)
lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# The output layer of softmax_end_cnn applies softmax, so the loss receives
# probabilities rather than logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
metrics = ['accuracy']

softmax_end_net.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)

In [68]:
# Print the layer table (layer type, output shape, parameter count) of softmax_end_net.
softmax_end_net.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_5 (Rescaling)     (None, 500, 500, 3)       0         
                                                                 
 conv2d_15 (Conv2D)          (None, 500, 500, 16)      448       
                                                                 
 max_pooling2d_15 (MaxPoolin  (None, 250, 250, 16)     0         
 g2D)                                                            
                                                                 
 conv2d_16 (Conv2D)          (None, 250, 250, 32)      4640      
                                                                 
 max_pooling2d_16 (MaxPoolin  (None, 125, 125, 32)     0         
 g2D)                                                            
                                                                 
 conv2d_17 (Conv2D)          (None, 125, 125, 64)     

In [69]:
# Train softmax_end_net for five passes over the training data, evaluating on
# the validation split after every epoch.
epochs = 5

history_softmax_end = softmax_end_net.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# Binary Classifier


This section uses a different dataset (the diabetes CSV), so it has its own imports and data-loading cells below.

In [40]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense
import pandas as pd
import numpy as np
import requests
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import plot_model
from keras.layers import concatenate
from keras.metrics import AUC

from sklearn.model_selection import train_test_split


In [52]:

def binary_model():
    """Build the binary classifier: 8 inputs -> 12 ReLU units -> 1 sigmoid unit.

    Returns
    -------
    tensorflow.keras.Model
        Uncompiled functional-API model named 'Diabetes'. Its single output
        is a sigmoid probability in [0, 1], so it should be paired with a
        binary cross-entropy loss that expects probabilities
        (from_logits=False).
    """
    # single hidden layer of 12 nodes
    model_input = Input(shape=(8,), name='data_in')
    hidden_layer_1 = Dense(units=12, activation='relu', name='HL_1')(model_input)

    # Fixed: the output used activation='softmax'. Softmax normalises across
    # the units of the layer, so with a single unit it always outputs 1.0 and
    # the model cannot learn. Sigmoid is the single-unit counterpart.
    model_out = Dense(1, activation='sigmoid', name='data_out')(hidden_layer_1)

    # create the model by linking inputs and outputs through Keras functional API
    model = Model(inputs=model_input, outputs=model_out, name='Diabetes')

    return model

In [34]:
# Location of the diabetes CSV.
# (An earlier assignment of `file` to a different directory was removed: it
# was overwritten by this one before ever being used.)
# NOTE(review): absolute, machine-specific path; consider a path relative to
# a configurable data directory.
file = "C:/Users/btb51/Documents/GitHub/DeepLearning_DAAN570/DAAN570_Instructor_Sample_Codes/Lesson_06_Code/Assignment_01/archive/diabetes.csv"
data = pd.read_csv(file)



In [39]:


# Turn zeros that stand for missing values into NaN (Pregnancies is left
# alone on purpose).
# Fixed: the previous `data[col].replace(..., inplace=True)` form mutates a
# temporary Series and is a silent no-op under pandas Copy-on-Write, and
# `np.NAN` no longer exists in NumPy 2.0. Replacing on the frame and
# re-binding `data` works on every pandas / NumPy version.
zero_means_missing = ["BloodPressure", "SkinThickness", "Insulin"]
data = data.replace({column: 0 for column in zero_means_missing}, np.nan)

# It may be beneficial to replace with the average of the column instead, if
# the zeros push values.

# drop the duplicates keeping the first instance of any dups
data = data.drop_duplicates(keep='first')

# Check for outliers (keep anything where all data cols are within 3 std dev).
# A NaN z-score compares False against `< 3`, so rows that still contain a
# NaN are removed by this filter as well.
data = data[(np.abs(stats.zscore(data, nan_policy='omit')) < 3).all(axis=1)]

# Deal with the class imbalance
from imblearn.over_sampling import SMOTE

# splice data: the first 8 columns are the features, column 8 is the label
y = data.iloc[:, 8]
x = data.iloc[:, :8]

# make the SMOTE object; seeded so the synthetic samples (and therefore every
# downstream number) are reproducible between runs
oversample = SMOTE(random_state=570)

# Restore balance
x, y = oversample.fit_resample(x, y)

# Check the balance
print("Length of x: " + str(len(x)))
print("Length of y: " + str(len(y)))

# Scale every feature to [0, 1].
# NOTE(review): oversampling and scaling both happen before the train/test
# split in the next cell, so the test set contains synthetic rows and the
# scaler has seen test data. Consider splitting first.
scaler = MinMaxScaler()

data_x = scaler.fit_transform(x)

Length of x: 506
Length of y: 506


In [41]:
# Hold out 25% of the scaled rows for testing; the fixed random_state keeps
# the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data_x,
    y,
    test_size=0.25,
    random_state=570,
)

In [57]:
binary_net = binary_model()

# USING RMSProp
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

# Fixed: from_logits was True, but binary_model()'s output layer applies an
# activation, so the model emits activated values rather than raw logits.
bi_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

metric = [tf.keras.metrics.BinaryAccuracy(),
          tf.keras.metrics.FalsePositives(),
          tf.keras.metrics.AUC(curve='ROC')]

# Fixed: this used to pass `metrics`, the ['accuracy'] list left over from
# the CNN cells, so the `metric` list defined above was silently ignored
# (and the cell failed on a fresh kernel where `metrics` does not exist).
binary_net.compile(optimizer=optimizer, loss=bi_loss, metrics=metric)

In [58]:
# Train the binary classifier for five epochs in batches of 128 rows; no
# validation data is passed, so only training metrics are reported.
history_binary = binary_net.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
