In [102]:
# For reproducible code: seed the NumPy, TensorFlow and CuPy random number
# generators with the same fixed value.
from numpy.random import seed
seed(42)  # numpy.random.seed
import tensorflow
tensorflow.random.set_seed(42)
# NOTE: this import rebinds the name `seed`; from here on `seed` refers to
# cupy.random.seed, not numpy.random.seed.
from cupy.random import seed
seed(42)  # cupy.random.seed

# other standard imports. "pip install -r requirements.txt" to install dependencies and "pip freeze > requirements.txt" to update them
from collections import Counter
import pickle

import PIL
import scipy
import matplotlib.pyplot as plt
import numpy as np
import cupy as cp
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from dbn import SupervisedDBNClassification

In [103]:
# Useful Functions
def pklSave(contentToBeSaved, fullPath):
    """Pickle ``contentToBeSaved`` into the file at ``fullPath``.

    The file is opened in binary write mode, so any existing file at that
    path is overwritten.
    """
    with open(fullPath, 'wb') as out_file:
        pickle.dump(contentToBeSaved, out_file)

def pklLoad(fullPath, convertToNumpyArray=False):
    """Return the object unpickled from the file at ``fullPath``.

    When ``convertToNumpyArray`` is truthy, the unpickled object is passed
    through ``np.array`` before being returned; otherwise it is returned
    as loaded.

    NOTE: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    """
    with open(fullPath, 'rb') as in_file:
        loaded = pickle.load(in_file)
    return np.array(loaded) if convertToNumpyArray else loaded

def train_val_test_split(x, y, tr, va, te, **kwargs):
    """Split ``x`` and ``y`` into train, validation and test subsets.

    Parameters
    ----------
    x, y : array-like
        Samples and labels; handed to ``train_test_split`` unchanged.
    tr : float
        Fraction of the data kept for training (the first split uses
        ``test_size=1 - tr``).
    va, te : float
        Validation and test fractions. Only their ratio matters: the
        held-out ``1 - tr`` remainder is divided so that the test subset
        receives ``te / (te + va)`` of it.
    **kwargs
        Extra keyword arguments for ``train_test_split`` (for example
        ``random_state``, ``shuffle`` or ``stratify``). They are applied to
        BOTH splits, so passing ``random_state`` makes the whole
        three-way split reproducible.

    Returns
    -------
    tuple
        ``(x_train, x_val, x_test, y_train, y_val, y_test)``
    """
    # `stratify` has to be handled separately: the labels given by the caller
    # match the full data set, not the held-out remainder of the first split.
    stratify = kwargs.pop('stratify', None)
    if stratify is None:
        x_train, x_rest, y_train, y_rest = train_test_split(
            x, y, test_size=1 - tr, **kwargs)
        stratify_rest = None
    else:
        # Split the stratification labels alongside the data so the second
        # split can be stratified with labels aligned to the held-out rows.
        x_train, x_rest, y_train, y_rest, _, stratify_rest = train_test_split(
            x, y, stratify, test_size=1 - tr, stratify=stratify, **kwargs)

    x_val, x_test, y_val, y_test = train_test_split(
        x_rest, y_rest, test_size=te / (te + va), stratify=stratify_rest, **kwargs)
    return (x_train, x_val, x_test, y_train, y_val, y_test)

In [104]:
def custom_learning_curve(estimator, x, y, train_sizes = None, test_sizes = None): # pass x, y as lists or nparrays
    """Record train/test accuracy of a DBN classifier on growing subsets of the data.

    For every entry of ``train_sizes`` a subset of ``(x, y)`` of that fraction
    is drawn (the full data when the fraction equals 1). The subset is split
    into train and test parts using the matching entry of ``test_sizes``,
    ``estimator`` is fitted on the train part, and the accuracy on both parts
    is recorded. Every split uses ``random_state=42``.

    Parameters
    ----------
    estimator : SupervisedDBNClassification
        Model to fit. The same object is re-fitted in every round.
        NOTE(review): whether ``fit`` restarts from scratch depends on the
        ``dbn`` implementation - confirm.
    x, y : list or numpy.ndarray
        Samples and labels (host data; converted to CuPy arrays for fitting).
    train_sizes : sequence of float, optional
        Fractions of the data to use per round. Defaults to
        ``[0.2, 0.4, 0.6, 0.8, 1]``.
    test_sizes : sequence of float, optional
        Test fraction for each round. Defaults to ``0.2`` for every entry of
        ``train_sizes``.

    Returns
    -------
    tuple or None
        ``(train_sizes, train_accs, test_accs)``. ``None`` is returned when
        ``estimator`` is not a ``SupervisedDBNClassification``, because no
        other estimator type is handled.

    Raises
    ------
    ValueError
        If ``test_sizes`` has fewer entries than ``train_sizes``.
    """
    if train_sizes is None:
        train_sizes = [0.2,0.4,0.6,0.8,1]
    if test_sizes is None:
        # Follow the length of train_sizes so custom schedules work without
        # also having to pass test_sizes.
        test_sizes = [0.2] * len(train_sizes)

    # isinstance (rather than an exact type comparison) also accepts subclasses.
    if not isinstance(estimator, SupervisedDBNClassification):
        return None

    # Fail before any (expensive) fitting instead of with an IndexError midway.
    if len(test_sizes) < len(train_sizes):
        raise ValueError(
            f'test_sizes has {len(test_sizes)} entries but train_sizes has '
            f'{len(train_sizes)}; provide one test size per train size')

    train_accs = []
    test_accs = []

    for train_size, test_size in zip(train_sizes, test_sizes):
        if train_size == 1:
            x_new, y_new = x, y
        else:
            x_new, _, y_new, _ = train_test_split(x, y, test_size=1-train_size, random_state=42)

        x_train, x_test, y_train, y_test = train_test_split(x_new, y_new, test_size=test_size, random_state=42)

        # The estimator is fed CuPy arrays; the labels are kept on the host as
        # well because accuracy_score works on NumPy arrays.
        x_train_gpu = cp.array(x_train)
        x_test_gpu = cp.array(x_test)

        estimator.fit(x_train_gpu, cp.array(y_train))

        y_pred_train = np.array(estimator.predict(x_train_gpu))
        y_pred_test = np.array(estimator.predict(x_test_gpu))

        train_accs.append(accuracy_score(np.asarray(y_train), y_pred_train))
        test_accs.append(accuracy_score(np.asarray(y_test), y_pred_test))
    return (train_sizes, train_accs, test_accs)

# Fitting the model on Rescaled Images

In [23]:
# Load the samples ('../x128.pkl') and labels ('../y.pkl') as NumPy arrays.
x = pklLoad('../x128.pkl', convertToNumpyArray=True)
y = pklLoad('../y.pkl', convertToNumpyArray=True)
# Hold out 20% of the data for testing (fixed seed) and convert each of the
# four resulting splits to a CuPy array.
x_train, x_test, y_train, y_test = (
    cp.array(split)
    for split in train_test_split(x, y, test_size=0.2, random_state=42)
)

In [24]:
# Sanity check: number of rows in each split (displayed as a tuple).
tuple(map(len, (x_train, x_test, y_train, y_test)))

(1865, 467, 1865, 467)

In [25]:
# Display the shape of the training samples array.
x_train.shape

(1865, 16384)

In [101]:
# Configure the DBN classifier used for the rescaled images.
classifier = SupervisedDBNClassification(
    hidden_layers_structure=[128, 128],
    learning_rate_rbm=0.1,  # 0.05 with 100 epochs == 0.1 with 50 epochs
    learning_rate=0.1,
    n_epochs_rbm=50,
    n_iter_backprop=10,  # loss was found to be stagnating after this value
    batch_size=32,
    activation_function='sigmoid',
    dropout_p=0.1,  # low drop-out value, as model is underfitting
)

In [34]:
# Train the classifier on the training split.
classifier.fit(x_train, y_train)

[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 1022.598978
>> Epoch 2 finished 	RBM Reconstruction error 979.957077
>> Epoch 3 finished 	RBM Reconstruction error 923.329255
>> Epoch 4 finished 	RBM Reconstruction error 896.948039
>> Epoch 5 finished 	RBM Reconstruction error 870.936445
>> Epoch 6 finished 	RBM Reconstruction error 859.639396
>> Epoch 7 finished 	RBM Reconstruction error 841.492790
>> Epoch 8 finished 	RBM Reconstruction error 836.304946
>> Epoch 9 finished 	RBM Reconstruction error 828.798921
>> Epoch 10 finished 	RBM Reconstruction error 820.018319
>> Epoch 11 finished 	RBM Reconstruction error 815.087342
>> Epoch 12 finished 	RBM Reconstruction error 808.531551
>> Epoch 13 finished 	RBM Reconstruction error 805.963824
>> Epoch 14 finished 	RBM Reconstruction error 800.518216
>> Epoch 15 finished 	RBM Reconstruction error 795.104386
>> Epoch 16 finished 	RBM Reconstruction error 789.508972
>> Epoch 17 finished 	RBM Reconstruction error 787.11

Output of the fitting: <br>
<img src='notebook_media/' width=400 />

In [83]:
# Persist the classifier to 'dbnModel1.pkl' (relative to the working directory).
classifier.save('dbnModel1.pkl')

In [84]:
# Restore the classifier from 'dbnModel1.pkl', replacing the in-memory instance.
classifier = SupervisedDBNClassification.load('dbnModel1.pkl')

In [35]:
# Copy the CuPy label arrays back to NumPy so accuracy_score can consume them.
y_train_np = np.array(y_train.get())
y_test_np = np.array(y_test.get())

# predict returns a list, so wrap it in a NumPy array before scoring.
y_pred = np.array(classifier.predict(x_train))
print(f'Training Accuracy: {accuracy_score(y_train_np, y_pred)}')

y_pred = np.array(classifier.predict(x_test))
print(f'Testing Accuracy: {accuracy_score(y_test_np, y_pred)}')

Training Accuracy: 0.4584450402144772
Testing Accuracy: 0.4068522483940043


# Fitting the model on Features Extracted From the Images

In [72]:
# Load the extracted features ('../x128f.pkl') and labels ('../y.pkl') as
# NumPy arrays.
xf = pklLoad('../x128f.pkl', convertToNumpyArray=True)
y = pklLoad('../y.pkl', convertToNumpyArray=True)
# Hold out 20% of the data for testing (fixed seed) and convert each of the
# four resulting splits to a CuPy array.
x_train, x_test, y_train, y_test = (
    cp.array(split)
    for split in train_test_split(xf, y, test_size=0.2, random_state=42)
)

In [61]:
# Configure the DBN classifier for the extracted features; a single RBM epoch
# and a single backprop iteration are requested here.
classifier = SupervisedDBNClassification(
    hidden_layers_structure=[128, 128],
    learning_rate_rbm=0.05,
    learning_rate=0.1,
    n_epochs_rbm=1,
    n_iter_backprop=1,
    batch_size=32,
    activation_function='sigmoid',
    dropout_p=0.2,
)

In [62]:
# Train the classifier on the training split of the extracted features.
classifier.fit(x_train, y_train)

[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 17.862259
>> Epoch 1 finished 	RBM Reconstruction error 0.039180
[END] Pre-training step
[START] Fine tuning step:
>> Epoch 1 finished 	ANN training loss 10.851956
[END] Fine tuning step


Output of the fitting: <br>
<img src='notebook_media/' width=400 />

In [None]:
# Persist the classifier to 'dbnModel128f.pkl' (relative to the working directory).
classifier.save('dbnModel128f.pkl')

In [None]:
# Restore the classifier from 'dbnModel128f.pkl', replacing the in-memory instance.
classifier = SupervisedDBNClassification.load('dbnModel128f.pkl')

In [66]:
# Copy the CuPy label arrays back to NumPy so accuracy_score can consume them.
y_train_np = np.array(y_train.get())
y_test_np = np.array(y_test.get())

# predict returns a list, so wrap it in a NumPy array before scoring.
y_pred = np.array(classifier.predict(x_train))
print(f'Training Accuracy: {accuracy_score(y_train_np, y_pred)}')

y_pred = np.array(classifier.predict(x_test))
print(f'Testing Accuracy: {accuracy_score(y_test_np, y_pred)}')

Training Accuracy: 0.17158176943699732
Testing Accuracy: 0.15417558886509636


In [64]:
# Same configuration as the previous classifier cell, plus verbose=False.
# NOTE(review): this instance is never fitted in the visible cells and is
# replaced by the classifier created in the 64x64 section - confirm whether
# this cell is still needed.
classifier = SupervisedDBNClassification(
    hidden_layers_structure=[128, 128],
    learning_rate_rbm=0.05,
    learning_rate=0.1,
    n_epochs_rbm=1,
    n_iter_backprop=1,
    batch_size=32,
    activation_function='sigmoid',
    dropout_p=0.2,
    verbose=False,
)

## on 64x64

In [4]:
# Load the extracted features ('../x64f.pkl') and labels ('../y.pkl') as
# NumPy arrays.
xf = pklLoad('../x64f.pkl', convertToNumpyArray=True)
y = pklLoad('../y.pkl', convertToNumpyArray=True)
# Hold out 20% of the data for testing (fixed seed). Unlike the 128x128 cells,
# the splits are kept as NumPy arrays here rather than converted to CuPy.
x_train, x_test, y_train, y_test = (
    np.array(split)
    for split in train_test_split(xf, y, test_size=0.2, random_state=42)
)

In [5]:
# Configure the DBN classifier for the 64x64 feature set.
classifier = SupervisedDBNClassification(
    hidden_layers_structure=[128, 128],
    learning_rate_rbm=0.05,
    learning_rate=0.1,
    n_epochs_rbm=5,
    n_iter_backprop=10,
    batch_size=32,
    activation_function='sigmoid',
    dropout_p=0.2,
)

In [6]:
# Train the classifier on the training split of the 64x64 feature set.
classifier.fit(x_train, y_train)

[START] Pre-training step:
>> Epoch 1 finished 	RBM Reconstruction error 19.264810
>> Epoch 2 finished 	RBM Reconstruction error 19.104714
>> Epoch 3 finished 	RBM Reconstruction error 19.072024
>> Epoch 4 finished 	RBM Reconstruction error 19.067058
>> Epoch 5 finished 	RBM Reconstruction error 19.065036
>> Epoch 1 finished 	RBM Reconstruction error 0.039559
>> Epoch 2 finished 	RBM Reconstruction error 0.046132
>> Epoch 3 finished 	RBM Reconstruction error 0.051636
>> Epoch 4 finished 	RBM Reconstruction error 0.050275
>> Epoch 5 finished 	RBM Reconstruction error 0.048225
[END] Pre-training step
[START] Fine tuning step:
>> Epoch 1 finished 	ANN training loss 10.922973
>> Epoch 2 finished 	ANN training loss 10.809882
>> Epoch 3 finished 	ANN training loss 10.828573
>> Epoch 4 finished 	ANN training loss 10.808350
>> Epoch 5 finished 	ANN training loss 10.827409
>> Epoch 6 finished 	ANN training loss 10.792239
>> Epoch 7 finished 	ANN training loss 10.798962
>> Epoch 8 finished 	ANN 

Output of the fitting: <br>
<img src='notebook_media/' width=400 />

In [None]:
# Persist the classifier to 'dbnModel64f.pkl' (relative to the working directory).
classifier.save('dbnModel64f.pkl')

In [None]:
# Restore the classifier from 'dbnModel64f.pkl', replacing the in-memory instance.
classifier = SupervisedDBNClassification.load('dbnModel64f.pkl')

In [7]:
# Score the classifier on the training split, then on the held-out test split.
# The splits in this section are NumPy arrays, so the labels are passed to
# accuracy_score directly (no .get() conversion as in the CuPy cells).
y_pred = classifier.predict(x_train)
print(f'Training Accuracy: {accuracy_score(y_train, y_pred)}')
# y_pred is reused: after this cell it holds the test-split predictions.
y_pred = classifier.predict(x_test)
print(f'Testing Accuracy: {accuracy_score(y_test, y_pred)}')

Done.
Training Accuracy: 0.16836461126005361
Done.
Testing Accuracy: 0.16488222698072805
