In [1]:
# This is a practice Artificial Neural Network
# The problem being solved is based off of a model bank with fake data
# The bank has customers that have left for whatever reason
# The goal is to find why these customers have left using information such as account balance and gender
# The last column of the data states whether or not the customer has left the bank

import os # Navigating directories
import nibabel as nib # For nii files
import math

import numpy as np #Math operations library
import matplotlib.pyplot as plt #Visualization library
import pandas as pd #Matrix handler

import keras # Brings in tensorflow with it
from keras.models import Sequential # Used for initialization of ANN
from keras.layers import Dense, Conv3D, MaxPooling3D, Flatten # adds layers to ANN
from keras.wrappers.scikit_learn import KerasClassifier # ability to turn network into a function definition
from keras.layers import Dropout

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler #Methods to change categorical strings to numbers and scaling ability
from sklearn.model_selection import train_test_split # Splits data into training and testing
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix # Creates truth table for evaluating results

parameters = {'batch_size': 1,#4,
              'epochs': 500,
              'optimizer': 'rmsprop',#'adam'],
              'nodes': 6,
              'hidden_layers': 1,
              'num_features': 20,
             } # Creates list of parameters to test to find most successful one

class build_batcher:
    def __init__(self, pickle_loc):
        self.df = pd.read_pickle(pickle_loc)
        self.df = self.df.sample(frac=1).reset_index(drop=True)
        self.tenth = math.floor(self.df.shape[0] / 10)
        self.set_cross_val(1)
    
        self.max = 3800
        # The following lines find the max value for normalization, however it takes a while. The max is 3799 for this set
        # for y in range(0, len(self.df)):
        #     img = nib.load(self.df.iloc[y]['Location'])
        #     data = img.get_fdata()
        #     local_max = max(data.flatten())
        #     if local_max > self.max:
        #         self.max = local_max
        #         print(self.max)
        
    def set_cross_val(self, iteration):
        if iteration > 10:
            raise ValueError('Crossval Iteration Exceeds 10')
        
        iteration = iteration - 1
        self.index_array = list(range(0, iteration*self.tenth)) + list(range((iteration+1)*self.tenth, self.df.shape[0]))
        self.test_array = list(range(iteration*self.tenth, (iteration+1)*self.tenth))
        self.index = 0
        self.epoch = 0
        
    def next_batch(self, batch_size):
        fetch_array = list(range(0, batch_size))
        fetch_array = [x + self.index for x in fetch_array]
        
        self.index = self.index + batch_size
        if self.index > len(self.index_array) - 1:
            self.index = self.index - len(self.index_array) - 1
            self.epoch = self.epoch + 1
        
        copy = list([])
        for x in fetch_array:
            if x > len(self.index_array) - 1:
                x = x - len(self.index_array) - 1
            copy.append(x)
        
        fetch_array = copy
        del copy
        
        x_batch = []
        y_batch = []
        for pic in fetch_array:
            row = self.df.iloc[self.index_array[pic]]
            nii_data = nib.load(row['Location'])
            img_data = nii_data.get_fdata()
            img_data.resize(79, 95, 79, 1)
            
            x_batch.append(img_data)
            y_batch.append([row['Rest'], row['Emote'], row['Solve']])
            
        x_batch = np.asarray(x_batch)
        x_batch = np.true_divide(x_batch, self.max)
        y_batch = np.asarray(y_batch)
        return(x_batch, y_batch)
        
        
    def get_epoch(self):
        return(self.epoch)
    
    def get_index(self):
        return(self.index)
    
    def test_classifier(self, classifier):
        total = 0
        correct = 0
        for case in self.test_array:
            row = self.df.iloc[case]
            nii_data = nib.load(row['Location'])
            img_data = nii_data.get_fdata()
            img_data.resize(1, 79, 95, 79, 1)
            classes = classifier.predict(img_data, batch_size=1)
            
            total = total + 1
            if classes[0] == row['Rest'] & classes[1] == row['Emote'] & classes[2] == row['Solve']:
                correct = correct + 1
        
        return(correct / total)

def build_classifier(nodes, hidden_layers, num_features):
    classifier = Sequential() # This is the ANN object
    classifier.add(Conv3D(32, (3, 3, 3), input_shape=(79, 95, 79, 1), strides=(1, 1, 1), padding='valid', data_format=None, dilation_rate=(1, 1, 1), activation='relu', use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None))
    #Conv3D(32, (3, 3, 3), input_shape=(79, 95, 79), activation='relu')) # 32 kernels, 3x3; input image is rgb and 64x64
    #Note: images are not the same size and will need adjustment prior to training
    #Note: docs using Theano backend reference the input shape with the channel number first
    classifier.add(MaxPooling3D(pool_size=(2, 2, 2))) # Mix a 4x4 square into a 1x1
    classifier.add(Conv3D(32, (3, 3, 3), activation='relu'))
    classifier.add(MaxPooling3D(pool_size=(2, 2, 2)))
    classifier.add(Flatten())
    
    classifier.add(Dense(input_dim=num_features, units=nodes, kernel_initializer='uniform', activation='relu')) #Creates first hidden layer
    classifier.add(Dropout(rate=0.1))
    
    for i in range(0, hidden_layers):
        classifier.add(Dense(units=nodes, kernel_initializer='uniform', activation='relu')) # Second layer. Input dim is known from previous layer
        classifier.add(Dropout(rate=0.1))
    
    classifier.add(Dense(units=3, kernel_initializer='uniform', activation='softmax')) # Output layer. Only 1 ouput category, sigmoid activation to get probability of sureness
    classifier.add(Dropout(rate=0.1))

    # Note: Softmax applies to a dependent variable that has more than 2 categories
    # i.e. fMRI categorizations

    classifier.compile(optimizer='rmsprop', loss="categorical_crossentropy", metrics=['accuracy'])
    # Notes
    # adam is a kind of stochastic gradient descent
    # For multivariabel, use categorical cross entropy
    # Accuracy is predefined
    return classifier
# Creates a standard Keras type classifier composed of the defined network for
# k-means testing

classifier = build_classifier(parameters['nodes'], parameters['hidden_layers'], parameters['num_features'])
batcher = build_batcher('/media/jasondent/My Passport/PNC cohort 200/contents.pkl')

Using TensorFlow backend.


In [3]:
# model.train_on_batch(x_batch, y_batch)
# classes = model.predict(x_test, batch_size=128)

epoch = 0
while(epoch < parameters['epochs']):
    x_batch, y_batch = batcher.next_batch(parameters['batch_size'])
    loss = classifier.train_on_batch(x_batch, y_batch)
    prediction = classifier.predict(x_batch, batch_size=parameters['batch_size'])
    print(y_batch)
    print(prediction)
    print('epoch:\t' + str(epoch + 1) + '\tindex:\t' + str(batcher.get_index()))
    if epoch != batcher.get_epoch():
        epoch = batcher.get_epoch()
# classifier.train_on_batch(data, np.array([0]))

[[0 0 1]]
[[0. 0. 0.]]
epoch:	1	index:	31
[[0 0 1]]
[[0. 0. 1.]]
epoch:	1	index:	32
[[1 0 0]]
[[0. 0. 0.]]
epoch:	1	index:	33
[[0 0 1]]
[[0. 0. 0.]]
epoch:	1	index:	34


KeyboardInterrupt: 

In [8]:
epochs

<bound method build_batcher.get_epoch of <__main__.build_batcher object at 0x7fbc08591ba8>>