# CS-660 Semester Project Notebook 1

## Stan Rosenbaum

Fall 2017

Anaconda 5 / Python 3

Using Keras with TensorFlow as back end.

### Background Important Stuff

First the Enums of the Classes

* "NA" = 0
* "UP" = 1
* "DOWN" = 2
* "HOLE" = 3


In [19]:
# First we init stuff.
# Load the Basic Python Libraries
import os
import csv
import PIL
import pickle
import random
import datetime

# Load my Data Management Module
import CS660DataManagement as csDM

# load numpy
import numpy as np

# Load Keras Stuff
import keras
import keras.backend as K
from keras import layers
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.models import Model, Sequential
from keras.models import load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from keras.initializers import glorot_uniform
from keras.callbacks import TensorBoard
from keras.utils.np_utils import to_categorical

K.set_image_data_format('channels_last')

# Other.  Mostly Graphic stuff for displaying Data in and out of Jupyter.
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import pydot
import graphviz
from IPython.display import SVG

# Not using Quiver Yet.
# from quiver_engine import server

%matplotlib inline

# Get Processed Data Directory.
processedDataDir = os.path.join( os.getcwd(), os.pardir, 'DATA', 'PROCESSED' )
combinedDataDir = os.path.join( os.getcwd(), os.pardir, 'DATA', 'COMBINED' )
pickleDataDir = os.path.join( os.getcwd(), os.pardir, 'DATA', 'PICKLES' )
modelsDirectory = os.path.join( os.getcwd(), os.pardir, 'MODELS' )
modelsStructsDirectory = os.path.join( os.getcwd(), os.pardir, 'MODELS_STRUCTS' )
weightsDirectory = os.path.join( os.getcwd(), os.pardir, 'WEIGHTS' )
resultsDirectory = os.path.join( os.getcwd(), os.pardir, 'RESULTS' )

testImageColorFile = os.path.join( os.getcwd(), os.pardir, 'DATA', 'COMBINED', 'ICOLOR', 'TEST.png' )
testImageDepthFile = os.path.join( os.getcwd(), os.pardir, 'DATA', 'COMBINED', 'IDEPTH', 'TEST.png' )
testCSVFile = os.path.join( os.getcwd(), os.pardir, 'DATA', 'COMBINED', 'PCLOUD', 'TEST.csv' )

tensorLogDataDir = os.path.join( os.getcwd(), os.pardir, 'TENSOR_LOGS' )

imageDirs = ['ICOLOR', 'IDEPTH']
csvDirs = ['PCLOUD']

allDataFlavors = imageDirs + csvDirs

#Load main data file.
searCSVInfoFile = os.path.join( combinedDataDir, 'SEAR_DC_INFO.csv' )

csDM.CS660DataManagementCheck()

CS660DataManagementCheck Imported


### Load Model Data

Unless we need to train model.

In [2]:
def loadModel( modelName ):
    """
    Loads the Model.
    """
    theModel = load_model( os.path.join( modelsDirectory, modelName) )
    print('MODEL Loaded.')
    return theModel


theModel = loadModel( 'JUPYTER_MODEL')

MODEL Loaded.


### Saves the model

In [6]:
def saveModelEverything( theModel, modelName ):
    """
    Saved Everything in regards to the model.
    """
    saveModelStructure( theModel, modelName )
    saveModel( theModel, modelName )
    saveModelJSON( theModel, modelName )
    saveModelWeights( theModel, modelName )
    
    print("Model Everything Saved")
    

def saveModelStructure( theModel, modelStructureName ):
    """
    Saves an image of the Model Structure.
    """
    modelStructsFilePath = os.path.join(modelsStructsDirectory, modelStructureName )
    plot_model(theModel, to_file=modelStructsFilePath)


def saveModelJSON( model, modelName ):
    """
    Saves the Model as JSON
    Args:
        model: the Keras NN Model
        modelName: the Name

    Returns:

    """
    modelFilePath = os.path.join( modelsDirectory, modelName + '.json' )
    model_json = theModel.to_json()
    with open( modelFilePath, 'w') as json_file:
        json_file.write(model_json)

        
def saveModel( model, modelName ):
    """
    Save the model, in Keras [h5] format.
    """
    theModel.save(os.path.join( modelsDirectory, modelName ))

    
def saveModelWeights( theModel, modelName ):
    """
    Saved the Model Weights
    Args:
        weights: The Weights
        weightName: Weight Names

    Returns:

    """
    weightsFilePath = os.path.join( weightsDirectory, modelName + '.h5' )
    theModel.save_weights( weightsFilePath )
    
    
saveModelEverything( theModel, 'JUPYTER_MODEL')

Model Everything Saved


### Check the stats on the data set.

In [8]:
print( len( csDM.getListOfDataCSVFileKeys() ) )
csDM.reportStats()

901
Number of Everything
UPs: 278
DOWNs: 201
NAs: 204
HOLEs: 218


###  Build a model with Keras



In [2]:
# import keras
# from quiver_engine import server

# input image dimensions
# img_rows, img_cols = 480, 640

# num_classes = 4

def buildModel( numOfNodes=48, numOfLayers=1):
    """
    Builds the basic model.
    Returns:
        A Keras NN Model

    """
    # input image dimensions
    img_rows, img_cols = 480, 640
    input_shape = (img_rows, img_cols, 1)
    num_classes = 4

    print("Building Model with ", numOfNodes, " nodes and ", numOfLayers, " layers.")

    theModel = Sequential()

    theModel.add(
        Conv2D(5,
               kernel_size=(5, 5),
               strides=3,
               activation='relu',
               input_shape=input_shape
               )
    )
    theModel.add(
        MaxPooling2D(
            pool_size=(2, 2)
        )
    )

    theModel.add(
        Conv2D(
            10,
            kernel_size=(3, 3),
            strides=2,
            activation='relu')
    )
    theModel.add(
        MaxPooling2D(
            pool_size=(2, 2),
            strides=2
        )
    )

    theModel.add(Flatten())

    for index in range( numOfLayers ):
        theModel.add(Dense(numOfNodes))
        theModel.add(BatchNormalization())
        theModel.add(Activation('relu'))
        theModel.add(Dropout(0.25))

    theModel.add(Dense(num_classes, activation='softmax'))

    theModel.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=['categorical_accuracy']
    )

    theModel.summary()
    
    return theModel
    
# server.launch(model)

theModel = buildModel()


Building Model with  48  nodes and  1  layers.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 159, 212, 5)       130       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 79, 106, 5)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 39, 52, 10)        460       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 19, 26, 10)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 4940)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 48)                237168    
_________________________________________________________________
batch_normalization_1 (Batch 

### Load a Dataset for Training

In [3]:
X_train, Y_train, X_test, Y_test = csDM.load_dataset( '0', 'IDEPTH' )

###  Training the Model.

In [4]:
def trainModel( trainingName, theModel, x_train, y_train, x_test, y_test, num_classes=4 ):
    """
    Trains the model via given data.

    Args:
        trainingName: A name of this train [mainly to track in TensorBoard
        x_train: The X Set for Trainings
        y_train: The Y set for Testing
        x_test:  The X Set for Training/Verification
        y_test:  The Y Set for Testing/Verification

    Returns:

    """
    img_rows, img_cols = 480, 640
    
    # Reshape the X sets.
    # Mainly for this project.because Keras/Tensor thinks in Channels.
    # And since we are using Greyscale data, we really don't have a channel.
    # So we have to 'fake' a channel
    #
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)

    # Convert class vectors to binary class matrices
    y_train_as_category = to_categorical(y_train, num_classes)
    y_test_as_category = to_categorical(y_test, num_classes)

    logFilePath = os.path.join( tensorLogDataDir, trainingName )

    TBoardCallback = keras.callbacks.TensorBoard(
        log_dir=logFilePath,
        histogram_freq=0,
        write_graph=True,
        write_images=True
    )

    theModel.fit(x_train,
              y_train_as_category,
              batch_size=16,
              epochs=24,
              verbose=1,
              validation_data=(x_test, y_test_as_category),
              callbacks=[TBoardCallback]
              )
    
    return theModel



# X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
# X_test = X_test.reshape(X_train.shape[0], img_rows, img_cols, 1)

# # convert class vectors to binary class matrices
# y_train_as_category = to_categorical(y_train, num_classes)
# y_test_as_category = to_categorical(y_test, num_classes)

theModel = trainModel( "JupyterTESTRUN", theModel, X_train, Y_train, X_test, Y_test, 4)

Train on 200 samples, validate on 40 samples
Epoch 1/24
Epoch 2/24
Epoch 3/24
Epoch 4/24
Epoch 5/24
Epoch 6/24
Epoch 7/24
Epoch 8/24
Epoch 9/24
Epoch 10/24
Epoch 11/24
Epoch 12/24
Epoch 13/24
Epoch 14/24
Epoch 15/24
Epoch 16/24
Epoch 17/24
Epoch 18/24
Epoch 19/24
Epoch 20/24
Epoch 21/24
Epoch 22/24
Epoch 23/24
Epoch 24/24


### Load Dataset for Testing.

In [3]:
X_train_Z, Y_train_Z, X_test_Z, Y_test_Z = csDM.load_dataset( '0', 'IDEPTH' )

### Evaluate the Model

In [4]:
def evaluateModel( theModel, x_test, y_test, num_classes):
    """
    Evaluated the Model.
    
    Parameters:
        theModel:
        x_test:
        y_test:
        num_classes:
        
    Return:
    
    """
    img_rows, img_cols = 480, 640
    
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    y_test_as_category = to_categorical(y_test, num_classes)

    score = theModel.evaluate(x_test, y_test_as_category, verbose=0)
    print('General > Test loss: ', score[0], 'Test accuracy: ', score[1] )

    predictionResults = theModel.predict_classes(x_test, verbose=1)
    
    scoringList = [0, 0, 0, 0]
    scoringListAsPecents = []
    
    for index in range(len(x_test)):
        if( predictionResults[index] == y_test[index] ):
#             print( index, 'Results: ', predictionResults[index], " VS ", y_test[index], "Match" )
            scoringList[ int(y_test[index]) ] = scoringList[ int(y_test[index]) ] + 1
#         else:
#             print( index, 'Results: ', predictionResults[index], " VS ", y_test[index], "No Match" )
    
    for element in scoringList:
        scoringListAsPecents.append( element / 10.0 )
    
#     print( scoringList )
        
    return { 'SCORE': score, 'SCORELIST' : scoringListAsPecents }


# scoringResults = evaluateModel( theModel, X_test, Y_test, 4)
scoringResults = evaluateModel( theModel, X_test_Z, Y_test_Z, 4)
print(scoringResults['SCORELIST'])

General > Test loss:  0.809247305989 Test accuracy:  0.675
[1.0, 0.6, 0.4, 0.7]



### Human Oracle Part

In [20]:
def reportOracle( status ):
    """
    """
    oracleReportPath = os.path.join(resultsDirectory, 'JUPYTER_ORACLE_REPORT.txt')
    fileToWrite = open(oracleReportPath, 'a')
    fileToWrite.write( status + '\n' )
    fileToWrite.close()


def getLowestScoringCategory( scoringListAsPecents ):
    """
    Get the index of the lowest scoring category.
    """
    
    lowestCategoryPercent = 1.0
    currentLowest = 0
    
    for index in range(len(scoringListAsPecents)):
        currentCategoryPercent = scoringListAsPecents[index]
        if( currentCategoryPercent < lowestCategoryPercent ):
            currentLowest = index
            lowestCategoryPercent = currentCategoryPercent

    return currentLowest


def addNewSamplesToTrainingSet( trainingList, newSamples ):
    """
    Merge the new samples with the Training set.
    """
    return trainingList + newSamples
    

def getSamplesFromAClass( classType, trainingList, allTestListsCombined, numberOfSamples ):
    """
    Get multiple samples from a class.
    """
    newSamples = []
    
    for index in range(numberOfSamples):
        newSample = getASampleOfClass( classType, trainingList, allTestListsCombined )
        newSamples.append( newSample )

    return newSamples
        

def getASampleOfClass( classType, trainingList, allTestListsCombined ):
    """
    Get a Sample from a Specific class that isn't being trained on yet.
    """
    
    if( classType == 'UP' ):
        classType = 'upList'
    elif( classType == 'DOWN' ):
        classType = 'downList'
    elif( classType == 'NA' ):
        classType = 'naList'
    elif( classType == 'HOLE' ):
        classType == 'holeList'
        
    random.seed( datetime.datetime.utcnow() )
    
    dataClassList = csDM.getDictOfClassLists()
    
    trainingListAsSet = set( trainingList )
    allTestListsCombinedAsSet = set( allTestListsCombined )
    
    allElementsOfClass = dataClassList[classType]
    
    selectedSample = random.choice(allElementsOfClass)
    
    while( ( selectedSample in trainingListAsSet) or ( selectedSample in allTestListsCombinedAsSet) ):
        selectedSample = random.choice(allElementsOfClass)

    return selectedSample
    

def getAllTestLists():
    """
    Gets all the test lists.
    Returns:
        A Dict of the test lists.
    """
    allTestLists = []
    allTestListCombined = []
    
    for index in range(5):
        allTestLists.append( csDM.getTestList(index) )
        allTestListCombined = allTestListCombined + csDM.getTestList(index)
        
    return { 'TestLists' : allTestLists, 'CombinedTestLists' : allTestListCombined}

dataClassList = csDM.getDictOfClassLists()
allTheTestLists = getAllTestLists()
theTrainingList = csDM.getTrainingList(0)

print( len( allTheTestLists['TestLists'] ), len( allTheTestLists['CombinedTestLists'] ), len(theTrainingList) )
print("")

combinedTrainingAndNewSampleList = addNewSamplesToTrainingSet( theTrainingList, newTrainingSet )
lowestCategory = csDM.getClassFromNumeral(getLowestScoringCategory(scoringResults['SCORELIST']))
newTrainingSet = getSamplesFromAClass( lowestCategory, theTrainingList, allTheTestLists['CombinedTestLists'], 20 )

print(len(combinedTrainingAndNewSampleList))
print(getLowestScoringCategory(scoringResults['SCORELIST']))
print(lowestCategory)

print("")
print(newTrainingSet)



5 200 200

220
2
DOWN

[15082667660, 15085989050, 15087883470, 15084376710, 15085985661, 15084377460, 15085985011, 15087879680, 15085984771, 15084365231, 15082659990, 15085991310, 15085994271, 15087884390, 15085984830, 15082666740, 15084364850, 15084364850, 15085994200, 15082667661]


In [14]:
print("")




In [15]:
print("")


