# CS-660 Semester Project Notebook 1

## Stan Rosenbaum

Fall 2017

Anaconda 5 / Python 3

Using Keras with TensorFlow as back end.

### Background Important Stuff

First the Enums of the Classes

* "NA" = 0
* "UP" = 1
* "DOWN" = 2
* "HOLE" = 3


In [3]:
# First we init stuff.
# Load the Basic Python Libraries
import os
import csv
import PIL
import pickle
import random
import datetime
import copy

# Load my Data Management Module
import CS660DataManagement as csDM
import HumanOracle as hO

# load numpy
import numpy as np

# Load Keras Stuff
import keras
import keras.backend as K
from keras import layers
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.models import Model, Sequential
from keras.models import load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from keras.initializers import glorot_uniform
from keras.callbacks import TensorBoard
from keras.utils.np_utils import to_categorical

K.set_image_data_format('channels_last')

# Other.  Mostly Graphic stuff for displaying Data in and out of Jupyter.
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import pydot
import graphviz
from IPython.display import SVG

# Not using Quiver Yet.
# from quiver_engine import server

%matplotlib inline

# Get Processed Data Directory.
processedDataDir = os.path.join( os.getcwd(), os.pardir, 'DATA', 'PROCESSED' )
combinedDataDir = os.path.join( os.getcwd(), os.pardir, 'DATA', 'COMBINED' )
pickleDataDir = os.path.join( os.getcwd(), os.pardir, 'DATA', 'PICKLES' )
modelsDirectory = os.path.join( os.getcwd(), os.pardir, 'MODELS' )
modelsStructsDirectory = os.path.join( os.getcwd(), os.pardir, 'MODELS_STRUCTS' )
weightsDirectory = os.path.join( os.getcwd(), os.pardir, 'WEIGHTS' )
resultsDirectory = os.path.join( os.getcwd(), os.pardir, 'RESULTS' )

testImageColorFile = os.path.join( os.getcwd(), os.pardir, 'DATA', 'COMBINED', 'ICOLOR', 'TEST.png' )
testImageDepthFile = os.path.join( os.getcwd(), os.pardir, 'DATA', 'COMBINED', 'IDEPTH', 'TEST.png' )
testCSVFile = os.path.join( os.getcwd(), os.pardir, 'DATA', 'COMBINED', 'PCLOUD', 'TEST.csv' )

tensorLogDataDir = os.path.join( os.getcwd(), os.pardir, 'TENSOR_LOGS' )

imageDirs = ['ICOLOR', 'IDEPTH']
csvDirs = ['PCLOUD']

allDataFlavors = imageDirs + csvDirs

#Load main data file.
searCSVInfoFile = os.path.join( combinedDataDir, 'SEAR_DC_INFO.csv' )

csDM.CS660DataManagementCheck()

CS660DataManagementCheck Imported


### Load Model Data

Unless we need to train model.

In [4]:
def loadModel( modelName ):
    """
    Loads the Model.
    """
    theModel = load_model( os.path.join( modelsDirectory, modelName) )
    print('MODEL Loaded.')
    return theModel


theModel = loadModel( 'JUPYTER_MODEL')

MODEL Loaded.


### Saves the model

In [6]:
def saveModelEverything( theModel, modelName ):
    """
    Saved Everything in regards to the model.
    """
    saveModelStructure( theModel, modelName )
    saveModel( theModel, modelName )
    saveModelJSON( theModel, modelName )
    saveModelWeights( theModel, modelName )
    
    print("Model Everything Saved")
    

def saveModelStructure( theModel, modelStructureName ):
    """
    Saves an image of the Model Structure.
    """
    modelStructsFilePath = os.path.join(modelsStructsDirectory, modelStructureName )
    plot_model(theModel, to_file=modelStructsFilePath)


def saveModelJSON( model, modelName ):
    """
    Saves the Model as JSON
    Args:
        model: the Keras NN Model
        modelName: the Name

    Returns:

    """
    modelFilePath = os.path.join( modelsDirectory, modelName + '.json' )
    model_json = theModel.to_json()
    with open( modelFilePath, 'w') as json_file:
        json_file.write(model_json)

        
def saveModel( model, modelName ):
    """
    Save the model, in Keras [h5] format.
    """
    theModel.save(os.path.join( modelsDirectory, modelName ))

    
def saveModelWeights( theModel, modelName ):
    """
    Saved the Model Weights
    Args:
        weights: The Weights
        weightName: Weight Names

    Returns:

    """
    weightsFilePath = os.path.join( weightsDirectory, modelName + '.h5' )
    theModel.save_weights( weightsFilePath )
    
    
saveModelEverything( theModel, 'JUPYTER_MODEL')

Model Everything Saved


### Check the stats on the data set.

In [3]:
print( len( csDM.getListOfDataCSVFileKeys() ) )
csDM.reportStats()

901
Number of Everything
UPs: 278
DOWNs: 201
NAs: 204
HOLEs: 218


###  Build a model with Keras



In [16]:
# import keras
# from quiver_engine import server

# input image dimensions
# img_rows, img_cols = 480, 640

# num_classes = 4

def buildModel( numOfNodes=48, numOfLayers=1):
    """
    Builds the basic model.
    Returns:
        A Keras NN Model

    """
    # input image dimensions
    img_rows, img_cols = 480, 640
    input_shape = (img_rows, img_cols, 1)
    num_classes = 4

    print("Building Model with ", numOfNodes, " nodes and ", numOfLayers, " layers.")

    theModel = Sequential()

    theModel.add(
        Conv2D(5,
               kernel_size=(5, 5),
               strides=3,
               activation='relu',
               input_shape=input_shape
               )
    )
    theModel.add(
        MaxPooling2D(
            pool_size=(2, 2)
        )
    )

    theModel.add(
        Conv2D(
            10,
            kernel_size=(3, 3),
            strides=2,
            activation='relu')
    )
    theModel.add(
        MaxPooling2D(
            pool_size=(2, 2),
            strides=2
        )
    )

    theModel.add(Flatten())

    for index in range( numOfLayers ):
        theModel.add(Dense(numOfNodes))
        theModel.add(BatchNormalization())
        theModel.add(Activation('relu'))
        theModel.add(Dropout(0.25))

    theModel.add(Dense(num_classes, activation='softmax'))

    theModel.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=['categorical_accuracy']
    )

    theModel.summary()
    
    return theModel
    
# server.launch(model)

# theModel = buildModel()


### Load a Dataset for Training

In [5]:
X_train, Y_train, X_test, Y_test = csDM.loadTrainingAndTestDataset( '0', 'IDEPTH' )

###  Training the Model.

In [17]:
def trainModel( trainingName, theModel, x_train, y_train, x_test, y_test, num_classes=4, numOfEpochs=24 ):
    """
    Trains the model via given data.

    Args:
        trainingName: A name of this train [mainly to track in TensorBoard
        x_train: The X Set for Trainings
        y_train: The Y set for Testing
        x_test:  The X Set for Training/Verification
        y_test:  The Y Set for Testing/Verification

    Returns:

    """
    img_rows, img_cols = 480, 640
    
    # Reshape the X sets.
    # Mainly for this project.because Keras/Tensor thinks in Channels.
    # And since we are using Greyscale data, we really don't have a channel.
    # So we have to 'fake' a channel
    #
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)

    # Convert class vectors to binary class matrices
    y_train_as_category = to_categorical(y_train, num_classes)
    y_test_as_category = to_categorical(y_test, num_classes)

    logFilePath = os.path.join( tensorLogDataDir, trainingName )

    TBoardCallback = keras.callbacks.TensorBoard(
        log_dir=logFilePath,
        histogram_freq=0,
        write_graph=True,
        write_images=True
    )

    theModel.fit(x_train,
              y_train_as_category,
              batch_size=16,
              epochs=numOfEpochs,
              verbose=1,
              validation_data=(x_test, y_test_as_category),
              callbacks=[TBoardCallback]
              )
    
    return theModel



# X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
# X_test = X_test.reshape(X_train.shape[0], img_rows, img_cols, 1)

# # convert class vectors to binary class matrices
# y_train_as_category = to_categorical(y_train, num_classes)
# y_test_as_category = to_categorical(y_test, num_classes)

# theModel = trainModel( "JupyterTESTRUN", theModel, X_train, Y_train, X_test, Y_test, 4)

### Load Dataset for Testing.

In [7]:
X_test_Z, Y_test_Z = csDM.loadTestOnlyDataset( '2', 'IDEPTH' )

### Evaluate the Model

In [18]:
def evaluateModel( theModel, x_test, y_test, num_classes):
    """
    Evaluated the Model.
    
    Parameters:
        theModel:
        x_test:
        y_test:
        num_classes:
        
    Return:
    
    """
    img_rows, img_cols = 480, 640
    
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    y_test_as_category = to_categorical(y_test, num_classes)

    score = theModel.evaluate(x_test, y_test_as_category, verbose=0)
    print('General > Test loss: ', score[0], 'Test accuracy: ', score[1] )

    predictionResults = theModel.predict_classes(x_test, verbose=1)
    
    scoringList = [0, 0, 0, 0]
    scoringListAsPecents = []
    
    for index in range(len(x_test)):
        if( predictionResults[index] == y_test[index] ):
#             print( index, 'Results: ', predictionResults[index], " VS ", y_test[index], "Match" )
            scoringList[ int(y_test[index]) ] = scoringList[ int(y_test[index]) ] + 1
#         else:
#             print( index, 'Results: ', predictionResults[index], " VS ", y_test[index], "No Match" )
    
    for element in scoringList:
        scoringListAsPecents.append( element / 10.0 )
    
#     print( scoringList )
        
    return { 'SCORE': score, 'SCORELIST' : scoringListAsPecents }


# scoringResults = evaluateModel( theModel, X_test, Y_test, 4)
# scoringResults = evaluateModel( theModel, X_test_Z, Y_test_Z, 4)
# print(scoringResults['SCORELIST'])


### Human Oracle Part

1. Running Training
2. Analysize Training with Test Set via Predict()
    1. Record Results
3. Get the class which got the lowest score from Prediction.
    1. Record that Class
4. Get 20? Elements of that Class as the Oracle.
    1. Record what was added
5. Add those new elements into the Training 
6. Go to Step 1, X Times.

In [1]:
def runHumanOracle( numberOfAges=20 ):
    """
    Human Oracle master function
    """
    
    print("Starting HO")
    
    # Starting
    numberOfEachClass = {'UP':556, 'DOWN': 402, 'NA':408, 'HOLE': 436 }
    
    # After Train/Test removed.
    numberOfEachClass = {'UP':496, 'DOWN': 342, 'NA':348, 'HOLE': 376 }
    
    # What are re analysizing.
    dataFlavor = 'IDEPTH'
    
    # First get the file name we need to record data.
    trainingPredictionResultsFileName = hO.getTrainingPredictionResultsFileName()
    elementsAddedToTrainingSetFileName = hO.getElementsAddedToTrainingSetFileName()
    
    # Get Elements to train and test on
    X_train, Y_train, X_test, Y_test , X_trainList, Y_TestList = csDM.loadTrainingAndTestDatasetAndLists( '0', dataFlavor )
    
    # This is to ensure our test set always stay independent.
    allTestListsCombined = hO.getAllTestLists()
    
    # Get Independent Set for Testing 
    # Techincally the above X_test, Y_test, should be indepedent, according to the Keras documentation.
    # But I am using a second set just because.
    X_test_Z, Y_test_Z = csDM.loadTestOnlyDataset( '2', dataFlavor )
    
    # Build Model
    theModel = buildModel()
        
    # Train the Model
    trainModel( "JupyterHumanOracleTraining", theModel, X_train, Y_train, X_test, Y_test, 4)
        
    # Evaluate the Model [with indie data]
    scoringResults = evaluateModel( theModel, X_test_Z, Y_test_Z, 4)

    # Now the real Fun starts.
    # Get Lowest scoring class.
    lowestScoringClassName = csDM.getClassFromNumeral( hO.getLowestScoringCategory( scoringResults['SCORELIST'] ) )

    # Record the Init Results of the first eval and the first lowest scoring category.
    hO.recordTrainingPredictionResults( trainingPredictionResultsFileName, scoringResults, lowestScoringClassName )

    for index in range(numberOfAges):

        # Get new samples from the lowest scoring class.
        newSamples = hO.getSamplesFromAClass( lowestScoringClassName, X_trainList, allTestListsCombined, 20 )    

        # Subtract newSamples to makes sure we have samples to work with
        
        
        # Record What we added.
        hO.recordElementsAddedToTrainingSet( elementsAddedToTrainingSetFileName, lowestScoringClassName, newSamples )

        X_trainList = X_trainList + newSamples
        
        # Add samples to training set
        # NOTE: We need to turn the 'newSamples' into NPArrays.  It is these new NP Arrays we add to 
        # X_train, Y_train.  Out Original List of the what is in the training sets stay intact.

        # So first get the NPArrays of the new Samples
        dictOfLearningAndVerificationNPArrays = csDM.createNPArraysFor( newSamples, dataFlavor )

        # Then we add them to the training set.
        
#         print(type(X_train))
#         print(type(dictOfLearningAndVerificationNPArrays['LEARNING']))
        
#         print( X_train.shape )
#         print( dictOfLearningAndVerificationNPArrays['LEARNING'].shape )
        
        
        X_train = np.concatenate( (X_train, dictOfLearningAndVerificationNPArrays['LEARNING']), axis=0 )
        Y_train = np.concatenate( (Y_train, dictOfLearningAndVerificationNPArrays['VERIFICATION']), axis=0 )

        # And Then we train the model again.
        stringOfTheAge = 'JupyterHumanOracleTraining_AGE_' + str(index)
        trainModel( stringOfTheAge, theModel, X_train, Y_train, X_test, Y_test, 4, 12)
        
        
        # Evaluate the Model [with indie data]
        scoringResults = evaluateModel( theModel, X_test_Z, Y_test_Z, 4)

        # Now the real Fun starts.
        # Get Lowest scoring class.
        lowestScoringClassName = csDM.getClassFromNumeral( hO.getLowestScoringCategory( scoringResults['SCORELIST'] ) )

        # Record the Init Results of the first eval and the first lowest scoring category.
        hO.recordTrainingPredictionResults( trainingPredictionResultsFileName, scoringResults, lowestScoringClassName )


# runHumanOracle( numberOfAges=20 )
print("")




In [3]:
print("Hello")

Hello


In [28]:
import os
import csv
import numpy as np
import matplotlib.pyplot as plt

resultsDirectory = os.path.join( os.getcwd(), os.pardir, 'RESULTS' )
resultPlotsDirectory = os.path.join( os.getcwd(), os.pardir, 'RESULTS_PLOTS' )

rootString = 'PyCHARM_TRAIN_PREDICTION_RESULTS'

TestLoss = []
TestAccuracy = []
NA = []
UP = []
DOWN = []
HOLE = []

xAxis = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]

headers = ['TestLoss','TestAccuracy','NA','UP','DOWN','HOLE']

def generatePlotsInResults():
    listFromDir = os.listdir( resultsDirectory )
    for element in listFromDir:
        if(rootString in element):
            pathToOpen = os.path.join( resultsDirectory, element )
            with open( pathToOpen, newline='\n') as csvFile:
                csvReader = csv.reader( csvFile, delimiter=',')
                for row in csvReader:
                    if row[1] in headers:
                        pass
                    else:
                        TestLoss.append(float(row[0]))
                        TestAccuracy.append(float(row[1]))
                        NA.append(float(row[2]))
                        UP.append(float(row[3]))
                        DOWN.append(float(row[4]))
                        HOLE.append(float(row[5]))
                        
            plt.plot(xAxis, TestAccuracy, 'b*-')
            plt.plot(xAxis, NA, 'rs:')
            plt.plot(xAxis, UP, 'gp:')
            plt.plot(xAxis, DOWN, 'c^:')
            plt.plot(xAxis, HOLE, 'mh:')
            
            plt.xlabel('AGE')
            
            saveFileName = element + '.png'
            savePath = os.path.join( resultPlotsDirectory, saveFileName ) 
            plt.savefig(savePath)

            TestLoss.clear()
            TestAccuracy.clear()
            NA.clear()
            UP.clear()
            DOWN.clear()
            HOLE.clear()
            
            plt.clf()
            
            
generatePlotsInResults()

ValueError: x and y must have same first dimension, but have shapes (16,) and (13,)

In [None]:
asdf