In [1]:
import pandas as pd
from tensorflow import keras
import numpy as np
from sklearn import preprocessing
from sklearn import model_selection
from pickle import dump, load

from training_functions import summarize_data, get_encoded_labels_overshoot_undershoot_classification

In [2]:
# Read the balanced training dataset from disk into a DataFrame
dataset = pd.read_csv(
    "../Datasets/Three Coefficient 2nd Order/Balanced Dataset 1.csv"
)
# Preview the first five rows as plain text
print(dataset.head(n=5))
# Print the summary produced by the project helper (per the recorded output,
# this includes counts of models with overshoot and with undershoot)
summarize_data(dataset)

         N1         D2         D1  RiseTime  TransientTime  SettlingTime  \
0 -2.098997   9.355879  12.169613  1.418254       2.808480      3.004993   
1 -8.064675   7.424314  10.984939  1.178159       2.236274      2.261321   
2  3.355627  10.362653  17.972740  0.690566       1.434136      1.434136   
3 -5.016306   6.777631  10.215730  1.109049       2.153838      2.198836   
4 -6.944586   7.414111  13.427289  0.904405       1.722170      1.748116   

   SettlingMin  SettlingMax  Overshoot  Undershoot      Peak  PeakTime  
0    -0.172400    -0.155525        0.0   35.908827  0.172400  5.423694  
1    -0.733804    -0.660804        0.0    5.243774  0.733804  4.088517  
2     0.168055     0.186666        0.0   -0.000000  0.186666  3.487674  
3    -0.490662    -0.442658        0.0   10.675850  0.490662  3.643252  
4    -0.516743    -0.465671        0.0    8.085863  0.516743  2.766764  
Models with overshoot: 3334/33.339999999999996%
Models with undershoot: 3334/33.339999999999996%
Models w

In [3]:
# Determine the appropriate label for each entry using one-hot encoding
# [0 0 1] for undershoot, [0 1 0] for overshoot, [1 0 0] for neither
# The encoding itself is implemented by the helper imported from training_functions;
# the result has one row per dataset entry and is split into training/test labels later on.
labels = get_encoded_labels_overshoot_undershoot_classification(dataset)

In [4]:
# Turn the DataFrame into a plain 2-D numpy array
data_array = dataset.to_numpy()

# The network inputs are the coefficients, stored in the first three CSV columns;
# every remaining column is left out of the input matrix
coefficients = data_array[:, :3]

# Standardize the inputs: StandardScaler removes the mean and scales to unit variance
# NOTE(review): the scaler is fitted on the whole dataset, i.e. before the train/test split,
# so the held-out rows also contribute to the scaling statistics - confirm this is intended
scaler = preprocessing.StandardScaler()
scaler.fit(coefficients)
scaled_coefficients = scaler.transform(coefficients)

print("Dimensions of the coefficient/input matrix", scaled_coefficients.shape, sep="\n")

Dimensions of the coefficient/input matrix
(10000, 3)


In [5]:
# Reserve 10% of the data as test data, and use the rest as training data
# A fixed random_state makes the shuffle deterministic, so every run of the notebook
# trains and tests on the same partition and results stay comparable between runs
train_inputs, test_inputs, train_labels, test_labels = model_selection.train_test_split(
    scaled_coefficients,
    labels,
    test_size=0.10,
    random_state=42,
)
# Sanity check: inputs and labels must have matching row counts in each partition
print(train_inputs.shape, train_labels.shape,test_inputs.shape, test_labels.shape)

(9000, 3) (9000, 3) (1000, 3) (1000, 3)


In [6]:
# Build the classifier as a stack of fully connected layers
model = keras.models.Sequential([
    # Two hidden layers of 128 nodes each, both with the RELU activation function;
    # the first one also declares the three-coefficient input
    keras.layers.Dense(128, activation='relu', name='layer_1', input_shape=(3,)),
    keras.layers.Dense(128, activation='relu', name='layer_2'),
    # Three-way output layer with the softmax activation function (one node per one-hot class)
    keras.layers.Dense(3, activation='softmax', name='output_layer'),
])
# No optimizer is passed, so Keras falls back to its default one
model.compile(metrics=['accuracy'], loss='categorical_crossentropy')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer_1 (Dense)             (None, 128)               512       
                                                                 
 layer_2 (Dense)             (None, 128)               16512     
                                                                 
 output_layer (Dense)        (None, 3)                 387       
                                                                 
Total params: 17,411
Trainable params: 17,411
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Fit the model on the training partition; 20% of it is held out for per-epoch validation
history = model.fit(
    x=train_inputs,
    y=train_labels,
    validation_split=0.2,
    epochs=10,
    batch_size=16,
    verbose=1,
)

# Score the trained model on the reserved test partition; the cell output is [loss, accuracy]
print("Evaluation against test data:")
model.evaluate(x=test_inputs, y=test_labels)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Evaluation against test data:


[0.0771091878414154, 0.9639999866485596]

In [8]:
# Export the model and the scaler used with the data
# Both will be stored in a directory matching the model name
# The model is saved first so that the target directory exists before the scaler is written
model.save("OUID_3C_Basic/OUID_3C_Basic")
# Use a context manager so the pickle file is flushed and closed as soon as the scaler is written
with open('OUID_3C_Basic/scaler.pkl','wb') as scaler_file:
    dump(scaler, scaler_file)



INFO:tensorflow:Assets written to: OUID_3C_Basic/OUID_3C_Basic\assets


INFO:tensorflow:Assets written to: OUID_3C_Basic/OUID_3C_Basic\assets


In [9]:
# For testing existing models on new data
# This cell deliberately uses its own variable names so that it does not overwrite the
# `model`, `scaler`, `dataset`, `labels` and `data_array` created by the training cells;
# the hyperparameter cells further down rely on the scaler fitted on the training data.
existing_model = keras.models.load_model("../Finished Neural Network Models/OUID_3C_Optimized/OUID_3C_Optimized")
# NOTE: unpickling can execute arbitrary code, so only load scaler files from a trusted source
# The context manager closes the file as soon as the scaler has been read
with open('../Finished Neural Network Models/OUID_3C_Optimized/scaler.pkl','rb') as scaler_file:
    existing_scaler = load(scaler_file)
new_dataset = pd.read_csv("../Datasets/Three Coefficient 2nd Order/Dataset 2.csv")
new_labels = get_encoded_labels_overshoot_undershoot_classification(new_dataset)
new_data_array = new_dataset.to_numpy()
# Scale the coefficients (first three columns) with the scaler that was saved alongside the model
new_scaled_input = existing_scaler.transform(new_data_array[:,0:3])
raw_predictions = existing_model.predict(new_scaled_input)
# Collapse the softmax outputs and the one-hot labels to class indices
predictions = np.argmax(raw_predictions,1)
actual = np.argmax(new_labels,1)
# Analysis of the performance of the model
# A non-zero difference between predicted and actual class index marks a misclassified entry
incorrect_indices = (predictions - actual).nonzero()
num_incorrect = incorrect_indices[0].shape[0]
num_samples = new_dataset.shape[0]
print("Number of classification errors: " + str(num_incorrect))
print("Accuracy: " + str((num_samples - num_incorrect)/num_samples))
# Generate confusion matrix; rows correspond to actual label and columns correspond to predicted label
confusion_matrix = np.zeros((3,3))
# np.add.at performs an unbuffered add, so every repeated (actual, predicted) pair is counted
np.add.at(confusion_matrix, (actual, predictions), 1)
print("Confusion matrix:")
print(confusion_matrix)

Number of classification errors: 241
Accuracy: 0.9759
Confusion matrix:
[[3432.   11.  128.]
 [  41. 1470.   12.]
 [   0.   49. 4857.]]


Optimizing the hyperparameters (run cells 1 through 5 before running the cells below; they prepare the scaled training data and the fitted scaler that these cells rely on)

In [10]:
# Comparing different options for number of hidden layers and nodes per layer (keeping the nodes per layer uniform)
# Here we generate and train the model for each combination
num_layer_options = [2, 3, 4]
nodes_per_layer_options = [64, 128, 256]
# models[indx_1][indx_2] holds the trained model with num_layer_options[indx_1] hidden layers and
# nodes_per_layer_options[indx_2] nodes per layer: one row per layer option, one column per node option.
# Sizing the grid this way keeps the indexing valid even when the two option lists differ in length.
models = [[None for _ in nodes_per_layer_options] for _ in num_layer_options]
# TODO: keep the History returned by model.fit for each combination to graph training performance
for indx_1, nlo in enumerate(num_layer_options):
    for indx_2, nplo in enumerate(nodes_per_layer_options):
        model = keras.models.Sequential()
        # First hidden layer also declares the three-coefficient input
        model.add(keras.layers.Dense(nplo,
                                     input_shape=(3,),
                                     name='1',
                                     activation='relu'))
        # Remaining hidden layers, named '2', '3', ... in order
        for i in range(nlo-1):
            model.add(keras.layers.Dense(nplo,
                                         name=str(i+2),
                                         activation='relu'))
        # Three-way softmax output, one node per one-hot class
        model.add(keras.layers.Dense(3,
                                     name='output_layer',
                                     activation='softmax'))

        model.compile(loss='categorical_crossentropy',
                      metrics=['accuracy'])
        # Same training settings for every combination so the results are comparable
        model.fit(train_inputs,train_labels,batch_size = 16,epochs = 10,verbose = 0,validation_split = 0.2)
        models[indx_1][indx_2] = model


In [11]:
# Load a separate dataset for testing
# NOTE: this cell reassigns `labels`, `data_array` and `scaled_input`; re-run the data preparation
# cells before splitting or training again so that they pick up the training dataset's values
test_dataset = pd.read_csv("../Datasets/Three Coefficient 2nd Order/Balanced Dataset 2.csv")
labels = get_encoded_labels_overshoot_undershoot_classification(test_dataset)
data_array = test_dataset.to_numpy()
# `scaler` must be the scaler fitted on the training coefficients, because the models below were
# trained on inputs scaled by it; re-run the preprocessing cell if `scaler` has been reassigned since
scaled_input = scaler.transform(data_array[:,0:3])

# models[i][j] was trained with num_layer_options[i] hidden layers and nodes_per_layer_options[j] nodes per layer
for i, num_layers in enumerate(num_layer_options):
    for j, nodes_per_layer in enumerate(nodes_per_layer_options):
        # Evaluate the model against the test dataset and print results,
        # naming the configuration so each result can be attributed to it
        print("Evaluation against test data (" + str(num_layers) + " hidden layers, "
              + str(nodes_per_layer) + " nodes per layer):")
        models[i][j].evaluate(scaled_input, labels)


Evaluation against test data:
Evaluation against test data:
Evaluation against test data:
Evaluation against test data:
Evaluation against test data:
Evaluation against test data:
Evaluation against test data:
Evaluation against test data:
Evaluation against test data:


In my testing, the best accuracy came from using 4 hidden layers with 128 nodes per layer. Note that the results will vary from run to run, because training is stochastic (random weight initialization and data shuffling).