# Christian Stonecipher
# Project 4: fnn

In [43]:
# Import pandas to import the needed dataset
import pandas as pd
# Import numpy to perform operations on the dataset
import numpy as np

# Import sklearn functions to preprocess the dataset
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

# Import sklearn functions to measure and output model performance
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

# Import keras to form and train the ML models
from keras.models import Sequential
from keras.layers import Dense


In [27]:
# Location and name of the Datasets
DatasetPath='Scenario_A/'
TrainingData='Training-a1-a3.csv'
TestingData='Testing-a2-a4.csv'

# Batch Size
BatchSize=10
# Epohe Size
NumEpoch=10

In [33]:
# Importing the datasets
TrainingDataset = pd.read_csv(DatasetPath+TrainingData, header=None)
TestingDataset = pd.read_csv(DatasetPath+TestingData, header=None)

# Initial setup of training data
Train_X = TrainingDataset.iloc[:, 0:-2].values
Train_label_column = TrainingDataset.iloc[:, -2].values

Train_y = []
for i in range(len(Train_label_column)):
    if Train_label_column[i] == 'normal':
        Train_y.append(0)
    else:
        Train_y.append(1)

Train_y = np.array(Train_y)

# Initial setup of testing data
Test_X = TestingDataset.iloc[:, 0:-2].values
Test_label_column = TestingDataset.iloc[:, -2].values

Test_y = []
for i in range(len(Test_label_column)):
    if Test_label_column[i] == 'normal':
        Test_y.append(0)
    else:
        Test_y.append(1)

Test_y = np.array(Test_y)


[0 0 1 ... 0 1 0]
[0 1 1 ... 0 0 1]


In [34]:
# Combine the two datasets to ensure that the one hot encoding outputs the same columns
combined = np.concatenate((Train_X,Test_X))
num_Train = len(Train_X)

ct = ColumnTransformer(
    [('one_hot_encoder', OneHotEncoder(), [1,2,3])],    # The column numbers to be transformed ([1, 2, 3] represents three columns to be transferred)
    remainder='passthrough'                             # Leave the rest of the columns untouched
)

combined = np.array(ct.fit_transform(combined), dtype=float)

#Seporate the encoded datasets back out
Train_X = combined[:num_Train]
Test_X = combined[num_Train:]

# Scale the data in the datasets to the range of [0,1]
sc = StandardScaler()
Train_X = sc.fit_transform(Train_X)  
Test_X = sc.fit_transform(Test_X)

In [36]:
# Set up the FNN
classifier = Sequential()

classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = len(Train_X[0])))
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

2022-12-03 13:00:23.057049: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2
2022-12-03 13:00:23.087830: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 3696000000 Hz
2022-12-03 13:00:23.087999: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x23abb30 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-12-03 13:00:23.088011: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2022-12-03 13:00:23.089696: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [37]:
# train the keras model
classifierHistory = classifier.fit(Train_X, Train_y, batch_size = BatchSize, epochs = NumEpoch, use_multiprocessing=True)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Print the loss and the accuracy of the model on the dataset
Loss [0,1]: 0.0060 Accuracy [0,1]: 0.9982


In [48]:
# Run the model on the testing data
y_pred = classifier.predict(Test_X)
y_pred = (y_pred > 0.9)   

# Determine and output the performance of the model on the testing dataset
cm = confusion_matrix(Test_y, y_pred)
print('Print the Confusion Matrix:')
print('[ TN, FP ]')
print('[ FN, TP ]=')
print(cm)


f1 = f1_score(Test_y, y_pred)
print('F1 score = ')
print(f1)


acc = accuracy_score(Test_y, y_pred)
print('Accuracy = ')
print(acc)

Print the Confusion Matrix:
[ TN, FP ]
[ FN, TP ]=
[[8714  997]
 [2538 2768]]
F1 score = 
0.6102965494432808
Accuracy = 
0.764600119864154
