### Notebook for classsifying using a Concolutional Neural Network (CNN).

This notebook is excpecting the data to be numeric. A method of ensuring this to first use Pre_Processing_USE-4.ipynb.

 Cells are executed in the order in which they appear in the notebook.

In [3]:
# Import libraries
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, ZeroPadding2D, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import datetime
import tensorboard

import cv2

from img_tools import reshape_rows

Import numerical feature and label data.

In [4]:
# Load dataset
X = pd.read_csv('X_numeric.csv', sep=',')
y = pd.read_csv('y_numeric.csv', sep=',')

In [5]:
# Check the number of features in case it could be better to insert a column of ones to get a better shape image
X.shape

(656, 71)

In [9]:
# Find possible dimensions of the image
for i in range(X.shape[1]):
    if X.shape[1]%(i+1) == 0 and i>2: # i>2 because convolutional layers are not possible with less than 3x3 filters
        print(f'Possible dimensions: {[X.shape[1]/(i+1), i+1]}')

Possible dimensions: [14.0, 5]
Possible dimensions: [10.0, 7]
Possible dimensions: [7.0, 10]
Possible dimensions: [5.0, 14]
Possible dimensions: [2.0, 35]
Possible dimensions: [1.0, 70]


In [7]:
# Check the shape of the labels in case the first row is the index
y.shape

(656, 1)

In [8]:
# If the index has been added to the first column, run this cell 
X=X.iloc[:,1:] # Remove the index if it is the first column and it would help the shape. Consider replacing with a column of ones.
#y=y.iloc[:,1:]

Split to train and test sets. Save them as np.array and make sure they are float. Perform scaling.

In [10]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
# Save to array and make sure dtype is float or else the scaling might not work.
X_train=np.asarray(X_train).astype(np.float_)
X_test=np.asarray(X_test).astype(np.float_)

y_train=np.asarray(y_train).astype(np.float_)
y_test=np.asarray(y_test).astype(np.float_)

In [12]:
# Scale the features
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Reshape the data to be suitable for the CNN. Filter with Gaussian filter and perform histogram stretching.

In [14]:
# Find a filter and edge strategy
filtermask = cv2.getGaussianKernel(3,1)
borderType = cv2.BORDER_DEFAULT

# Remember to change the dimensions/shape to fit the data!!!
X_train = reshape_rows(X_train, filtermask, borderType, (7,10))
X_test = reshape_rows(X_test, filtermask, borderType, (7,10))

Design the model. Train and evaluate.

In [17]:
# Basic CNN model. Uncomment to add some layers, but it wont increase the accuracy. Remember to change input_shape!!!
model = Sequential()
model.add(Conv2D(64, (3, 3), activation='relu', input_shape=(7, 10, 1))) # 32 filters, 3x3 kernel, remember to set input_shape
#model.add(Conv2D(64, (3, 3), activation='relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
#model.add(Dropout(0.25))
#model.add(ZeroPadding2D((1,1)))
#model.add(Conv2D(128, (3, 3), activation='relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(12, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 5, 8, 64)          640       
                                                                 
 flatten_1 (Flatten)         (None, 2560)              0         
                                                                 
 dense_2 (Dense)             (None, 12)                30732     
                                                                 
 dense_3 (Dense)             (None, 1)                 13        
                                                                 
Total params: 31,385
Trainable params: 31,385
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Check if the shapes are correct
print(f'X_train shape: {X_train.shape}, y_train shape: {y_train.shape}, X_test shape: {X_test.shape}, y_test shape: {y_test.shape}')

# Log experiment with tensorboard and train the model. Change epochs and batch_size to see how it affects the accuracy.
log_dir = "logs/fit/cnn5/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test)) 

X_train shape: (524, 7, 10), y_train shape: (524, 1), X_test shape: (132, 7, 10), y_test shape: (132, 1)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x195500416d0>

In [19]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test,verbose=1)
print(f'Results from test set: loss: {test_loss} - accuracy: {test_accuracy}')

Results from test set: loss: 0.8342361450195312 - accuracy: 0.6515151262283325


In [None]:
# Check the results in tensorboard
%tensorboard --logdir logs/fit