In [None]:
""" Pattern Recognition Project """
""" @author Mohamed Aboeljered"""

import os
import IPython
import math
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
%matplotlib inline
import random
from datetime import datetime
# from include import helpers
from keras import backend as keras_backend
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, LeakyReLU, SpatialDropout2D, Activation, Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D
from keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical, plot_model
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint 
from keras.regularizers import l2
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_recall_fscore_support, confusion_matrix


In [None]:
import keras.backend as K
import tensorflow as tf

In [None]:
from sklearn.model_selection import train_test_split
import scipy.io
import numpy as np
from google.colab import drive
import pandas as pd

# Mount Google Drive so the dataset stored there is reachable from the Colab VM.
drive.mount('/content/drive')

mat = scipy.io.loadmat('/content/drive/My Drive/RAS/Mustererkennung/Project/data.mat')

labels = np.array(mat['train_label'])
train_data = np.array(mat['train_data'])

# Flatten every sample into one row. The number of samples is inferred (-1)
# instead of being hard-coded, so a file with a different number of recordings
# loads without editing this cell.
# presumably the raw array is laid out as (64, 998, n_samples) -- TODO confirm against data.mat
train_data = np.transpose(train_data.reshape(64 * 998, -1))  # final shape is (n_samples, 64*998)

# NOTE: the features are used unscaled. This cell used to call
#     np.interp(train_data, (train_data.min(), train_data.max()), (-1, +1))
# but threw the result away, so the data was never actually normalised while a
# full-size temporary copy was still allocated. The call is removed to keep the
# behaviour honest and save memory.
# TODO(review): if min/max scaling is wanted, assign the result back AND apply
# the same training min/max to the unlabelled data in the inference cell.

# Hold out 20 % of the samples; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    train_data,
    labels,
    test_size=0.2,
    random_state=69
)

print(train_data.shape)

In [None]:
# Inspect the shape of the held-out label array returned by train_test_split.
y_test.shape

In [None]:
# One-hot encoding
# Fit the encoder exactly once, on every label, so that the train and test
# splits share a single class -> index mapping. Fitting separately per split
# (as was done before) silently produces different mappings and different
# one-hot widths whenever a class is missing from one of the splits.
le = LabelEncoder()
le.fit(labels.ravel())

# Pin the one-hot width to the number of known classes rather than letting
# to_categorical infer it from the largest index present in each split.
n_classes = len(le.classes_)
y_train_encoded = to_categorical(le.transform(y_train.ravel()), num_classes=n_classes)
y_test_encoded = to_categorical(le.transform(y_test.ravel()), num_classes=n_classes)

In [None]:
# Layout of a single sample as the Keras CNN expects it:
# height x width x channels, with the channel axis last.
num_rows, num_columns, num_channels = 64, 998, 1

# Restore the 2-D shape of every flattened sample and append the channel axis.
X_train, X_test = (
    split.reshape(split.shape[0], num_rows, num_columns, num_channels)
    for split in (X_train, X_test)
)

# Width of the one-hot label vectors, i.e. the number of output units the network needs.
num_labels = y_train_encoded.shape[-1]

In [None]:
def create_model(spatial_dropout_rate_1=0, spatial_dropout_rate_2=0, l2_rate=0):
    """Assemble the (uncompiled) convolutional classifier.

    The network is four 3x3 convolution stages (32, 32, 64, 64 filters), each
    followed by LeakyReLU(0.1) and batch normalisation. A 2x2 max-pooling layer
    sits after the second stage, and the head is global average pooling
    followed by a softmax layer.

    The input shape and the number of output units are read from the
    notebook-level names ``num_rows``, ``num_columns``, ``num_channels`` and
    ``num_labels``, which must exist before this function is called.

    Args:
        spatial_dropout_rate_1: Rate of the SpatialDropout2D layers placed
            before the second and the third convolution.
        spatial_dropout_rate_2: Rate of the SpatialDropout2D layer placed
            before the fourth convolution.
        l2_rate: L2 penalty applied to the kernel of every convolution.

    Returns:
        A ``Sequential`` model that still has to be compiled.
    """

    def conv_stage(n_filters, **conv_kwargs):
        # One convolution stage: 3x3 conv -> LeakyReLU -> batch normalisation.
        return [
            Conv2D(filters=n_filters,
                   kernel_size=(3, 3),
                   kernel_regularizer=l2(l2_rate),
                   **conv_kwargs),
            LeakyReLU(alpha=0.1),
            BatchNormalization(),
        ]

    stack = []

    # Stage 1 carries the input shape for the whole network.
    stack += conv_stage(32, input_shape=(num_rows, num_columns, num_channels))

    # Stage 2
    stack.append(SpatialDropout2D(spatial_dropout_rate_1))
    stack += conv_stage(32)

    # Halve both spatial dimensions.
    stack.append(MaxPooling2D(pool_size=(2, 2)))

    # Stage 3
    stack.append(SpatialDropout2D(spatial_dropout_rate_1))
    stack += conv_stage(64)

    # Stage 4
    stack.append(SpatialDropout2D(spatial_dropout_rate_2))
    stack += conv_stage(64)

    # Collapse every h x w feature map to its mean value.
    stack.append(GlobalAveragePooling2D())

    # Softmax output, one unit per class.
    stack.append(Dense(num_labels, activation='softmax'))

    model = Sequential()
    for layer in stack:
        model.add(layer)

    return model

# Regularisation strengths used for this run.
spatial_dropout_rate_1 = 0.07
spatial_dropout_rate_2 = 0.14
l2_rate = 0.0005

# Build the network with the settings above (arguments passed by name for clarity).
model = create_model(
    spatial_dropout_rate_1=spatial_dropout_rate_1,
    spatial_dropout_rate_2=spatial_dropout_rate_2,
    l2_rate=l2_rate,
)

In [None]:
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
adam = Adam(learning_rate=1e-4, beta_1=0.99, beta_2=0.999)

# Multi-class setup: softmax output + categorical cross-entropy on one-hot targets.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=adam)

# Display model architecture summary
model.summary()

In [None]:
num_epochs = 1000
num_batch_size = 128
model_file = 'NoDrop_85test.hdf5'
models_path = './models'

# Create the checkpoint directory from Python instead of `!mkdir models`:
# exist_ok=True makes the cell safe to re-run (mkdir errors once the directory
# exists) and does not depend on a shell being available.
os.makedirs(models_path, exist_ok=True)
model_path = os.path.join(models_path, model_file)

# Save checkpoints: with save_best_only=True the file is only overwritten when
# the quantity monitored by the callback improves.
checkpointer = ModelCheckpoint(filepath=model_path,
                               verbose=1,
                               save_best_only=True)
start = datetime.now()

# The last 1/12 of the training samples is held back by Keras for validation.
history = model.fit(X_train,
                    y_train_encoded,
                    batch_size=num_batch_size,
                    epochs=num_epochs,
                    validation_split=1/12.,
                    callbacks=[checkpointer],
                    verbose=1)

duration = datetime.now() - start
print("Training completed in time: ", duration)


In [None]:
# Colab-only: send the checkpoint file stored at model_path to the browser as a download.
from google.colab import files
files.download(model_path) 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
### Retraining with test data set
# NOTE: after this cell the network has seen X_test, so any accuracy measured
# on X_test afterwards is no longer an estimate of generalisation.

# Resume from the checkpoint written by the first training run. It lives at
# model_path (inside the models directory); loading the bare file name only
# worked if a copy happened to be in the working directory.
model = load_model(model_path)

# Save checkpoints
checkpointer = ModelCheckpoint(filepath='ModelFullDataSet.hdf5',
                               verbose=1,
                               save_best_only=True)
start = datetime.now()

# Continue fitting on the previously held-out split; 1/12 of it is again
# reserved by Keras for validation.
history = model.fit(X_test,
                    y_test_encoded,
                    batch_size=64,
                    epochs=1000,
                    validation_split=1/12.,
                    callbacks=[checkpointer],
                    verbose=1)

duration = datetime.now() - start
print("Training completed in time: ", duration)


In [None]:
# Colab-only: send the checkpoint produced by the retraining cell to the browser as a download.
from google.colab import files
files.download('ModelFullDataSet.hdf5') 

In [None]:
################################### Classifier Performance Check ##############################
from sklearn.metrics import ConfusionMatrixDisplay

# Tick labels handed to both confusion-matrix plots.
scene_names = ['Airport', 'Bus', 'Metro', 'Public Square']

# Evaluate the checkpoint written by the retraining cell.
# NOTE: that checkpoint was fitted on X_test as well, so the "test" numbers
# below are not a held-out estimate.
model = load_model('ModelFullDataSet.hdf5')

# ---- Training split ----
y_trainp = model.predict(X_train)
print(y_trainp.shape)
train_true = np.argmax(y_train_encoded, axis=1)
train_pred = np.argmax(y_trainp, axis=1)
acc = train_pred == train_true
print('train accuracy = ', np.sum(acc) / len(y_train))

train_cm = confusion_matrix(train_true, train_pred)
disp = ConfusionMatrixDisplay(train_cm, display_labels=scene_names)
disp.plot()
disp.figure_.savefig('Train Data Confusion Matrix')

# ---- Test split ----
y_testp = model.predict(X_test)
print(y_testp.shape)
test_true = np.argmax(y_test_encoded, axis=1)
test_pred = np.argmax(y_testp, axis=1)
acc = test_pred == test_true
print('test accuracy = ', np.sum(acc) / len(y_test))

test_cm = confusion_matrix(test_true, test_pred)
disp = ConfusionMatrixDisplay(test_cm, display_labels=scene_names)
disp.plot()
disp.figure_.savefig('Test Data Confusion Matrix')

In [None]:
###################################################################################
############################ Load  Unknown  Data ##################################
###################################################################################
# Read the unlabelled data set from Google Drive; the path lives under the
# /content/drive mount point, so the drive must already be mounted.
unlabelledDataMat = scipy.io.loadmat('/content/drive/My Drive/RAS/Mustererkennung/Project/testdata.mat')


In [None]:
###################################################################################
############################ Inference ############################################
###################################################################################
# Apply the model to the unknown test data and save the predicted labels in a text file.

unlabelledDataRaw = np.array(unlabelledDataMat['test_data'])

# Sanity check
print(unlabelledDataRaw.shape)

# Reshape data. The sample count is inferred (-1) rather than hard-coded, so the
# cell works for an evaluation set of any size.
# presumably the raw array is (num_rows, num_columns, n_samples) -- TODO confirm against testdata.mat
unlabelledDataFlat = np.transpose(
    unlabelledDataRaw.reshape(num_rows * num_columns, -1)
)  # new shape = (n_samples, num_rows*num_columns)
unlabelledData = unlabelledDataFlat.reshape(
    unlabelledDataFlat.shape[0], num_rows, num_columns, num_channels
)  # new shape = (n_samples, num_rows, num_columns, 1)
# The inputs are fed to the network unscaled, exactly as the training data is.

# Load saved model
model = load_model('ModelFullDataSet.hdf5')

# Most probable class index per sample ...
class_indices = np.argmax(model.predict(unlabelledData), axis=1)
# ... mapped back to the original label values through the fitted encoder.
# This replaces the former `argmax + 1`, which was only right if the labels
# happened to be exactly 1..K.
y_infer = le.inverse_transform(class_indices)

# Write all labels on a single line, separated by ';'.
np.savetxt('label.txt', y_infer.reshape(1, -1), fmt='%d', delimiter=';', newline='')