
## Image Classification


## Importing the libraries

In [None]:

import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, Conv1D, Conv2D, Conv3D, Dropout, MaxPooling1D, MaxPooling2D, MaxPooling3D, AveragePooling1D, AveragePooling2D, AveragePooling3D, BatchNormalization


## Start stopwatch

In [None]:

# CPU-time stopwatch. `time_start` is the reference reading; the evaluation
# cell takes a second `process_time()` reading and reports the difference.
from time import process_time

time_start = process_time()


## Import the dataset

In [None]:

# Directory that holds `features.csv` and `labels.csv`.
# The original absolute path stays as the default so existing runs behave the
# same; set the MLHUB_DATA_DIR environment variable to run the notebook on a
# machine where the data lives somewhere else.
DATA_DIR = os.environ.get('MLHUB_DATA_DIR', '/home/vitor/Documents/TACS/MLhub/data/cnn')

# X: feature table (reshaped into images by the preprocessing cell).
# y: label table (one-hot encoded by the preprocessing cell).
X = pd.read_csv(os.path.join(DATA_DIR, 'features.csv'))
y = pd.read_csv(os.path.join(DATA_DIR, 'labels.csv'))


## Splitting the dataset into the Training set and Test set

In [None]:

# `train_test_split` is already imported in the imports cell at the top.
#
# Hold out 10% of the rows for testing, without shuffling.
# - `shuffle` must be a real bool. The original passed the integer 0: recent
#   scikit-learn releases reject that during parameter validation, and older
#   releases test `shuffle is False`, so 0 did not actually disable shuffling.
# - With shuffling off, the test set is the final 10% of rows in file order
#   and `random_state` has nothing to randomise; it is kept so that turning
#   shuffling on later stays reproducible.
# - The original `stratify` argument compared two constant strings and always
#   evaluated to None; it is written out explicitly here.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    train_size=None,
    random_state=0,
    shuffle=False,
    stratify=None,
)

# Keep the un-encoded test labels: `y_test` is one-hot encoded by the
# preprocessing cell, while the metrics cell needs the original label values.
y_true = y_test


## Data preprocessing

In [None]:

# Number of target classes. It must equal the width of the model's final
# softmax layer (Dense(10) in the model-building cell).
NUM_CLASSES = 10


def data_preprocessing(X, y, num_classes=None, image_shape=(28, 28, 1)):
    """Convert a feature table and integer labels into arrays for the CNN.

    Parameters
    ----------
    X : pandas.DataFrame or array-like
        One row per image. Each row must contain exactly as many values as
        `image_shape` has elements (784 for the default shape).
    y : array-like of int
        Class labels, one per row of `X`.
    num_classes : int, optional
        Width of the one-hot encoding. With ``None`` (the default, and the
        original behaviour) Keras infers it from the largest label present,
        which yields a narrower matrix whenever a split happens not to
        contain the highest class. Pass it explicitly to keep train and test
        encodings the same width.
    image_shape : tuple of int, optional
        Per-image shape to reshape each row into. Defaults to ``(28, 28, 1)``.

    Returns
    -------
    out_x : numpy.ndarray
        Array of shape ``(num_images, *image_shape)`` with every value
        divided by 255 (presumably 8-bit pixel intensities - TODO confirm).
    out_y : numpy.ndarray
        One-hot encoded labels.

    Raises
    ------
    ValueError
        From ``reshape`` if a row of `X` does not hold exactly the number of
        values required by `image_shape`.
    """
    out_y = keras.utils.to_categorical(y, num_classes=num_classes)

    # np.asarray accepts a DataFrame as well as an existing ndarray.
    pixels = np.asarray(X)
    num_images = pixels.shape[0]
    # num_images is passed explicitly (not -1) so a wrong column count raises
    # instead of silently producing a different number of images.
    out_x = pixels.reshape((num_images,) + tuple(image_shape)) / 255
    return out_x, out_y


# NOTE: these calls rebind their own inputs, so re-run the split cell before
# running this cell a second time.
X_train, y_train = data_preprocessing(X_train, y_train, num_classes=NUM_CLASSES)
X_test, y_test = data_preprocessing(X_test, y_test, num_classes=NUM_CLASSES)


## Build the model

In [None]:
# CNN for 28x28 single-channel images:
#   conv(32) -> pool -> dropout -> conv(64) -> pool -> dropout -> conv(128)
#   -> dropout -> flatten -> dense(128) -> dropout -> softmax over 10 classes.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Conv2D(64, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Conv2D(128, kernel_size=(3, 3), activation="relu"),
    Dropout(0.4),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.3),
    Dense(10, activation="softmax"),
])

# Categorical cross-entropy matches the one-hot labels produced by the
# preprocessing cell; the 'accuracy' metric is what the training-curve cell
# reads back from the fit history.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

model.summary()

## Training the model on the Training set

In [None]:

# Training settings gathered in one place so they are easy to tune.
# `validation_split=0.1` asks Keras to set aside a tenth of the training
# arrays for the per-epoch validation metrics plotted in the next cell.
fit_options = dict(batch_size=128, epochs=2, validation_split=0.1)

history = model.fit(X_train, y_train, **fit_options)


## Evaluating the Training Performance

In [None]:

# One figure per tracked quantity, each showing the training curve and the
# validation curve per epoch.
# Tuple layout: (training key, validation key, figure title, y-axis label).
curves = (
    ('accuracy', 'val_accuracy', 'model accuracy', 'accuracy'),
    ('loss', 'val_loss', 'model loss', 'loss'),
)

for train_key, val_key, title, ylabel in curves:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()


## Predicting the Test set results

In [None]:

# `evaluate` returns the loss first, then the metrics passed to `compile`
# (only accuracy here).
score = model.evaluate(X_test, y_test)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

# Collapse each row of class probabilities to the index of its largest entry,
# giving one predicted class label per test image.
pred = model.predict(X_test)
y_pred = pred.argmax(axis=1)


## Evaluating the Model Performance

In [None]:

# `plot_confusion_matrix` is no longer imported: it was never used in this
# cell and has been removed from scikit-learn (1.2+), where importing it
# raises ImportError.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns

# Overall accuracy plus class-frequency-weighted precision / recall / F1.
# `y_true` holds the un-encoded test labels, `y_pred` the arg-max predictions.
acc = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
# Report recall as returned by scikit-learn. The original raised it to the
# power 0.5, which inflated the printed and saved value.
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')
print(f'Accuracy Score: {acc}')
print(f'Precision Score: {precision}')
print(f'Recall Score: {recall}')
print(f'F1 Score: {f1}')

# Confusion matrix heatmap: rows are true labels, columns are predictions.
cm = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='g', ax=ax)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
plt.show()

# Stop the stopwatch started near the top of the notebook. This is CPU time
# of the process (as measured by `process_time`), not wall-clock time.
time_stop = process_time()
cpu_time = round(time_stop - time_start, 2)
print(f'Elapsed CPU Time: {cpu_time} seconds')


## Saving Model Statistics

In [None]:

import os
import json

# Output directory for the run statistics. `exist_ok=True` creates it when
# missing and is a no-op when it already exists, avoiding the separate
# exists-then-create check.
path = 'statistics'
os.makedirs(path, exist_ok=True)

# Everything computed by the evaluation cell, plus the raw predicted and true
# labels converted to plain Python lists so they can be serialised as JSON.
stats = {
    "Accuracy Score": acc,
    "Precision Score": precision,
    "Recall Score": recall,
    "F1 Score": f1,
    "cpu time": cpu_time,
    "predicted": y_pred.flatten().tolist(),
    "real": y_true.to_numpy().flatten().tolist()
}

# `ensure_ascii=False` writes non-ASCII characters unescaped, so the file
# encoding is fixed to UTF-8 instead of depending on the platform default.
with open(os.path.join(path, "cnn_71103630-7d48-11ec-b088-d7c57270a0f7.json"), "w", encoding="utf-8") as f:
    json.dump(stats, f, ensure_ascii=False, indent=4)
