# Uczenie maszynowe <br> Zadanie domowe nr 1: Klasyfikacja wektorów nośnych i regresja logistyczna - z elementami konkursu
Politechnika Gdańska, Wydział ETI, Katedra Inżynierii Biomedycznej

# Cel:
Opracować procedurę złożoną z przetwarzania wstępnego oraz klasyfikacji danych z wykorzystaniem sieci neuronowych (uwaga - jedynie MLP - bez CNN, itp.) w celu uzyskania jak najlepszych wyników klasyfikacji dla zadanego zbioru danych.

# Problem:
Dla zbioru CIFAR-10 należy wybrać przykłady dla wskazanych 3 klas. Następnie opracować procedurę w celu uzyskania najlepszych wyników klasyfikacji mierzonych z użyciem miary F1-score, F1 = 2(recall*precision)/(recall+precision). Do wyznaczenia wartości miary należy zastosować funkcję f1_score z pakietu scikit-learn (from sklearn.metrics import f1_score).

# Dane studentów
10.05.2021 <br>
Inżynieria biomedyczna, Sztuczna inteligencja gr. 1 <br>
Patrycja Gładkowska 171951 <br>
Szymon Sadowski 165298

In [3]:
# Download data and select classes 'plane', 'cat', 'truck'

import random
from keras.datasets import cifar10
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, f1_score, classification_report
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.utils import np_utils

(X_train, y_train), (X_test, y_test) = cifar10.load_data()
class_names = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


# indexes = [0, 3, 9]

# idx_train = np.array([idx for idx in range(y_train.shape[0]) if y_train[idx] in indexes])
# idx_test = np.array([idx for idx in range(y_test.shape[0]) if y_test[idx] in indexes])

# x_train = X_train[idx_train]
# y_train = y_train[idx_train]

# x_test = X_test[idx_test]
# y_test = y_test[idx_test]

In [13]:
# Preprocessing

epochs = 5
batch_size = 128
nb_classes = 10

# Reshape data
X_train = X_train.reshape(50000, 32 * 32 * 3)
X_test = X_test.reshape(10000, 32 * 32 * 3)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255.0
X_test /= 255.0

Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

In [14]:
# Classification

# MLP
model = Sequential()
model.add(Dense(1024, input_shape=(3072, )))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

# training
history = model.fit(X_train, Y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_test, Y_test))


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 1024)              3146752   
_________________________________________________________________
activation_8 (Activation)    (None, 1024)              0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 512)               524800    
_________________________________________________________________
activation_9 (Activation)    (None, 512)               0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 512)              

In [15]:
# Metrics

loss, acc = model.evaluate(X_test, Y_test, verbose=0)
print('Test loss:', loss)
print('Test acc:', acc)

Test loss: 2.302593469619751
Test acc: 0.10000000149011612


In [16]:
# Metrics

y = model.predict(X_test)

# Transform "probabilities" to class codes
det = tf.cast((y+0.5), tf.int32)
detections = tf.cast(det, tf.float32)

# Change one-hot encoded value into simple class code values
y_t = np.argmax(Y_test, axis=1)
y_p = np.argmax(detections, axis=1)

# Calculate confusion matrix
# (y_true, y_pred) -> the rows represent true labels, columns predictions
cm = tf.math.confusion_matrix(y_t, y_p) 
print("Confusion Matrix: ")
print(cm.numpy())

# Define functions to count TP, FP, FN, recall and precision for each class

def false_positives(conf_matrix):
    sums = np.sum(conf_matrix, axis=0) # sum for rows
    fp = np.subtract(sums, np.diagonal(conf_matrix))
    return fp

def false_negatives(conf_matrix):
    sums = np.sum(conf_matrix, axis=1)  # sum for columns
    fn = np.subtract(sums, np.diagonal(conf_matrix))
    return fn

def recall (conf_matrix):
    tp = np.diagonal(conf_matrix)
    fn = false_negatives(conf_matrix)
    return (tp/(tp+fn))
  
def precision (conf_matrix):
    tp = np.diagonal(conf_matrix)
    fp = false_positives(conf_matrix)
    return (tp/(tp+fp))

print("\nRecall and precision values per class:")
print("Recall: \n", 100*recall(cm))
print("Precision: \n", 100*precision(cm))

print("Mean recall: ", 100*np.mean(recall(cm)))
print("Mean precision: ", 100*np.mean(precision(cm)))

Confusion Matrix: 
[[1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]
 [1000    0    0    0    0    0    0    0    0    0]]

Recall and precision values per class:
Recall: 
 [100.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
Precision: 
 [10. nan nan nan nan nan nan nan nan nan]
Mean recall:  10.0
Mean precision:  nan


  return (tp/(tp+fp))


In [None]:
# Validation

# pred_test = model.predict(x_test)

# accuracy_test = model.score(x_test, y_test)
# print(f"Accuracy: {accuracy_test}")

# print("Confusion matrix:")
# conf_matrix_test = confusion_matrix(y_test, pred_test)
# print(conf_matrix_test)

# f1_score_test = f1_score(y_test, pred_test, average="weighted")
# print(f"F1 score: {f1_score_test}")

# print("Classification report:")
# classification_report_test = classification_report(y_test, pred_test, target_names=target_names)
# print(classification_report_test)

In [None]:
# test_example_id = random.randrange(len(idx_test))
# size = X_te.shape[1]
# t = (X_te[test_example_id]).reshape((1,size))

# pred_t = model.predict(t)

# print(f"Class names: {target_names}")
# print(f"Predicted class code: {pred_t}")
# print(f"Predicted class label: {class_names[pred_t[0]]}")

# scores = model.decision_function(t)
# print(f"Scores for each class: {scores}")

# example_test_true_label = class_names[y_test[idx_test[test_example_id]][0]]
# print(f"True label: {example_test_true_label}")

# example_test_image = x_test[idx_test[test_example_id],:,:,:]
# plt.imshow(example_test_image)
# plt.show()

In [None]:
# Extra validation

In [None]:
# enter your data
# X_test = ''
# y_test_reduced = ''

# pred_test_extra = model.predict(X_test)

# accuracy_test_extra = model.score(X_test, y_test_reduced)
# print(f"Accuracy: {accuracy_test_extra}")

# print("Confusion matrix:")
# conf_matrix_test_extra = confusion_matrix(y_test_reduced, pred_test_extra)
# print(conf_matrix_test_extra)

# f1_score_test_extra = f1_score(y_test_reduced, pred_test_extra, average="weighted")
# print(f"F1 score: {f1_score_test_extra}")

# print("Classification report:")
# classification_report_test_extra = classification_report(y_test_reduced, pred_test_extra, target_names=target_names)
# print(classification_report_test_extra)