<a href="https://colab.research.google.com/github/Adetayo047/Toby-s_DCNN_-with_attention_project/blob/main/trying_DCNN_on_MNIST_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras import layers, models
from sklearn.metrics import accuracy_score, roc_auc_score, auc, roc_curve
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
# Load the MNIST dataset (Keras downloads it on first use).
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

# Add a trailing channel axis so each image is (28, 28, 1), cast to float32
# and divide by 255 to rescale the pixel values.
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000,
# so the same code still works on a subset of the data.
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32') / 255

# Convert the integer labels to one-hot encoding.
# num_classes is pinned to 10 so both label matrices always have 10 columns;
# without it the width is inferred from the largest label that happens to be
# present in the array.
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)
train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)

# define a simple CNN model with attention mechanism

def cnn_with_attention_model():
  """Build a small CNN for 28x28x1 images with a learned gate on its features.

  Two Conv2D/MaxPooling2D stages extract a feature map, which is flattened.
  A single-unit sigmoid Dense layer then produces one attention weight per
  sample, and the flattened features are multiplied by that weight before
  they reach the dense classifier head.

  The model is built with the functional API on purpose: tensors created
  outside a ``Sequential`` stack never become part of that stack, so the
  attention branch has to be wired into the graph explicitly to have any
  effect on the output.

  Returns:
    An uncompiled Keras model mapping (28, 28, 1) inputs to a 10-way softmax.
  """
  inputs = layers.Input(shape=(28, 28, 1))

  # convolutional layers
  x = layers.Conv2D(32, (3, 3), activation='relu')(inputs)
  x = layers.MaxPooling2D((2, 2))(x)
  x = layers.Conv2D(64, (3, 3), activation='relu')(x)
  x = layers.MaxPooling2D((2, 2))(x)

  # flatten the feature map for the dense layers
  intermediate_features = layers.Flatten()(x)

  # attention mechanism: one weight per sample, shape (batch, 1), which
  # broadcasts across the flattened feature axis in Multiply.
  # sigmoid keeps the weight in (0, 1); a ReLU gate could output exactly 0 and
  # wipe out every feature of a sample.
  attention_weights = layers.Dense(1, activation='sigmoid')(intermediate_features)
  attended_features = layers.Multiply()([intermediate_features, attention_weights])

  # dense layers (classifier head) operate on the attended features
  x = layers.Dense(64, activation='relu')(attended_features)
  outputs = layers.Dense(10, activation='softmax')(x)

  return models.Model(inputs=inputs, outputs=outputs)


# Build the network and configure it for multi-class training:
# Adam optimizer, categorical cross-entropy loss, accuracy as the metric.
model = cnn_with_attention_model()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train for 10 epochs with mini-batches of 64 images.
# NOTE: the test split is passed as validation data, so the 'val_*' entries
# recorded in `history` are measured on the test set.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=64,
    epochs=10,
    validation_data=(test_images, test_labels),
)

# Plot training and validation accuracy per epoch, as recorded by model.fit.
plt.plot(history.history['accuracy'], label="trainig accuracy")
plt.plot(history.history['val_accuracy'], label="validation accuracy")
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
# The explicit list supplies the legend text, in the order the lines were drawn.
plt.legend(['Train', 'Validation'], loc='upper left')
# plt.show has to be called; a bare `plt.show` only references the function
# and never displays the figure.
plt.show()

# Score the trained model on the test split and report its accuracy.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print('Test accuracy:', test_acc)

# Predicted class probabilities for every test image (one column per class).
y_pred_prob = model.predict(x=test_images)

# One-vs-rest ROC curve and area under it for each of the 10 classes:
# column i of the one-hot labels is the binary target, column i of the
# predictions is the score.
fpr, tpr, roc_auc = {}, {}, {}
for i in range(10):
  class_fpr, class_tpr, _ = roc_curve(test_labels[:, i], y_pred_prob[:, i])
  fpr[i], tpr[i] = class_fpr, class_tpr
  roc_auc[i] = auc(class_fpr, class_tpr)

# Draw one ROC curve per class on a fresh figure, with its AUC in the legend.
plt.figure()
for class_id, class_auc in roc_auc.items():
  curve_label = 'Class %d (AUC = %0.2f)' % (class_id, class_auc)
  plt.plot(fpr[class_id], tpr[class_id], label=curve_label)

# Dashed diagonal: the reference line for a random classifier.
plt.plot([0, 1], [0, 1], 'k--', label='Random')

plt.title('ROC curve for each of the class')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()