In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score, accuracy_score

# Restore the two previously trained Keras models from their .h5 files.
# The paths are relative, so they resolve against the notebook's working directory.
MODEL1_PATH = 'project17_model.h5'
MODEL2_PATH = 'project17_DenseNetModel.h5'
model1 = tf.keras.models.load_model(MODEL1_PATH)
model2 = tf.keras.models.load_model(MODEL2_PATH)

# Build the evaluation input pipeline: pixel values are multiplied by 1/255 and
# images are streamed in batches from the test directory.
test_datagen = ImageDataGenerator(rescale=1./255)
flow_options = {
    'target_size': (224, 224),    # every image is resized to 224x224
    'batch_size': 32,
    'class_mode': 'categorical',  # labels are yielded as one-hot vectors
    'shuffle': False,             # fixed file order, so predictions can be matched to .classes
}
test_generator = test_datagen.flow_from_directory('F:/Test dataset', **flow_options)

# Run both models over the full test set.
# The generator was created with shuffle=False, so row i of each prediction
# array corresponds to entry i of test_generator.classes. reset() puts the
# iterator back at its first batch before each pass.
test_generator.reset()
y_pred1 = model1.predict(test_generator)
test_generator.reset()
y_pred2 = model2.predict(test_generator)

# Get the true labels (integer class indices) for the test set
y_true = test_generator.classes

# Fail early with a clear message if the outputs cannot be averaged or
# compared against the ground truth.
assert y_pred1.shape == y_pred2.shape, (
    f'model outputs differ in shape: {y_pred1.shape} vs {y_pred2.shape}'
)
assert len(y_pred1) == len(y_true), (
    f'{len(y_pred1)} predictions for {len(y_true)} test images'
)

# Soft voting: average the two models' per-class scores element-wise
ensemble_probs = np.mean([y_pred1, y_pred2], axis=0)

# Convert the averaged scores to class indices. The index -> class-name mapping
# is given by test_generator.class_indices; in the recorded run index 0 is
# 'with_mask' and index 1 is 'without_mask'.
ensemble_preds = np.argmax(ensemble_probs, axis=1)

# Score the ensemble's predicted class indices against the ground truth.
# average='binary' and pos_label=1 are scikit-learn's defaults, written out so
# it is visible that F1, precision and recall describe class index 1 only
# (look it up in test_generator.class_indices); accuracy covers every class.
accuracy = accuracy_score(y_true, ensemble_preds)
recall = recall_score(y_true, ensemble_preds, average='binary', pos_label=1)
precision = precision_score(y_true, ensemble_preds, average='binary', pos_label=1)
f1 = f1_score(y_true, ensemble_preds, average='binary', pos_label=1)

# Print the evaluation metrics, one per line
for caption, score in (
    ('F1 score:', f1),
    ('Precision:', precision),
    ('Recall:', recall),
    ('Accuracy:', accuracy),
):
    print(caption, score)

# Derive class names and indices from the generator, ordered by index, so the
# names passed to scikit-learn always line up with the integer labels instead
# of relying on the dict's iteration order.
ordered_classes = sorted(test_generator.class_indices.items(), key=lambda item: item[1])
class_names = [name for name, _ in ordered_classes]
class_ids = [index for _, index in ordered_classes]

# Compute the confusion matrix and print it (rows: true class, columns: predicted class).
# Passing labels= keeps one row/column per known class even if a class never
# occurs in y_true or in the predictions.
cm = confusion_matrix(y_true, ensemble_preds, labels=class_ids)
print('Confusion matrix:')
print(cm)

# Generate a per-class classification report and print it
cr = classification_report(
    y_true,
    ensemble_preds,
    labels=class_ids,
    target_names=class_names,
)
print('Classification report:')
print(cr)


Found 5773 images belonging to 2 classes.
F1 score: 0.9942498693152118
Precision: 0.9989495798319328
Recall: 0.9895941727367326
Accuracy: 0.9942837346267106
Confusion matrix:
[[2887    3]
 [  30 2853]]
Classification report:
              precision    recall  f1-score   support

   with_mask       0.99      1.00      0.99      2890
without_mask       1.00      0.99      0.99      2883

    accuracy                           0.99      5773
   macro avg       0.99      0.99      0.99      5773
weighted avg       0.99      0.99      0.99      5773

