In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score, accuracy_score

# Evaluate a two-model soft-voting ensemble on a held-out image directory.
#
# Pipeline: load both saved models -> stream the test images in a fixed order
# -> average the per-class scores of the two models -> take the arg-max class
# -> report binary and per-class metrics against the directory-derived labels.

# --- Configuration (values a reader may need to change) ----------------------
MODEL1_PATH = 'project17_InceptionV3model.h5'
MODEL2_PATH = 'project17_model.h5'
TEST_DIR = 'F:/Test dataset 2'  # NOTE: machine-specific absolute path
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
# Class index treated as "positive" by the binary F1/precision/recall below.
# 1 is scikit-learn's default; it is spelled out so the reader knows which
# class the headline numbers describe (see the class-index note further down).
POSITIVE_CLASS_INDEX = 1

# Load the saved models
model1 = tf.keras.models.load_model(MODEL1_PATH)
model2 = tf.keras.models.load_model(MODEL2_PATH)

# Load the test set using an ImageDataGenerator.
# NOTE(review): both models are fed identical 224x224 inputs scaled to [0, 1];
# this assumes both were trained with that same preprocessing -- confirm.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False  # keep file order fixed so predictions line up with .classes
)

# Use the models to make predictions on the test set.
# The generator is rewound before each pass as a defensive measure so that
# both models start from the first batch.
test_generator.reset()
y_pred1 = model1.predict(test_generator)
test_generator.reset()
y_pred2 = model2.predict(test_generator)

# Take the average of the predictions (soft voting over per-class scores)
ensemble_probs = np.mean([y_pred1, y_pred2], axis=0)

# Get the true labels for the test set
y_true = test_generator.classes

# Fail loudly if predictions and labels cannot be compared element-wise.
if len(ensemble_probs) != len(y_true):
    raise ValueError(
        'Prediction/label count mismatch: '
        f'{len(ensemble_probs)} predictions vs {len(y_true)} labels'
    )

# Convert the averaged scores to class indices.
# The index -> name mapping is test_generator.class_indices, which
# flow_from_directory derives from the sub-directory names; for the recorded
# run of this notebook that is WithMask -> 0 and WithoutMask -> 1.
ensemble_preds = np.argmax(ensemble_probs, axis=1)

# Compute various evaluation metrics.
# F1/precision/recall are binary metrics for POSITIVE_CLASS_INDEX only;
# accuracy covers all classes.
f1 = f1_score(y_true, ensemble_preds, pos_label=POSITIVE_CLASS_INDEX)
precision = precision_score(y_true, ensemble_preds, pos_label=POSITIVE_CLASS_INDEX)
recall = recall_score(y_true, ensemble_preds, pos_label=POSITIVE_CLASS_INDEX)
accuracy = accuracy_score(y_true, ensemble_preds)

# Print the evaluation metrics
print('F1 score:', f1)
print('Precision:', precision)
print('Recall:', recall)
print('Accuracy:', accuracy)

# Compute the confusion matrix and print it (rows: true class, cols: predicted)
cm = confusion_matrix(y_true, ensemble_preds)
print('Confusion matrix:')
print(cm)

# Generate a classification report and print it.
# Class names are ordered explicitly by their index so that each name is
# guaranteed to label the matching row of the report.
class_names = [
    name
    for name, _ in sorted(test_generator.class_indices.items(), key=lambda item: item[1])
]
cr = classification_report(y_true, ensemble_preds, target_names=class_names)
print('Classification report:')
print(cr)


Found 10000 images belonging to 2 classes.
F1 score: 0.9768672809253088
Precision: 0.9579008073817762
Recall: 0.9966
Accuracy: 0.9764
Confusion matrix:
[[4781  219]
 [  17 4983]]
Classification report:
              precision    recall  f1-score   support

    WithMask       1.00      0.96      0.98      5000
 WithoutMask       0.96      1.00      0.98      5000

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000

