# Multi-Modal Nutritional Deficiency Model Evaluation

This notebook evaluates a trained multi-modal model for nutritional deficiency detection, focusing on multi-label classification metrics.

## 1. Import Required Libraries

In [1]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import preprocess_input
from sklearn.metrics import (
    hamming_loss,
    accuracy_score,
    f1_score,
    roc_auc_score,
    precision_recall_curve,
    auc,
    classification_report,
    confusion_matrix
)

# Import our custom modules if they're needed for data loading
from data_mapper import NHANESImageMapper
from custom_data_generator import MultiModalDataGenerator

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)




## 2. Load the Trained Model

In [2]:
# Define the path to the saved model
model_path = r"C:\Users\Hency\OneDrive\Documents\mini proj\results\20250415-234926\final_nutritional_deficiency_model.h5"

# Check if the model file exists
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found at {model_path}")
    
# Load the model
print(f"Loading model from {model_path}...")
model = load_model(model_path)
print("Model loaded successfully!")

# Print the model summary
model.summary()

Loading model from C:\Users\Hency\OneDrive\Documents\mini proj\results\20250415-234926\final_nutritional_deficiency_model.h5...


Model loaded successfully!
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 img_input (InputLayer)      [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 rescaling_2 (Rescaling)     (None, 224, 224, 3)          0         ['img_input[0][0]']           
                                                                                                  
 normalization_1 (Normaliza  (None, 224, 224, 3)          7         ['rescaling_2[0][0]']         
 tion)                                                                                            
                                  

## 3. Define the Deficiency Labels

In [3]:
# Define the names of nutritional deficiency categories
deficiency_names = [
    "Vitamin A Deficiency", "Vitamin B1 Deficiency", "Vitamin B2 Deficiency",
    "Vitamin B3 Deficiency", "Vitamin B5 Deficiency", "Vitamin B6 Deficiency",
    "Vitamin B7 Deficiency", "Vitamin B9 Deficiency", "Vitamin B12 Deficiency",
    "Vitamin C Deficiency", "Vitamin D Deficiency", "Vitamin E Deficiency",
    "Vitamin K Deficiency", "Calcium Deficiency", "Iron Deficiency",
    "Magnesium Deficiency", "Zinc Deficiency", "Protein Deficiency",
    "Omega-3 Fatty Acid Deficiency", "Iodine Deficiency", "Selenium Deficiency",
    "Potassium Deficiency", "Multiple Deficiencies"
]

print(f"Loaded {len(deficiency_names)} deficiency categories")

Loaded 23 deficiency categories


## 4. Load and Prepare Test Data

In [4]:
# Create the NHANES-Image mapper to load the dataset
print("Creating NHANES-Image mapper...")
mapper = NHANESImageMapper(
    nhanes_dir="dataset/NHANES",
    image_dir="dataset/Image data"
)

# Load NHANES data
nhanes_data = mapper.load_nhanes_data()
print(f"Loaded NHANES data with {len(nhanes_data)} records")

# Create paired dataset
print("Creating paired dataset...")
paired_dataset = mapper.create_paired_dataset(test_size=0.2)
print(f"Created paired dataset with {len(paired_dataset['test'])} testing samples")

# Extract the test data
test_data = paired_dataset['test']
num_classes = paired_dataset['num_classes']
print(f"Number of classes: {num_classes}")

# Create a test generator
batch_size = 16
test_generator = MultiModalDataGenerator(
    dataset=test_data,
    batch_size=batch_size,
    image_size=(224, 224),
    shuffle=False,
    augment=False,
    num_classes=num_classes
)

print(f"Created test generator with {len(test_generator)} batches")

Creating NHANES-Image mapper...
Diet data columns: ['SEQN', 'WTDRD1', 'WTDR2D', 'DR1DRSTZ', 'DR1EXMER', 'DRABF', 'DRDINT', 'DR1DBIH', 'DR1DAY', 'DR1LANG'] ...
Using diet columns: ['DR1TKCAL', 'DR1TPROT', 'DR1TCARB', 'DR1TTFAT', 'DR1DAY', 'DR1DBIH', 'WTDRD1', 'WTDR2D']
Using lab columns: ['LBXSCH', 'LBXTC', 'LBXSGL']
Using examination columns: ['BMXBMI', 'BPXSY1', 'BPXDI1', 'BMXWT', 'BMXHT']
Using demographic columns: ['RIAGENDR', 'RIDAGEYR', 'RIDRETH1', 'DMDEDUC2']
Merged data shape: (9813, 21)
Merged data columns: ['SEQN', 'DR1TKCAL', 'DR1TPROT', 'DR1TCARB', 'DR1TTFAT', 'DR1DAY', 'DR1DBIH', 'WTDRD1', 'WTDR2D', 'LBXSCH', 'LBXTC', 'LBXSGL', 'BMXBMI', 'BPXSY1', 'BPXDI1', 'BMXWT', 'BMXHT', 'RIAGENDR', 'RIDAGEYR', 'RIDRETH1', 'DMDEDUC2']
Loaded NHANES data with 9813 records
Creating paired dataset...
Created paired dataset with 1963 testing samples
Number of classes: 23
Created test generator with 123 batches


## 5. Generate Predictions

In [5]:
# Function to generate all predictions
def generate_all_predictions(model, data_generator):
    print("Generating predictions...")
    # Initialize arrays to store predictions and true labels
    all_predictions = []
    all_true_labels = []
    
    # Process each batch
    for i in range(len(data_generator)):
        if i % 10 == 0:
            pass# print(f"Processing batch {i+1}/{len(data_generator)}")
            
        # Get the batch inputs and labels
        inputs, labels = data_generator[i]
        
        # Get model predictions for this batch
        batch_predictions = model.predict(inputs, verbose=0)
        
        # Store predictions and true labels
        all_predictions.append(batch_predictions)
        all_true_labels.append(labels)
    
    # Concatenate batch results
    y_pred_probs = np.vstack(all_predictions)
    y_true = np.vstack(all_true_labels)
    print(all_predictions)
    print(f"Generated predictions for {y_pred_probs.shape[0]} samples")
    return y_pred_probs, y_true

# Generate predictions
y_pred_probs, y_true = generate_all_predictions(model, test_generator)

# Convert probabilities to binary predictions using a threshold of 0.5
y_pred_binary = (y_pred_probs >= 0.5).astype(int)

# Also get class-wise predictions (for traditional classification metrics)
y_pred_classes = np.argmax(y_pred_probs, axis=1)
y_true_classes = np.argmax(y_true, axis=1)

Generating predictions...
[array([[7.79074850e-03, 9.49963578e-04, 2.85467945e-06, 3.13574856e-06,
        3.73265357e-04, 7.52203353e-03, 4.10364873e-06, 2.02170122e-04,
        3.09356983e-06, 7.99853131e-02, 2.78718949e-06, 1.61865091e-06,
        3.10821524e-05, 3.95295638e-06, 8.26082134e-04, 2.13882709e-06,
        2.33186915e-06, 9.02278721e-01, 2.43359091e-06, 2.09454583e-06,
        3.19559581e-06, 4.16431021e-06, 2.78240395e-06],
       [4.68953294e-05, 5.17987610e-05, 6.82120245e-08, 5.62429783e-08,
        4.75469120e-02, 1.09375780e-03, 6.88034305e-08, 1.52546099e-05,
        6.80900953e-08, 1.67244661e-03, 7.58780487e-08, 4.64941188e-08,
        3.12970427e-04, 1.30685805e-07, 9.49105918e-01, 5.09201463e-08,
        5.92805982e-08, 1.53068177e-04, 4.66711931e-08, 3.98922815e-08,
        9.92475293e-08, 9.12154761e-08, 5.53154891e-08],
       [6.95468858e-02, 8.75634374e-04, 3.33504272e-06, 3.50689766e-06,
        1.08000531e-04, 3.24943587e-02, 4.19447815e-06, 8.32080841e

## 6. Implement and Calculate Evaluation Metrics

In [6]:
# 1. Hamming Loss
hamming_loss_value = hamming_loss(y_true, y_pred_binary)

# 2. Subset Accuracy (Exact Match Ratio)
def subset_accuracy(y_true, y_pred):
    # Check if all predictions match for each sample
    exact_match = np.all(y_true == y_pred, axis=1)
    # Return the proportion of samples with exact matches
    return np.mean(exact_match)

subset_accuracy_value = subset_accuracy(y_true, y_pred_binary)

# 3. Micro-Averaged F1 Score
micro_f1 = f1_score(y_true.flatten(), y_pred_binary.flatten(), average='micro')

# 4. Macro-Averaged ROC-AUC Score
# Handle case where some classes might not have samples in the test set
try:
    macro_roc_auc = roc_auc_score(y_true, y_pred_probs, average='macro')
except ValueError as e:
    print(f"Warning when calculating ROC AUC: {e}")
    # Calculate for each class where possible and then average
    class_roc_aucs = []
    for i in range(y_true.shape[1]):
        if len(np.unique(y_true[:, i])) > 1:  # Check if class has multiple values
            try:
                class_roc_aucs.append(roc_auc_score(y_true[:, i], y_pred_probs[:, i]))
            except Exception:
                pass
    macro_roc_auc = np.mean(class_roc_aucs) if class_roc_aucs else float('nan')

# 5. Macro-Averaged Precision-Recall AUC
def macro_precision_recall_auc(y_true, y_pred_probs):
    pr_aucs = []
    for i in range(y_true.shape[1]):
        if len(np.unique(y_true[:, i])) > 1:  # Check if class has multiple values
            try:
                precision, recall, _ = precision_recall_curve(y_true[:, i], y_pred_probs[:, i])
                pr_aucs.append(auc(recall, precision))
            except Exception:
                pass
    return np.mean(pr_aucs) if pr_aucs else float('nan')

macro_pr_auc = macro_precision_recall_auc(y_true, y_pred_probs)

# Accuracy (based on most likely class)
accuracy = accuracy_score(y_true_classes, y_pred_classes)

# Calculate additional metrics for comprehensive evaluation
macro_f1 = f1_score(y_true_classes, y_pred_classes, average='macro')
weighted_f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')

## 7. Display Metrics

In [7]:
# Create a DataFrame to display metrics
metrics_df = pd.DataFrame([
    ["Hamming Loss", hamming_loss_value, "Lower is better"],
    ["Subset Accuracy (Exact Match Ratio)", subset_accuracy_value, "Higher is better"],
    ["Micro-Averaged F1 Score", micro_f1, "Higher is better"],
    ["Macro-Averaged Precision-Recall AUC", macro_pr_auc, "Higher is better"],
    ["Accuracy (Most Likely Class)", accuracy, "Higher is better"],
    ["Macro-Averaged F1 Score", macro_f1, "Higher is better"],
    ["Weighted F1 Score", weighted_f1, "Higher is better"]
], columns=["Metric", "Value", "Interpretation"])

# Style the DataFrame
styled_metrics = metrics_df.style.set_properties(**{
    'text-align': 'left',
    'border': '1px solid black',
    'padding': '5px'
}).format({"Value": "{:.4f}"})

# Display metrics
display(styled_metrics)

# Print a summary
print("\nModel Evaluation Summary:")
print(f"The model achieves an accuracy of {accuracy:.2%} for the most likely class.")
print(f"The hamming loss of {hamming_loss_value:.4f} indicates the proportion of misclassified labels.")
print(f"The subset accuracy of {subset_accuracy_value:.2%} shows the percentage of samples where all labels are correctly predicted.")

Unnamed: 0,Metric,Value,Interpretation
0,Hamming Loss,0.0158,Lower is better
1,Subset Accuracy (Exact Match Ratio),0.7906,Higher is better
2,Micro-Averaged F1 Score,0.9842,Higher is better
3,Macro-Averaged Precision-Recall AUC,0.8689,Higher is better
4,Accuracy (Most Likely Class),0.811,Higher is better
5,Macro-Averaged F1 Score,0.7807,Higher is better
6,Weighted F1 Score,0.8089,Higher is better



Model Evaluation Summary:
The model achieves an accuracy of 81.10% for the most likely class.
The hamming loss of 0.0158 indicates the proportion of misclassified labels.
The subset accuracy of 79.06% shows the percentage of samples where all labels are correctly predicted.


## 8. Generate Detailed Classification Report

In [8]:
# Get unique classes in y_true_classes
unique_classes = np.unique(y_true_classes)

# Adjust target_names to match the unique classes
adjusted_target_names = [deficiency_names[i][:15] + '...' if len(deficiency_names[i]) > 15 else deficiency_names[i] for i in unique_classes]

# Get classification report for single-label prediction (most likely class)
class_report = classification_report(y_true_classes, y_pred_classes, target_names=adjusted_target_names, output_dict=True)

# Convert to DataFrame for better visualization
class_report_df = pd.DataFrame(class_report).transpose()

# Display the classification report
display(class_report_df.style.set_caption("Classification Report").format(precision=2))

Unnamed: 0,precision,recall,f1-score,support
Vitamin A Defic...,0.7,0.78,0.74,96.0
Vitamin B1 Defi...,0.74,0.79,0.77,29.0
Vitamin B5 Defi...,0.74,0.83,0.79,253.0
Vitamin B6 Defi...,0.78,0.48,0.59,98.0
Vitamin B9 Defi...,0.89,0.99,0.94,282.0
Vitamin C Defic...,0.74,0.77,0.75,327.0
Vitamin K Defic...,0.98,0.91,0.94,345.0
Iron Deficiency,0.8,0.7,0.75,239.0
Protein Deficie...,0.75,0.77,0.76,294.0
accuracy,0.81,0.81,0.81,0.81
