In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

# Define paths to data
root_dir = r"C:\Users\avs20\Documents\GitHub\FacialExpressionAI_Tanaka2023\Squeezenet_FullImage\data"  # Replace with the root directory containing 'pain', 'tickle', and 'normal' folders

# Define transformations with data augmentation
transform = transforms.Compose([
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the dataset
dataset = datasets.ImageFolder(root=root_dir, transform=transform)

# Split the dataset (64:16:20)
total_size = len(dataset)
train_size = int(0.64 * total_size)
val_size = int(0.16 * total_size)
test_size = total_size - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create data loaders
batch_size = 512
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Load the pre-trained SqueezeNet model and fine-tune
model = models.squeezenet1_1(pretrained=True)
model.classifier[1] = nn.Conv2d(512, 3, kernel_size=1)  # Modify for 3 classes
model.num_classes = 3

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=3e-4, momentum=0.9)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Training loop
num_epochs = 10
val_interval = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

    # Validation at intervals
    if (epoch + 1) % val_interval == 0:
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print(f"Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

# Testing the model
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Confusion matrix, sensitivity, and specificity
conf_matrix = confusion_matrix(all_labels, all_preds)
sensitivity = conf_matrix.diagonal() / conf_matrix.sum(axis=1)
specificity = (conf_matrix.sum() - conf_matrix.sum(axis=0) - conf_matrix.sum(axis=1) + conf_matrix.diagonal()) / (conf_matrix.sum() - conf_matrix.sum(axis=1))

print("Confusion Matrix:")
print(conf_matrix)
print(f"Sensitivity: {sensitivity}")
print(f"Specificity: {specificity}")




In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
import pandas as pd
from sklearn.metrics import confusion_matrix

# Define paths to data
root_dir = r"C:\Users\avs20\Documents\GitHub\FacialExpressionAI_Tanaka2023\Squeezenet_FullImage\data"  # Replace with your dataset path

# Define transformations with data augmentation
transform = transforms.Compose([
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the dataset
dataset = datasets.ImageFolder(root=root_dir, transform=transform)

# Split the dataset (64:16:20)
total_size = len(dataset)
train_size = int(0.64 * total_size)
val_size = int(0.16 * total_size)
test_size = total_size - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create data loaders
batch_size = 512
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Load the pre-trained SqueezeNet model and fine-tune
model = models.squeezenet1_1(pretrained=True)
model.classifier[1] = nn.Conv2d(512, 3, kernel_size=1)  # Modify for 3 classes
model.num_classes = 3

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=3e-4, momentum=0.9)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Hook to extract feature vectors
def extract_features(images):
    with torch.no_grad():
        # Pass images through the feature extractor
        features = model.features(images)
        # Global Average Pooling to reduce dimensions
        features = nn.functional.adaptive_avg_pool2d(features, (1, 1)).squeeze(-1).squeeze(-1)
    return features

# Function to save feature vectors to a CSV file
def save_features_to_csv(data, file_name):
    df = pd.DataFrame(data, columns=["Image_Name", "Class", "Feature_Vector"])
    df.to_csv(file_name, index=False)

# Training loop
num_epochs = 110
val_interval = 1
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    if epoch == num_epochs - 1:  # To store features during the final training epoch
        final_train_features = []

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        running_loss += loss.item()

        # Store feature vectors in the final epoch
        if epoch == num_epochs - 1:
            features = extract_features(inputs).cpu().numpy()
            image_names = [dataset.samples[idx][0] for idx in range(len(dataset.samples))]
            for name, label, feature in zip(image_names, labels.cpu().numpy(), features):
                final_train_features.append([name, label, feature.tolist()])

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

    # Validation at intervals
    if (epoch + 1) % val_interval == 0:
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print(f"Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

# Save final training features
if final_train_features:
    save_features_to_csv(final_train_features, "final_train_features.csv")

# Testing the model and extracting features
model.eval()
test_features = []
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        # Collect predictions and labels
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

        # Extract and save feature vectors for test data
        features = extract_features(inputs).cpu().numpy()
        image_names = [dataset.samples[idx][0] for idx in range(len(dataset.samples))]
        for name, label, feature in zip(image_names, labels.cpu().numpy(), features):
            test_features.append([name, label, feature.tolist()])

# Save test features
save_features_to_csv(test_features, "test_features.csv")

# Confusion matrix
conf_matrix = confusion_matrix(all_labels, all_preds)
print("Confusion Matrix:")
print(conf_matrix)


In [13]:
# Save the model
torch.save(model.state_dict(), "squeezenet_model.pth")


In [None]:
import torch
import torch.nn as nn
from torchvision import models

# Load the model architecture
model = models.squeezenet1_1(pretrained=False)  # Use pretrained=False when loading a saved model
model.classifier[1] = nn.Conv2d(512, 3, kernel_size=1)  # Ensure the same architecture
model.num_classes = 3

# Load the saved state dict
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load("squeezenet_model.pth", map_location=device))
model = model.to(device)
model.eval()  # Set the model to evaluation mode


In [None]:
img_dir = r'C:\Users\avs20\Documents\GitHub\FacialExpressionAI_Tanaka2023\Squeezenet_FullImage\data\examples'
# img_name = 'collies.JPG'
# img_name = 'multiple_dogs.jpg'
# img_name = 'snake.JPEG'
img_name = 'pain_0004.bmp'
img_path = os.path.join(img_dir, img_name)

pil_img = PIL.Image.open(img_path)
pil_img

In [None]:
normalizer = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
torch_img = torch.from_numpy(np.asarray(pil_img)).permute(2, 0, 1).unsqueeze(0).float().div(255).cuda()
torch_img = F.upsample(torch_img, size=(224, 224), mode='bilinear', align_corners=False)
normed_torch_img = normalizer(torch_img)

In [None]:
#squeezenet = models.squeezenet1_1(pretrained=True)

import torch
import torch.nn as nn
from torchvision import models

# Load the model architecture
squeezenet = models.squeezenet1_1(pretrained=False)  # Use pretrained=False when loading a saved model
squeezenet.classifier[1] = nn.Conv2d(512, 3, kernel_size=1)  # Ensure the same architecture
squeezenet.num_classes = 3

# Load the saved state dict
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
squeezenet.load_state_dict(torch.load("squeezenet_model.pth", map_location=device))
squeezenet = squeezenet.to(device)
squeezenet.eval()  # Set the model to evaluation mode


squeezenet.eval(), squeezenet.cuda();


cam_dict = dict()

In [None]:
squeezenet_model_dict = dict(type='squeezenet', arch=squeezenet, layer_name='features_12_expand3x3_activation', input_size=(224, 224))
squeezenet_gradcam = GradCAM(squeezenet_model_dict, True)
squeezenet_gradcampp = GradCAMpp(squeezenet_model_dict, True)
cam_dict['squeezenet'] = [squeezenet_gradcam, squeezenet_gradcampp]

In [None]:
images = []
for gradcam, gradcam_pp in cam_dict.values():
    mask, _ = gradcam(normed_torch_img)
    heatmap, result = visualize_cam(mask, torch_img)

    mask_pp, _ = gradcam_pp(normed_torch_img)
    heatmap_pp, result_pp = visualize_cam(mask_pp, torch_img)
    
    images.append(torch.stack([torch_img.squeeze().cpu(), heatmap, heatmap_pp, result, result_pp], 0))
    
images = make_grid(torch.cat(images, 0), nrow=5)

In [None]:
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)
output_name = img_name
output_path = os.path.join(output_dir, output_name)

save_image(images, output_path)
PIL.Image.open(output_path)

In [None]:
import os
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torchvision.utils import make_grid, save_image
from torchvision import models
from torchvision.transforms import Normalize
import PIL.Image
from matplotlib import pyplot as plt

from utils import visualize_cam, Normalize
from gradcam import GradCAM, GradCAMpp
import cv2

# Define input and output directories
img_dir = r'C:\Users\avs20\Documents\GitHub\FacialExpressionAI_Tanaka2023\Squeezenet_FullImage\data\examples'
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)

# Initialize the model
squeezenet = models.squeezenet1_1(pretrained=False)
squeezenet.classifier[1] = nn.Conv2d(512, 3, kernel_size=1)
squeezenet.num_classes = 3

# Load the saved model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
squeezenet.load_state_dict(torch.load("squeezenet_model.pth", map_location=device))
squeezenet = squeezenet.to(device)
squeezenet.eval()

# Initialize GradCAM and GradCAM++
squeezenet_model_dict = dict(
    type='squeezenet',
    arch=squeezenet,
    layer_name='features_12_expand3x3_activation',
    input_size=(224, 224),
)
gradcam = GradCAM(squeezenet_model_dict, True)
gradcam_pp = GradCAMpp(squeezenet_model_dict, True)
cam_dict = {'Grad-CAM': gradcam, 'Grad-CAM++': gradcam_pp}

# Define normalizer
normalizer = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Process all images in img_dir
for img_name in os.listdir(img_dir):
    img_path = os.path.join(img_dir, img_name)
    if not img_name.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
        continue  # Skip non-image files

    # Load and preprocess the image
    pil_img = PIL.Image.open(img_path).convert("RGB")
    torch_img = torch.from_numpy(np.asarray(pil_img)).permute(2, 0, 1).unsqueeze(0).float().div(255).to(device)
    torch_img = F.interpolate(torch_img, size=(224, 224), mode='bilinear', align_corners=False) #try hashing this out to avoid images becoming squares
    normed_torch_img = normalizer(torch_img)

    # Generate Grad-CAM and Grad-CAM++ heatmaps
    images = [torch_img.squeeze().cpu()]  # Start with the original image
    titles = [f"Original Image: {img_name}"]  # Add the original image title

    for cam_name, cam_method in cam_dict.items():
        mask, _ = cam_method(normed_torch_img)
        heatmap, result = visualize_cam(mask, torch_img)

        # Collect images and titles for plotting
        images.extend([heatmap, result])
        titles.extend([f"{cam_name} Heatmap", f"{cam_name} Result"])

    # Plot and save the results
    fig, axs = plt.subplots(1, len(images), figsize=(15, 5))
    for i, img in enumerate(images):
        axs[i].imshow(img.permute(1, 2, 0).cpu().numpy())
        axs[i].set_title(titles[i], fontsize=10)
        axs[i].axis('off')

    plt.tight_layout()
    output_path = os.path.join(output_dir, f"{os.path.splitext(img_name)[0]}_heatmaps.png")
    plt.savefig(output_path)
    plt.close(fig)


In [1]:
#Code with square pictures

import os
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torchvision import models, transforms
from torchvision.transforms import Normalize
from torch.utils.data import Dataset, DataLoader
import PIL.Image
from matplotlib import pyplot as plt

#from utils import visualize_cam
from viz_cam_updated2 import visualize_cam
from gradcam import GradCAM, GradCAMpp
import cv2


# Define input and output directories
img_dir = r'C:\Users\avs20\Documents\GitHub\FacialExpressionAI_Tanaka2023\Squeezenet_FullImage\data\examples'
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)

# Define custom dataset
class ImageDataset(Dataset):
    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.img_paths = [
            os.path.join(img_dir, img_name)
            for img_name in os.listdir(img_dir)
            if img_name.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))
        ]
        self.transform = transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = PIL.Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, os.path.basename(img_path)  # Return image and its file name

# Define transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load dataset and dataloader
dataset = ImageDataset(img_dir=img_dir, transform=transform)
data_loader = DataLoader(dataset, batch_size=1, shuffle=False)

# Initialize the model
squeezenet = models.squeezenet1_1(pretrained=False)
squeezenet.classifier[1] = nn.Conv2d(512, 3, kernel_size=1)
squeezenet.num_classes = 3

# Load the saved model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
squeezenet.load_state_dict(torch.load("squeezenet_model.pth", map_location=device))
squeezenet = squeezenet.to(device)
squeezenet.eval()

# Initialize GradCAM and GradCAM++
dummy_input = next(iter(data_loader))[0][0]  # Get a single sample to determine dimensions
input_size = dummy_input.shape[1:]  # Extract height and width

squeezenet_model_dict = dict(
    type='squeezenet',
    arch=squeezenet,
    layer_name='features_12_expand3x3_activation',
    input_size=input_size,
)
gradcam = GradCAM(squeezenet_model_dict, True)
gradcam_pp = GradCAMpp(squeezenet_model_dict, True)
cam_dict = {'Grad-CAM': gradcam, 'Grad-CAM++': gradcam_pp}

# Process all images in the dataset
for images, img_names in data_loader:
    images = images.to(device)

    # Normalize input image
    normed_images = images.clone()  # Normalization already applied in transform

    # Generate Grad-CAM and Grad-CAM++ heatmaps
    images = [images[0].cpu()]  # Start with the original image
    titles = [f"Original Image: {img_names[0]}"]  # Add the original image title

    for cam_name, cam_method in cam_dict.items():
        mask, _ = cam_method(normed_images)
        heatmap, result = visualize_cam(mask, images[0])

        # Collect images and titles for plotting
        images.extend([heatmap, result])
        titles.extend([f"{cam_name} Heatmap", f"{cam_name} Result"])

    # Plot and save the results
    fig, axs = plt.subplots(1, len(images), figsize=(15, 5))
    for i, img in enumerate(images):
        axs[i].imshow(img.permute(1, 2, 0).cpu().numpy())
        axs[i].set_title(titles[i], fontsize=10)
        axs[i].axis('off')

    plt.tight_layout()
    output_path = os.path.join(output_dir, f"{os.path.splitext(img_names[0])[0]}_heatmaps.png")
    plt.savefig(output_path)
    plt.close(fig)




saliency_map size : torch.Size([8, 14])
saliency_map size : torch.Size([8, 14])


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping i

In [2]:
import os
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torchvision import models, transforms
from torchvision.transforms import Normalize
from torch.utils.data import Dataset, DataLoader
import PIL.Image
from matplotlib import pyplot as plt
import cv2

from viz_cam_updated2 import visualize_cam
from gradcam import GradCAM, GradCAMpp

# Define input and output directories
img_dir = r'C:\Users\avs20\Documents\GitHub\FacialExpressionAI_Tanaka2023\Squeezenet_FullImage\data\examples'
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)

# Define custom dataset
class ImageDataset(Dataset):
    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        self.img_paths = [
            os.path.join(img_dir, img_name)
            for img_name in os.listdir(img_dir)
            if img_name.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))
        ]
        self.transform = transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = PIL.Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, os.path.basename(img_path)  # Return image and its file name

# Define transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Model normalization
])

# Load dataset and dataloader
dataset = ImageDataset(img_dir=img_dir, transform=transform)
data_loader = DataLoader(dataset, batch_size=1, shuffle=False)

# Initialize the model
squeezenet = models.squeezenet1_1(pretrained=False)
squeezenet.classifier[1] = nn.Conv2d(512, 3, kernel_size=1)
squeezenet.num_classes = 3

# Load the saved model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
squeezenet.load_state_dict(torch.load("squeezenet_model.pth", map_location=device))
squeezenet = squeezenet.to(device)
squeezenet.eval()

# Initialize GradCAM and GradCAM++
dummy_input = next(iter(data_loader))[0][0]  # Get a single sample to determine dimensions
input_size = dummy_input.shape[1:]  # Extract height and width

squeezenet_model_dict = dict(
    type='squeezenet',
    arch=squeezenet,
    layer_name='features_12_expand3x3_activation',
    input_size=input_size,
)
gradcam = GradCAM(squeezenet_model_dict, True)
gradcam_pp = GradCAMpp(squeezenet_model_dict, True)
cam_dict = {'Grad-CAM': gradcam, 'Grad-CAM++': gradcam_pp}

# Unnormalize function for displaying the original image
def unnormalize(tensor, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    mean = torch.tensor(mean).view(3, 1, 1)
    std = torch.tensor(std).view(3, 1, 1)
    return tensor * std + mean  # Reverse the normalization

# Process all images in the dataset
for images, img_names in data_loader:
    images = images.to(device)

    # Normalize input image
    normed_images = images.clone()  # Normalization already applied in transform

    # Unnormalize the original image before displaying
    unnormed_image = unnormalize(images[0].cpu()).clamp(0, 1)  # Ensure valid range [0,1]
    images = [unnormed_image]  # Use unnormalized image instead

    titles = [f"Original Image: {img_names[0]}"]  # Add the original image title

    for cam_name, cam_method in cam_dict.items():
        mask, _ = cam_method(normed_images)
        heatmap, result = visualize_cam(mask, images[0])

        # Collect images and titles for plotting
        images.extend([heatmap, result])
        titles.extend([f"{cam_name} Heatmap", f"{cam_name} Result"])

    # Plot and save the results
    fig, axs = plt.subplots(1, len(images), figsize=(15, 5))
    for i, img in enumerate(images):
        axs[i].imshow(img.permute(1, 2, 0).cpu().numpy())
        axs[i].set_title(titles[i], fontsize=10)
        axs[i].axis('off')

    plt.tight_layout()
    output_path = os.path.join(output_dir, f"{os.path.splitext(img_names[0])[0]}_heatmaps.png")
    plt.savefig(output_path)
    plt.close(fig)


saliency_map size : torch.Size([8, 14])
saliency_map size : torch.Size([8, 14])
