In [None]:
import os
import torch
from PIL import Image
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import random
from collections import Counter
import numpy as np

# Set random seed for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(29)

# Define paths to dataset directories
hamiltonian_dir = '0p35_sp_uniform_color_spiral_hamiltonian_medium'
non_hamiltonian_dir = '0p35_sp_uniform_color_spiral_non_hamiltonian_medium'

# Define data transformations
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet normalization
])

# Custom Dataset class
class CustomImageDataset(Dataset):
    def __init__(self, image_dir, label, transform=None):
        self.image_dir = image_dir
        self.label = label
        self.transform = transform
        self.image_files = [f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.label

# Create datasets for each class
hamiltonian_dataset = CustomImageDataset(hamiltonian_dir, label=1, transform=data_transforms)
non_hamiltonian_dataset = CustomImageDataset(non_hamiltonian_dir, label=0, transform=data_transforms)

# Combine datasets
dataset = torch.utils.data.ConcatDataset([hamiltonian_dataset, non_hamiltonian_dataset])

# Split dataset into training, validation, and test sets
indices = list(range(len(dataset)))
train_val_size = 100
test_size = 500

train_val_indices, test_indices = train_test_split(indices, test_size=test_size, stratify=[dataset[i][1] for i in indices], random_state=42)
train_indices, val_indices = train_test_split(train_val_indices[:train_val_size], test_size=0.2, stratify=[dataset[i][1] for i in train_val_indices[:train_val_size]], random_state=42)

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

# Define data loaders
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=10, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)

# Initialize the ResNet-50 model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2)  # Change the final layer for binary classification
model = model.to(device)

# Define optimizer and loss function
optimizer = optim.AdamW(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Define function to compute metrics
def compute_metrics(preds, labels):
    preds = preds.detach().cpu().numpy()
    labels = labels.detach().cpu().numpy()
    accuracy = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='macro')
    return accuracy, f1

# Evaluation function
def evaluate(model, dataloader, device, criterion):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = correct / total
    f1 = f1_score(all_labels, all_preds, average='macro')
    return total_loss / len(dataloader), accuracy, f1

# Training function
def train(model, train_loader, val_loader, test_loader, device, optimizer, criterion, num_epochs=100, patience=10):
    best_val_loss = float('inf')
    best_accuracy = 0
    early_stop_counter = 0
    best_model_path = './best_resnet50_model.pth'
    columns = ['epoch', 'train_loss', 'train_acc', 'train_f1', 'val_loss', 'val_acc', 'val_f1', 'test_loss', 'test_acc', 'test_f1']
    results = []

    for epoch in range(1, num_epochs + 1):
        model.train()
        train_loss, correct, total = 0, 0, 0
        all_preds, all_labels = [], []

        for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch}/{num_epochs}'):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        train_acc, train_f1 = compute_metrics(torch.tensor(all_preds), torch.tensor(all_labels))
        train_loss /= total

        # Validate
        val_loss, val_acc, val_f1 = evaluate(model, val_loader, device, criterion)

        # Early stopping
        if val_acc > best_accuracy:
            best_accuracy = val_acc
            early_stop_counter = 0
            torch.save(model.state_dict(), best_model_path)
            print(f"Best model saved with validation accuracy: {val_acc:.2f}%")
        else:
            early_stop_counter += 1
            print(f"Epochs without improvement: {early_stop_counter}")
        
        if early_stop_counter >= patience:
            print("Early stopping triggered.")
            break

        # Test evaluation
        test_loss, test_acc, test_f1 = evaluate(model, test_loader, device, criterion)

        # Log results
        results.append([epoch, train_loss, train_acc, train_f1, val_loss, val_acc, val_f1, test_loss, test_acc, test_f1])
        df = pd.DataFrame(results, columns=columns)
        df.to_csv('resnet50_training_results.csv', index=False)

        print(f"Epoch {epoch}/{num_epochs}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Test Loss: {test_loss:.4f}")

    print("Training complete. Best model saved as best_resnet50_model.pth")

# Train the model
train(model, train_loader, val_loader, test_loader, device, optimizer, criterion, num_epochs=100, patience=10)


Epoch 1/100: 100%|██████████| 8/8 [00:00<00:00, 19.56it/s]


Best model saved with validation accuracy: 0.40%
Epoch 1/100: Train Loss: 1.3613, Val Loss: 91.8199, Test Loss: 86.7780


Epoch 2/100: 100%|██████████| 8/8 [00:00<00:00, 19.69it/s]


Best model saved with validation accuracy: 0.60%
Epoch 2/100: Train Loss: 0.7457, Val Loss: 23.2389, Test Loss: 27.5171


Epoch 3/100: 100%|██████████| 8/8 [00:00<00:00, 19.61it/s]


Epochs without improvement: 1
Epoch 3/100: Train Loss: 0.5752, Val Loss: 18.7483, Test Loss: 22.1793


Epoch 4/100: 100%|██████████| 8/8 [00:00<00:00, 19.97it/s]


Best model saved with validation accuracy: 0.65%
Epoch 4/100: Train Loss: 0.5008, Val Loss: 12.9880, Test Loss: 19.7851


Epoch 5/100: 100%|██████████| 8/8 [00:00<00:00, 19.86it/s]


Epochs without improvement: 1
Epoch 5/100: Train Loss: 0.4623, Val Loss: 25.2851, Test Loss: 39.9106


Epoch 6/100: 100%|██████████| 8/8 [00:00<00:00, 19.94it/s]


Best model saved with validation accuracy: 0.75%
Epoch 6/100: Train Loss: 0.4574, Val Loss: 8.6668, Test Loss: 9.1274


Epoch 7/100: 100%|██████████| 8/8 [00:00<00:00, 19.80it/s]


Epochs without improvement: 1
Epoch 7/100: Train Loss: 0.2989, Val Loss: 6.9745, Test Loss: 8.2519


Epoch 8/100: 100%|██████████| 8/8 [00:00<00:00, 19.50it/s]


Best model saved with validation accuracy: 0.80%
Epoch 8/100: Train Loss: 0.1841, Val Loss: 4.2553, Test Loss: 7.5840


Epoch 9/100: 100%|██████████| 8/8 [00:00<00:00, 19.80it/s]


Best model saved with validation accuracy: 0.85%
Epoch 9/100: Train Loss: 0.0648, Val Loss: 3.3203, Test Loss: 10.4796


Epoch 10/100: 100%|██████████| 8/8 [00:00<00:00, 19.84it/s]


Epochs without improvement: 1
Epoch 10/100: Train Loss: 0.0762, Val Loss: 3.0570, Test Loss: 9.8209


Epoch 11/100: 100%|██████████| 8/8 [00:00<00:00, 19.76it/s]


Epochs without improvement: 2
Epoch 11/100: Train Loss: 0.1213, Val Loss: 7.7245, Test Loss: 15.3310


Epoch 12/100: 100%|██████████| 8/8 [00:00<00:00, 19.79it/s]


Epochs without improvement: 3
Epoch 12/100: Train Loss: 0.1366, Val Loss: 16.8222, Test Loss: 25.0387


Epoch 13/100: 100%|██████████| 8/8 [00:00<00:00, 19.77it/s]


Best model saved with validation accuracy: 0.95%
Epoch 13/100: Train Loss: 0.1403, Val Loss: 1.8212, Test Loss: 18.9394


Epoch 14/100: 100%|██████████| 8/8 [00:00<00:00, 19.57it/s]


Epochs without improvement: 1
Epoch 14/100: Train Loss: 0.2050, Val Loss: 14.4516, Test Loss: 12.9497


Epoch 15/100: 100%|██████████| 8/8 [00:00<00:00, 19.58it/s]


Epochs without improvement: 2
Epoch 15/100: Train Loss: 0.3065, Val Loss: 12.9006, Test Loss: 14.6456


Epoch 16/100: 100%|██████████| 8/8 [00:00<00:00, 19.62it/s]


Epochs without improvement: 3
Epoch 16/100: Train Loss: 0.1981, Val Loss: 7.1498, Test Loss: 10.1616


Epoch 17/100: 100%|██████████| 8/8 [00:00<00:00, 19.60it/s]


Epochs without improvement: 4
Epoch 17/100: Train Loss: 0.1972, Val Loss: 7.2137, Test Loss: 8.7014


Epoch 18/100: 100%|██████████| 8/8 [00:00<00:00, 19.72it/s]


Epochs without improvement: 5
Epoch 18/100: Train Loss: 0.0474, Val Loss: 4.9684, Test Loss: 9.3238


Epoch 19/100: 100%|██████████| 8/8 [00:00<00:00, 19.75it/s]


Epochs without improvement: 6
Epoch 19/100: Train Loss: 0.0226, Val Loss: 4.0197, Test Loss: 9.3932


Epoch 20/100: 100%|██████████| 8/8 [00:00<00:00, 19.49it/s]


Epochs without improvement: 7
Epoch 20/100: Train Loss: 0.0117, Val Loss: 3.4726, Test Loss: 9.5404


Epoch 21/100: 100%|██████████| 8/8 [00:00<00:00, 19.35it/s]


Epochs without improvement: 8
Epoch 21/100: Train Loss: 0.0345, Val Loss: 7.6141, Test Loss: 10.9860


Epoch 22/100: 100%|██████████| 8/8 [00:00<00:00, 19.56it/s]


Epochs without improvement: 9
Epoch 22/100: Train Loss: 0.0222, Val Loss: 8.6798, Test Loss: 9.8998


Epoch 23/100: 100%|██████████| 8/8 [00:00<00:00, 19.56it/s]

Epochs without improvement: 10
Early stopping triggered.
Training complete. Best model saved as best_resnet50_model.pth



