# ResNet-50

In [None]:
import os
import time
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np


# Seed both RNG sources used by this cell so repeated runs start identically.
np.random.seed(42)
torch.manual_seed(42)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

# Per-channel mean / std applied after ToTensor(); identical for every split.
# These are the usual ImageNet statistics.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Only the training split is augmented (random horizontal flip and a random
# rotation of up to 15 degrees); 'val' and 'test' are resized and normalized.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]),
}
for split in ('val', 'test'):
    data_transforms[split] = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

data_dir = '/remote-home/cs_acmis_hby/Galaxy-Zoo-Classification/Contrast_experiment/Galaxy-Classification-Using-CNN/output_dataset'

splits = ['train', 'val', 'test']

# One ImageFolder per split, rooted at <data_dir>/<split>.
image_datasets = {}
for split in splits:
    split_root = os.path.join(data_dir, split)
    image_datasets[split] = datasets.ImageFolder(split_root, data_transforms[split])

batch_size = 64

# Only the training loader shuffles; 'val' and 'test' keep dataset order.
dataloaders = {}
for split in splits:
    dataloaders[split] = torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=batch_size,
        shuffle=(split == 'train'),
        num_workers=4,
    )

dataset_sizes = {}
for split in splits:
    dataset_sizes[split] = len(image_datasets[split])
class_names = image_datasets['train'].classes
print(f"Train samples: {dataset_sizes['train']}")
print(f"Validation samples: {dataset_sizes['val']}")
print(f"Test samples: {dataset_sizes['test']}")


# Pretrained ResNet-50 whose final fully connected layer is replaced by a new
# linear head with one output per class found in the training folder.
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_names))
model = model.to(device)


# All parameters are optimized with Adam; the learning rate is multiplied by
# 0.1 every 10 scheduler steps.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)


def train_model(model, criterion, optimizer, scheduler, num_epochs=30):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' pass followed by a 'val' pass over the
    module-level ``dataloaders``; batches are moved to the module-level
    ``device`` and per-phase averages are computed with ``dataset_sizes``.
    A deep copy of the weights is kept whenever the validation accuracy
    improves, and that copy is loaded back before returning.

    Args:
        model: Network to optimize (modified in place).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once after each training phase.
        num_epochs: Number of train/val epochs to run.

    Returns:
        The same ``model`` object, holding the best-validation weights.
    """
    start_time = time.time()
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 20)

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            # train(False) is what eval() does, so one call covers both modes.
            model.train(is_training)

            loss_sum = 0.0
            correct = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                # Autograd is only recorded during the training phase.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    preds = torch.max(outputs, 1).indices
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by batch size so the epoch value is a
                # per-sample average even when the last batch is smaller.
                loss_sum += loss.item() * inputs.size(0)
                correct += (preds == labels.data).sum()

            if is_training:
                scheduler.step()

            n_samples = dataset_sizes[phase]
            epoch_loss = loss_sum / n_samples
            epoch_acc = correct.double() / n_samples

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep an independent copy of the weights of the best epoch so far.
            if not is_training and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - start_time
    print(f'Training complete in {time_elapsed//60:.0f}m {time_elapsed%60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    model.load_state_dict(best_model_wts)
    return model

# Fine-tune for 30 epochs; train_model returns the model with its
# best-validation weights loaded.
model = train_model(model, criterion, optimizer, scheduler, num_epochs=30)
# NOTE(review): this path is relative to the current working directory, while
# the evaluation cell below loads an absolute
# .../Contrast_experiment/resnet50_galaxy.pth — confirm both resolve to the same file.
torch.save(model.state_dict(), 'resnet50_galaxy.pth')


def evaluate_model(model):
    """Evaluate ``model`` on the test split and plot its confusion matrix.

    Runs inference over the module-level ``dataloaders['test']`` on
    ``device`` with gradients disabled, prints scikit-learn's classification
    report labelled with the module-level ``class_names``, and shows the
    confusion matrix as an annotated heatmap.

    Args:
        model: Network returning per-class scores; the arg-max over
            dimension 1 is taken as the predicted class index.
    """
    # seaborn is not imported by this cell's import block, so the heatmap call
    # below used to raise NameError; import it where it is needed.
    import seaborn as sns

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloaders['test']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            # Move results to the CPU so scikit-learn can consume them.
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    report = classification_report(all_labels, all_preds, target_names=class_names)
    print("Classification Report:\n", report)

    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d',
                xticklabels=class_names, yticklabels=class_names, cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

# Report test-split metrics for the model returned by train_model above.
evaluate_model(model)


In [None]:
import os
import torch
import torch.nn as nn
from torchvision import models, datasets, transforms
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Use the GPU when one is visible, otherwise evaluate on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

data_dir = '/remote-home/cs_acmis_hby/Galaxy-Zoo-Classification/Contrast_experiment/Galaxy-Classification-Using-CNN/output_dataset'
model_path = '/remote-home/cs_acmis_hby/Galaxy-Zoo-Classification/Contrast_experiment/resnet50_galaxy.pth'

# Deterministic preprocessing only: resize, convert to tensor, normalize.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225],
    ),
])

test_dataset = datasets.ImageFolder(
    root=os.path.join(data_dir, 'test'),
    transform=data_transforms,
)
# shuffle=False keeps batches in dataset order.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4,
)
class_names = test_dataset.classes
print(f"Test classes: {class_names}")
print(f"Test samples: {len(test_dataset)}")

# Rebuild the architecture with a head sized to the detected classes, then
# restore the saved weights from model_path.
model = models.resnet50(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_names))
checkpoint = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint)
model = model.to(device)
model.eval()

def evaluate_model(model, dataloader, class_names):
    """Evaluate ``model`` on ``dataloader`` and save predictions and a confusion matrix.

    Writes ``resnet50_galaxy_predictions.csv`` (image path, true label,
    predicted label) and ``confusion_matrix.png`` into the ``baseline_result``
    directory, prints a classification report, and shows the heatmap.

    Args:
        model: Network returning per-class scores; the arg-max over
            dimension 1 is taken as the predicted class index.
        dataloader: Loader over the evaluation data. Its ``dataset`` must
            expose ``imgs`` (as ``ImageFolder`` does), and it must iterate in
            dataset order (``shuffle=False``) so paths line up with predictions.
        class_names: Class names indexed by label id.
    """
    output_dir = '/remote-home/cs_acmis_hby/Galaxy-Zoo-Classification/Contrast_experiment/baseline_result'
    # Writing the CSV / PNG fails if the directory is missing, so create it.
    os.makedirs(output_dir, exist_ok=True)

    # Make the function self-contained instead of relying on the caller
    # having switched the model to inference mode.
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Take the paths from the dataset behind the loader that was actually
    # evaluated, not from a module-level global.
    all_paths = [path for path, _ in dataloader.dataset.imgs]

    df = pd.DataFrame({
        'image_path': all_paths,
        'true_label': [class_names[i] for i in all_labels],
        'predicted_label': [class_names[i] for i in all_preds]
    })
    df.to_csv(os.path.join(output_dir, 'resnet50_galaxy_predictions.csv'), index=False)
    print("test_predictions.csv")

    report = classification_report(all_labels, all_preds, target_names=class_names)
    print("Classification Report:\n", report)

    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d',
                xticklabels=class_names, yticklabels=class_names, cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
    print("confusion_matrix.png")
    plt.show()

# Evaluate the restored checkpoint on the unshuffled test loader.
evaluate_model(model, test_loader, class_names)


# DenseNet-121

In [None]:
import os
import torch
import torch.nn as nn
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import time

# Use the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

data_dir = '/remote-home/cs_acmis_hby/Galaxy-Zoo-Classification/Contrast_experiment/Galaxy-Classification-Using-CNN/output_dataset'

# Per-channel mean / std applied after ToTensor(); identical for every split.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Only the training split is augmented (random horizontal flip); 'val' and
# 'test' are just resized and normalized.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]),
}
for split in ('val', 'test'):
    data_transforms[split] = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

splits = ['train', 'val', 'test']

# One ImageFolder per split, rooted at <data_dir>/<split>.
image_datasets = {}
for split in splits:
    image_datasets[split] = datasets.ImageFolder(
        os.path.join(data_dir, split), data_transforms[split]
    )

# Only the training loader shuffles; 'val' and 'test' keep dataset order.
dataloaders = {}
for split in splits:
    dataloaders[split] = DataLoader(
        image_datasets[split],
        batch_size=64,
        shuffle=(split == 'train'),
        num_workers=4,
    )
class_names = image_datasets['train'].classes
print(f"Detected classes: {class_names}")

# Pretrained DenseNet-121 whose classifier is replaced by a new linear head
# with one output per class found in the training folder.
model = models.densenet121(pretrained=True)
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, len(class_names))
model = model.to(device)

# All parameters are optimized with Adam; the learning rate is multiplied by
# 0.1 every 5 scheduler steps.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' pass followed by a 'val' pass over the
    module-level ``dataloaders``; batches are moved to the module-level
    ``device`` and per-phase averages use ``len(image_datasets[phase])``.

    Args:
        model: Network to optimize (modified in place).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once after each training phase.
        num_epochs: Number of train/val epochs to run.

    Returns:
        The same ``model`` object, holding the weights of the epoch with the
        highest validation accuracy.
    """
    # Local import: this cell's import block does not bring in ``copy``.
    import copy

    best_acc = 0.0
    # state_dict() returns references to the live parameter tensors, so a
    # snapshot must be deep-copied; otherwise later optimizer steps overwrite
    # it and the final load_state_dict() restores last-epoch weights.
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        print('-' * 20)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss, running_corrects = 0.0, 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                # Autograd is only recorded during the training phase.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by batch size so the epoch value is a
                # per-sample average even when the last batch is smaller.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / len(image_datasets[phase])
            epoch_acc = running_corrects.double() / len(image_datasets[phase])
            print(f"{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    model.load_state_dict(best_model_wts)
    return model

# Fine-tune for 15 epochs (overrides train_model's default of 10).
model = train_model(model, criterion, optimizer, scheduler, num_epochs=15)

# Persist the weights held by the returned model.
save_path = '/remote-home/cs_acmis_hby/Galaxy-Zoo-Classification/Contrast_experiment/densenet121_best.pth'
torch.save(model.state_dict(), save_path)
print(f"\n✅ : {save_path}")

def evaluate_model(model, dataloader, dataset, class_names, output_dir):
    """Evaluate ``model`` and save a prediction CSV and confusion-matrix PNG.

    Runs inference over ``dataloader`` on the module-level ``device`` with
    gradients disabled, writes ``densenet121_predictions.csv`` and
    ``densenet121_confusion_matrix.png`` into ``output_dir``, and prints a
    four-digit classification report.

    Args:
        model: Network returning per-class scores; the arg-max over
            dimension 1 is taken as the predicted class index.
        dataloader: Loader yielding ``(inputs, labels)`` batches.
        dataset: Dataset exposing ``imgs`` as ``(path, label)`` pairs; its
            order must match the order in which ``dataloader`` yields samples.
        class_names: Class names indexed by label id.
        output_dir: Existing directory that receives both output files.
    """
    model.eval()

    pred_ids = []
    true_ids = []
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            batch_preds = torch.max(model(batch_inputs), 1).indices

            # Move results to the CPU so pandas / scikit-learn can consume them.
            pred_ids.extend(batch_preds.cpu().numpy())
            true_ids.extend(batch_labels.cpu().numpy())

    image_paths = [sample[0] for sample in dataset.imgs]

    # Per-image table: file path plus readable true / predicted class names.
    true_names = [class_names[idx] for idx in true_ids]
    pred_names = [class_names[idx] for idx in pred_ids]
    table = pd.DataFrame({
        'image_path': image_paths,
        'true_label': true_names,
        'predicted_label': pred_names,
    })
    csv_path = os.path.join(output_dir, 'densenet121_predictions.csv')
    table.to_csv(csv_path, index=False)
    print(f"✅: {csv_path}")

    report = classification_report(
        true_ids, pred_ids, target_names=class_names, digits=4
    )
    print("Classification Report:\n", report)

    matrix = confusion_matrix(true_ids, pred_ids)
    plt.figure(figsize=(10, 8))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        xticklabels=class_names,
        yticklabels=class_names,
        cmap='Blues',
    )
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix - DenseNet121')
    plt.tight_layout()
    cm_path = os.path.join(output_dir, 'densenet121_confusion_matrix.png')
    plt.savefig(cm_path)
    print(f"✅: {cm_path}")
    # The figure is saved rather than shown, so close it.
    plt.close()

# Directory that receives the prediction CSV and the confusion-matrix image.
output_dir = '/remote-home/cs_acmis_hby/Galaxy-Zoo-Classification/Contrast_experiment/baseline_result'
# Create it up front; evaluate_model writes into it without checking.
os.makedirs(output_dir, exist_ok=True)
# The test loader is built with shuffle=False, so dataset order matches prediction order.
evaluate_model(model, dataloaders['test'], image_datasets['test'], class_names, output_dir)


# EfficientNet-B0

In [None]:
import os
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report
import timm
from tqdm import tqdm


BATCH_SIZE = 64
# NOTE(review): hard-coded; presumably equals the number of class folders under
# DATA_DIR/train — confirm against train_set.classes.
NUM_CLASSES = 8
EPOCHS = 15
LR = 1e-4
# Prefer the second GPU as before, but fall back to the first one on
# single-GPU hosts, where "cuda:1" is an invalid device ordinal.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    DEVICE = torch.device("cpu")
DATA_DIR = '/remote-home/cs_acmis_hby/Galaxy-Zoo-Classification/Contrast_experiment/Galaxy-Classification-Using-CNN/output_dataset'
LOCAL_MODEL_PATH = '/remote-home/cs_acmis_hby/Galaxy-Zoo-Classification/Contrast_experiment/efficientnet_b0/pytorch_model.bin'
SAVE_PATH = 'best_efficientnet_b0.pth'


# Same preprocessing for all three splits. Normalize with the mean/std that the
# ResNet-50 and DenseNet-121 baselines in this file use, so the pretrained
# backbone sees the input statistics it expects and the baselines stay comparable.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

train_set = datasets.ImageFolder(os.path.join(DATA_DIR, 'train'), transform=transform)
val_set = datasets.ImageFolder(os.path.join(DATA_DIR, 'val'), transform=transform)
test_set = datasets.ImageFolder(os.path.join(DATA_DIR, 'test'), transform=transform)

# Only the training loader shuffles.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)


# Build the architecture without downloading weights; they are loaded from
# LOCAL_MODEL_PATH below.
model = timm.create_model('efficientnet_b0', pretrained=False, num_classes=NUM_CLASSES)

state_dict = torch.load(LOCAL_MODEL_PATH, map_location=DEVICE)

# Drop the checkpoint's classification-layer tensors so they cannot clash with
# the freshly created NUM_CLASSES-way head; list() lets us delete while iterating.
for key in list(state_dict.keys()):
    if 'classifier' in key or 'head' in key:
        del state_dict[key]

# strict=False tolerates the keys removed above (reported as missing).
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print(f"Loaded model with {len(missing)} missing keys and {len(unexpected)} unexpected keys.")

model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)


def validate():
    """Return the module-level ``model``'s accuracy on the validation split.

    Puts the model in eval mode, iterates ``val_loader`` on ``DEVICE`` with
    gradients disabled, and divides the number of correct arg-max predictions
    by ``len(val_set)``.
    """
    model.eval()
    n_correct = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            predicted = model(images).argmax(1)
            n_correct += (predicted == labels).sum().item()
    return n_correct / len(val_set)


def train():
    """Fine-tune the module-level ``model`` for ``EPOCHS`` epochs.

    After every epoch the model is scored with ``validate()``; whenever the
    validation accuracy improves, the weights are written to ``SAVE_PATH``.
    Uses the module-level ``train_loader``, ``criterion``, ``optimizer`` and
    ``DEVICE``.
    """
    best_val_acc = 0
    for epoch in range(EPOCHS):
        # validate() leaves the model in eval mode, so re-enable training
        # mode at the start of each epoch.
        model.train()
        total_loss = 0.0
        total_correct = 0

        for inputs, targets in tqdm(train_loader, desc=f"[Epoch {epoch+1}/{EPOCHS}] Training"):
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch value is
            # a per-sample average, independent of batch count and comparable
            # with the other baselines' logs.
            total_loss += loss.item() * inputs.size(0)
            total_correct += (outputs.argmax(1) == targets).sum().item()

        train_loss = total_loss / len(train_set)
        train_acc = total_correct / len(train_set)
        val_acc = validate()
        print(f"Epoch {epoch+1}: Loss={train_loss:.4f}, Train Acc={train_acc:.4f}, Val Acc={val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), SAVE_PATH)
            print(f"✅ Saved best model at epoch {epoch+1} with Val Acc: {val_acc:.4f}")


def test():
    """Score the checkpoint at ``SAVE_PATH`` on the test split.

    Loads the saved weights into the module-level ``model``, runs inference
    over ``test_loader`` on ``DEVICE`` with gradients disabled, and prints a
    four-digit classification report labelled with ``train_set.classes``.
    """
    best_state = torch.load(SAVE_PATH, map_location=DEVICE)
    model.load_state_dict(best_state)
    model.eval()

    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, targets in tqdm(test_loader, desc="[Testing]"):
            logits = model(inputs.to(DEVICE))
            # Targets never leave the CPU; only predictions are moved back.
            y_pred.extend(logits.argmax(1).cpu().numpy())
            y_true.extend(targets.numpy())

    report = classification_report(
        y_true, y_pred, target_names=train_set.classes, digits=4
    )
    banner = "=" * 80
    print("\n" + banner)
    print("================ Galaxy Classification Performance Report ================")
    print(banner)
    print(report)


# Entry point: fine-tune first, then evaluate the checkpoint that train() saved.
if __name__ == "__main__":
    train()
    test()
