In [10]:
import tarfile
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import pandas as pd
import numpy as np
import copy
from PIL import Image
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm

# Handle the dataset

### extract the compressed dataset

In [None]:
# Unpack the gzip-compressed dataset archive into the current working directory.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only run this on an archive obtained from a trusted source.
with tarfile.open(name='hw1-data.tar.gz', mode='r:gz') as archive:
    archive.extractall(path='.')

### load data

In [None]:
# Root folder of the extracted dataset; it is expected to contain the
# 'train' and 'val' sub-folders used below.
data_dir = 'data'

# Training-time preprocessing: learned augmentation policy, random
# scale/aspect crop to 224x224, random horizontal flip, then tensor
# conversion and per-channel normalisation.
train_transform = transforms.Compose([
    transforms.autoaugment.AutoAugment(),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Deterministic evaluation preprocessing. Resize(256) is given a single
# integer (not a (256, 256) tuple); the centre 224x224 patch is then cropped.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# One sub-folder per class; ImageFolder derives the labels from folder names.
train_dataset = datasets.ImageFolder(
    os.path.join(data_dir, 'train'), transform=train_transform)
valid_dataset = datasets.ImageFolder(
    os.path.join(data_dir, 'val'), transform=transform)

# Shuffle only the training data; validation order stays fixed.
train_loader = DataLoader(
    train_dataset, batch_size=32, shuffle=True, num_workers=4)
valid_loader = DataLoader(
    valid_dataset, batch_size=32, shuffle=False, num_workers=4)

# Sanity check: pull a single training batch and show its labels and shapes.
images, labels = next(iter(train_loader))
print(labels)
print(images.shape, labels.shape)

### create Bootstrapped Subsets

In [12]:
def create_bootstrap_loader(dataset, batch_size, n_samples,
                            num_workers=4, seed=None):
    """Build a shuffled DataLoader over a bootstrap resample of ``dataset``.

    ``n_samples`` indices are drawn uniformly *with replacement* from
    ``range(len(dataset))``, so some items appear several times and others
    not at all.

    Args:
        dataset: Any map-style dataset supporting ``len()`` and integer
            indexing.
        batch_size: Batch size passed to the DataLoader.
        n_samples: Number of indices to draw.
        num_workers: Worker processes used by the DataLoader (default 4,
            the value that used to be hard-coded).
        seed: Optional seed (or ``numpy.random.Generator``) for the
            resampling. When ``None`` the global ``np.random`` state is
            used, exactly as before.

    Returns:
        A ``DataLoader`` with ``shuffle=True`` wrapping a ``Subset`` of
        ``dataset``.

    Raises:
        ValueError: Raised by NumPy when ``dataset`` is empty and
            ``n_samples`` is non-zero.
    """
    # A dedicated Generator makes a given resample reproducible without
    # touching the global NumPy random state.
    rng = np.random if seed is None else np.random.default_rng(seed)
    # Plain Python ints keep the Subset usable with datasets that do not
    # accept NumPy integer scalars as indices.
    indices = rng.choice(len(dataset), n_samples, replace=True).tolist()
    subset = Subset(dataset, indices)
    return DataLoader(subset, batch_size=batch_size,
                      shuffle=True, num_workers=num_workers)

# Training

### Load pretrained ResNeXt-101 (64x4d)

In [None]:
# Pick the compute device. The second GPU ("cuda:1") is preferred, as in the
# original setup, but it is only selected when it actually exists; otherwise
# fall back to the first GPU, and to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device(
        "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


def get_model(num_classes=100):
    """Create an ImageNet-pretrained ResNeXt-101 (64x4d) with a new head.

    Parameters whose name starts with ``layer1`` are frozen; every other
    parameter keeps the ``requires_grad`` value it was loaded with. The
    final fully connected layer is replaced by a freshly initialised
    ``nn.Linear`` with ``num_classes`` outputs, and the model is moved to
    the module-level ``device``.

    Args:
        num_classes: Number of output classes for the new head
            (default 100, the previously hard-coded value).

    Returns:
        The model, already moved to ``device``.
    """
    # `weights=` replaces the deprecated `pretrained=True` flag;
    # IMAGENET1K_V1 is the checkpoint that flag used to select.
    model = models.resnext101_64x4d(
        weights=models.ResNeXt101_64X4D_Weights.IMAGENET1K_V1)

    # Freeze only the parameters that belong to `layer1`.
    for name, param in model.named_parameters():
        if name.startswith("layer1"):
            param.requires_grad = False

    # Swap the classification head for one sized to the target classes.
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)

    model = model.to(device)

    # Report model size so the effect of freezing is visible in the output.
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel()
                           for p in model.parameters() if p.requires_grad)
    print(f"Total parameters: {total_params}")
    print(f"Trainable parameters: {trainable_params}")

    return model

### train process

In [14]:
# Best model of every bagging round; filled by train() and consumed by the
# "testing after training" cell.
bagging_models = []


def train(num_models=8, epochs=10):
    """Train a bagging ensemble and keep the best epoch of each member.

    For every ensemble member a fresh model is created with ``get_model()``
    and trained on its own bootstrap resample of ``train_dataset``. After
    each epoch the model is evaluated on ``valid_loader``; a deep copy is
    kept whenever validation accuracy improves, or when it ties the best
    accuracy while the loss of the last training batch of that epoch is
    lower than the one recorded for the current best copy.

    The best copy of each member is written to
    ``./checkpoint/ResNeXt101_64x4d_Bagging{i}.pth`` (1-based ``i``) and
    appended to the module-level ``bagging_models`` list.

    Args:
        num_models: Number of ensemble members to train.
        epochs: Number of epochs per member.

    Returns:
        None. Results are exposed through ``bagging_models`` and the
        checkpoint files.
    """
    # Create the output folder up front: torch.save() does not create
    # missing directories, and failing only after a full training run
    # would throw that work away.
    checkpoint_dir = './checkpoint'
    os.makedirs(checkpoint_dir, exist_ok=True)

    for i in range(num_models):
        model = get_model()
        best_model = model
        best_model_loss = 0
        best_acc = 0
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.00008)
        print(f"\nTraining model {i+1}/{num_models}")

        # Each member sees a different resample of the training set.
        loader = create_bootstrap_loader(train_dataset, batch_size=32,
                                         n_samples=len(train_dataset))
        for epoch in range(epochs):
            # training phase
            model.train()
            for images, labels in loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                output = model(images)
                loss = criterion(output, labels)
                loss.backward()
                optimizer.step()
            # Loss of the final training batch of this epoch; read once so
            # the value is not fetched from the device repeatedly.
            last_batch_loss = loss.item()

            # validation phase
            model.eval()
            correct = 0
            total = 0
            with torch.no_grad():
                for images, labels in valid_loader:
                    images, labels = images.to(device), labels.to(device)
                    output = model(images)
                    _, predicted = torch.max(output, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            acc = correct / total

            if acc > best_acc:
                # Strict improvement in validation accuracy.
                best_acc = acc
                best_model = copy.deepcopy(model)
                best_model_loss = last_batch_loss
                print(f"Epoch {epoch + 1}, Accuracy: {acc}")
                print(f"Best model so far with accuracy: {best_acc}")
            elif acc == best_acc and last_batch_loss < best_model_loss:
                # Same accuracy: prefer the epoch with the lower recorded loss.
                best_model = copy.deepcopy(model)
                best_model_loss = last_batch_loss
                print(f"Epoch {epoch + 1}, Accuracy: {acc}")
                print(f"Best model so far with accuracy: {best_acc}\n")

        torch.save(
            best_model.state_dict(),
            os.path.join(checkpoint_dir,
                         f'ResNeXt101_64x4d_Bagging{i+1}.pth'))
        bagging_models.append(best_model)


In [None]:
# Train the full ensemble: nine bagged members, twenty epochs each.
train(epochs=20, num_models=9)

# Testing

### test dataset

In [16]:
class TestDataset(torch.utils.data.Dataset):
    """Unlabelled image folder that yields ``(image, file_stem)`` pairs.

    All files directly inside ``test_dir`` whose extension is ``.jpg``,
    ``.png`` or ``.jpeg`` (matched case-insensitively) are listed in sorted
    order. Each item is the image, converted to RGB and passed through
    ``transform`` when one is given, together with the file name without
    its extension.

    Args:
        test_dir: Directory containing the test images.
        transform: Optional callable applied to the loaded PIL image.
    """

    # Extensions accepted as images; compared against the lower-cased name.
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

    def __init__(self, test_dir, transform=None):
        self.test_dir = test_dir
        self.transform = transform
        # Lower-case the name before matching so files such as "IMG.JPG"
        # are not silently skipped.
        self.image_files = sorted(
            f for f in os.listdir(test_dir)
            if f.lower().endswith(self.IMAGE_EXTENSIONS))

    def __len__(self):
        """Return the number of image files found in ``test_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and return it with its file stem."""
        img_name = self.image_files[idx]
        img_path = os.path.join(self.test_dir, img_name)

        file_name = os.path.splitext(img_name)[0]

        # Force three channels: grayscale, palette or RGBA files would
        # otherwise not match a three-channel normalisation. The context
        # manager closes the underlying file once the pixels are copied.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Apply transforms
        if self.transform:
            image = self.transform(image)

        return image, file_name

In [17]:
# Unlabelled test images, preprocessed with the deterministic evaluation
# transform.
test_dataset = TestDataset("./data/test", transform=transform)

# shuffle=False keeps a fixed order, so every model in the ensemble sees the
# images in the same sequence and predictions line up with file names.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    num_workers=4,
    shuffle=False,
)

### (1) Testing with the in-memory models, right after training

In [None]:
# Ensemble inference with the models still held in `bagging_models`.

# Prefer the second GPU, but only when it exists; fall back to the first GPU
# and then to the CPU.
if torch.cuda.is_available():
    device = torch.device(
        "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

if not bagging_models:
    raise RuntimeError(
        "bagging_models is empty: run the training cell first, or use the "
        "checkpoint-based testing cell instead.")

# Index -> class-name mapping taken from the training ImageFolder.
class_name = train_dataset.classes

all_predictions = []
for model in bagging_models:
    model.to(device)
    model.eval()
    predictions = []
    # Rebuilt for every model; test_loader is not shuffled, so the order is
    # identical each time and the last copy is used for the CSV.
    filenames = []
    with torch.no_grad():
        for images, file_names in tqdm(test_loader):
            images = images.to(device)

            # Forward pass
            outputs = model(images)

            # Get predicted class
            _, preds = torch.max(outputs, 1)

            # Map predicted indices to class names
            predicted_labels = [class_name[p]
                                for p in preds.cpu().numpy()]

            predictions.extend(predicted_labels)
            filenames.extend(file_names)
    all_predictions.append(predictions)


# Majority voting across models, one vote per model and image. max() scans
# the votes in model order, so a tie is always resolved in favour of the
# label predicted by the earliest model; the result no longer depends on
# set iteration order.
predictions = [max(votes, key=votes.count)
               for votes in zip(*all_predictions)]

# Create DataFrame
results_df = pd.DataFrame({
    'image_name': filenames,
    'pred_label': predictions
})

# Save to CSV
output_csv = 'prediction.csv'
results_df.to_csv(output_csv, index=False)
print(f"Predictions saved to {output_csv}")

### (2) Testing directly from the saved checkpoints

In [None]:
# Ensemble inference from the checkpoint files written during training.

# Prefer the second GPU, but only when it exists; fall back to the first GPU
# and then to the CPU.
if torch.cuda.is_available():
    device = torch.device(
        "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

# Number of checkpoints to load; must match the number of models trained.
num_models = 9
num_classes = 100

# Index -> class-name mapping taken from the training ImageFolder.
class_name = train_dataset.classes

all_predictions = []
for i in range(num_models):
    # weights=None: every parameter is overwritten by the checkpoint below,
    # so loading the ImageNet weights first would be wasted work.
    model = models.resnext101_64x4d(weights=None)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)

    model_path = f'./checkpoint/ResNeXt101_64x4d_Bagging{i+1}.pth'
    # map_location lets a checkpoint saved on one device be restored on
    # whichever device is available now.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)

    model.eval()
    predictions = []
    # Rebuilt for every model; test_loader is not shuffled, so the order is
    # identical each time and the last copy is used for the CSV.
    filenames = []
    with torch.no_grad():
        for images, file_names in tqdm(test_loader):
            images = images.to(device)

            # Forward pass
            outputs = model(images)

            # Get predicted class
            _, preds = torch.max(outputs, 1)

            # Map predicted indices to class names
            predicted_labels = [class_name[p]
                                for p in preds.cpu().numpy()]

            predictions.extend(predicted_labels)
            filenames.extend(file_names)
    all_predictions.append(predictions)


# Majority voting across models, one vote per model and image. max() scans
# the votes in model order, so a tie is always resolved in favour of the
# label predicted by the earliest model; the result no longer depends on
# set iteration order.
predictions = [max(votes, key=votes.count)
               for votes in zip(*all_predictions)]

# Create DataFrame
results_df = pd.DataFrame({
    'image_name': filenames,
    'pred_label': predictions
})

# Save to CSV
output_csv = 'prediction.csv'
results_df.to_csv(output_csv, index=False)
print(f"Predictions saved to {output_csv}")