In [None]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
import os

class Cifar10LT(Dataset):
    """Map-style dataset over in-memory long-tailed CIFAR-10 arrays.

    Parameters:
    - images: indexable collection; each item holds the 3*32*32 values of one
      image and is reshaped to (3, 32, 32)
    - labels: indexable collection of class labels aligned with ``images``
    - transform: optional callable applied to the (32, 32, 3) image array
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        # The label collection defines how many samples exist.
        return len(self.labels)

    def __getitem__(self, idx):
        raw, target = self.images[idx], self.labels[idx]
        # Rebuild the channel-first (C, H, W) view, then move the channel axis
        # last so the transform receives an H x W x C array.
        channels_first = np.reshape(raw, (3, 32, 32))
        sample = np.transpose(channels_first, (1, 2, 0))
        if self.transform:
            sample = self.transform(sample)
        return sample, target

# Directory holding the pre-built long-tailed CIFAR-10 arrays (relative to this notebook).
save_dir = '../../datasets/cifar10lt_10'

# Load the four arrays in a fixed order: train images/labels, then test images/labels.
_array_files = ('train_images.npy', 'train_labels.npy', 'test_images.npy', 'test_labels.npy')
train_images, train_labels, test_images, test_labels = (
    np.load(os.path.join(save_dir, fname)) for fname in _array_files
)

# Both pipelines start the same way: ndarray -> PIL image -> 224x224.
_upscale = [transforms.ToPILImage(), transforms.Resize((224, 224))]
# Training additionally gets a padded random crop and a random horizontal flip.
_augment = [transforms.RandomCrop(224, padding=4), transforms.RandomHorizontalFlip()]

train_transform = transforms.Compose(_upscale + _augment + [transforms.ToTensor()])
test_transform = transforms.Compose(_upscale + [transforms.ToTensor()])

# Wrap the arrays in datasets, each with its own preprocessing.
train_dataset = Cifar10LT(train_images, train_labels, transform=train_transform)
test_dataset = Cifar10LT(test_images, test_labels, transform=test_transform)

# Loaders share batch size and worker count; only the training loader shuffles.
_loader_opts = dict(batch_size=128, num_workers=0)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_opts)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_opts)
dataloaders = dict(train=train_loader, test=test_loader)

# Size and per-class histogram of the training split.
unique_train_labels, train_counts = np.unique(train_dataset.labels, return_counts=True)
train_histogram = dict(zip(unique_train_labels, train_counts))
print(f"Number of training samples: {len(train_dataset)}")
print("Number of samples per class in training set:", train_histogram)

# Same summary for the test split.
unique_test_labels, test_counts = np.unique(test_dataset.labels, return_counts=True)
test_histogram = dict(zip(unique_test_labels, test_counts))
print(f"Number of test samples: {len(test_dataset)}")
print("Number of samples per class in test set:", test_histogram)

# Pull a single batch from each loader to show the tensor shapes it yields.
_batch_reports = (
    (train_loader, "Shape of batch images in training set:", "Shape of batch labels in training set:"),
    (test_loader, "Shape of batch images in test set:", "Shape of batch labels in test set:"),
)
for _loader, _images_msg, _labels_msg in _batch_reports:
    for images, labels in _loader:
        print(_images_msg, images.shape)
        print(_labels_msg, labels.shape)
        break  # the first batch is enough


In [None]:
from torchvision.models import resnet50

# Pick the compute device first: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ResNet-50 constructed with weights=None (no pretrained weights requested).
model = resnet50(weights=None)
model.to(device)

In [None]:
# Replace the classification head with a 10-output linear layer that keeps the
# input width of the layer it replaces.
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=10)
# Move the model again so the freshly created layer ends up on `device` as well.
model.to(device)

In [None]:
# SGD hyper-parameters: learning rate 0.1, momentum 0.9, weight decay 5e-4.
_sgd_settings = dict(lr=0.1, momentum=0.9, weight_decay=5e-4)
optimizer = optim.SGD(model.parameters(), **_sgd_settings)

# Multiply the learning rate by 0.1 at the milestones 75 and 90
# (the scheduler is stepped once per epoch by the training loop).
scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=[75, 90], gamma=0.1)

In [None]:
import torch
import torch.nn.functional as F
from torch.autograd import Variable

def BSL(labels, logits, sample_per_class):
    """Cross-entropy on logits shifted by the log of the per-class sample counts.

    Parameters:
    - labels: target class indices, passed to ``F.cross_entropy`` as ``target``
    - logits: model outputs; the first dimension is the batch
    - sample_per_class: per-class sample counts (anything ``torch.tensor`` accepts)

    Returns:
    - loss: ``F.cross_entropy`` (default reduction) of the shifted logits
    """
    # Bring the counts to the same dtype/device as the logits.
    counts = torch.tensor(sample_per_class).type_as(logits)
    # One row of log-counts per sample, so every row of logits gets the same offset.
    log_counts = counts.log().unsqueeze(0).expand(logits.shape[0], -1)
    return F.cross_entropy(input=logits + log_counts, target=labels)


def pgd_loss(model,
             x_natural,
             y,
             samples_per_cls,
             optimizer,
             step_size=0.003,
             epsilon=0.031,
             perturb_steps=10,
             distance='l_inf'):
    """
    Craft adversarial examples for a batch and return the BSL loss on them.

    The model is put in eval mode while the perturbation is built and is
    switched to train mode before the final forward pass, so it is always left
    in train mode. The optimizer's gradients are zeroed before that forward
    pass; calling ``backward()``/``step()`` is left to the caller.

    Parameters:
    - model: network being trained
    - x_natural: clean input batch; values are clamped to [0, 1] after each step
    - y: target class indices for the batch
    - samples_per_cls: per-class sample counts forwarded to ``BSL``
    - optimizer: optimizer whose gradients are reset via ``zero_grad()``
    - step_size: size of each signed-gradient step
    - epsilon: maximum per-element distance from ``x_natural``
    - perturb_steps: number of attack iterations
    - distance: only 'l_inf' runs the iterative attack; any other value skips
      it, leaving just the small random start noise on the inputs

    Returns:
    - loss: ``BSL(y, model(x_adv), samples_per_cls)``
    """
    model.eval()
    # Random start. randn_like creates the noise with x_natural's device and dtype,
    # so nothing here assumes a GPU is present.
    x_adv = x_natural.detach() + 0.001 * torch.randn_like(x_natural)
    if distance == 'l_inf':
        for _ in range(perturb_steps):
            x_adv.requires_grad_()
            with torch.enable_grad():
                loss_ce = F.cross_entropy(model(x_adv), y)
            # Gradient with respect to the input only; parameter .grad is not touched.
            grad = torch.autograd.grad(loss_ce, [x_adv])[0]
            x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
            # Project back into the epsilon-box around the clean input, then into [0, 1].
            x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon)
            x_adv = torch.clamp(x_adv, 0.0, 1.0)

    model.train()

    # The adversarial batch is treated as constant data for the training step.
    x_adv = torch.clamp(x_adv, 0.0, 1.0).detach()
    # Zero gradient
    optimizer.zero_grad()

    loss = BSL(y, model(x_adv), samples_per_cls)
    return loss

In [None]:
# PGD settings handed to pgd_loss by the training loop:
# per-iteration step size, maximum perturbation size, and number of iterations.
step_size, epsilon, perturb_steps = 0.003, 0.031, 10
import numpy as np

def calculate_class_stats(dataset: Dataset):
    """
    Calculate the number of samples per class and the number of distinct classes.

    Labels are read from the dataset's ``targets`` attribute, or from ``labels``
    when there is no ``targets``. Only if neither exists is the dataset iterated,
    because iterating also loads and transforms every sample.

    Parameters:
    - dataset: dataset exposing ``targets`` or ``labels``, or yielding
      ``(sample, target)`` pairs when iterated

    Returns:
    - samples_per_cls: array whose i-th entry is the number of labels equal to i,
      for i in range(no_of_classes)
    - no_of_classes: number of distinct label values
    """
    raw_targets = None
    for attr_name in ('targets', 'labels'):
        raw_targets = getattr(dataset, attr_name, None)
        if raw_targets is not None:
            break

    if raw_targets is None:
        # Last resort: walk the dataset and collect the second element of each item.
        raw_targets = [target for _, target in dataset]

    if isinstance(raw_targets, torch.Tensor):
        targets = raw_targets.cpu().numpy()
    else:
        targets = np.array(raw_targets)
    # Flatten so labels stored as (N, 1) are counted the same way as (N,).
    targets = targets.reshape(-1)

    no_of_classes = len(np.unique(targets))
    # NOTE: counts are indexed by 0..no_of_classes-1, so labels are expected to be
    # exactly those integers; other label values are not counted.
    samples_per_cls = np.array([np.sum(targets == i) for i in range(no_of_classes)])

    return samples_per_cls, no_of_classes

# Per-class sample counts of the training split; samples_per_cls is later passed
# to pgd_loss by the training loop.
_class_stats = calculate_class_stats(dataset=train_dataset)
samples_per_cls, no_of_classes = _class_stats
print(f'Samples per class: {samples_per_cls}')
print(f'Number of classes: {no_of_classes}')

In [None]:
import torch
import numpy as np
import csv
from tqdm import tqdm  # Import tqdm for progress bar
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader
from torchattacks import FGSM, PGD, CW, DeepFool, AutoAttack
from collections import defaultdict

# Relies on test_dataset (and model) created in earlier cells.
NUM_CLASSES = 10  # number of classes reported by the per-class evaluation
BATCH_SIZE = 32
# Rebinds test_loader to an unshuffled loader using the batch size above.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

def evaluate_model_with_attack_by_class(model, dataloader, attack, num_classes):
    """
    Evaluate model accuracy, overall and per class, on inputs produced by an attack.

    Parameters:
    - model: The model to be evaluated (switched to eval mode)
    - dataloader: Iterable of (images, labels) batches
    - attack: Callable ``attack(images, labels)`` returning the perturbed images
    - num_classes: Number of classes to report (class indices 0..num_classes-1)

    Returns:
    - class_accuracies: dict class index -> accuracy, or None for a class with no samples
    - avg_accuracy: correct / total over all samples (NaN if there were no samples)
    - balanced_accuracy: mean of the per-class accuracies that are not None
      (NaN if there are none)
    """
    model.eval()

    # Use the device the model lives on instead of assuming CUDA is available.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    class_correct = defaultdict(int)
    class_total = defaultdict(int)

    total_correct = 0
    total_samples = 0

    # Add a progress bar around the dataloader
    for images, labels in tqdm(dataloader, desc=f'Evaluating {attack}'):
        # Flatten so (N, 1)-shaped labels compare element-wise with the (N,) predictions.
        labels = labels.to(device).reshape(-1)
        # The attack may need gradients, so it runs outside no_grad.
        adv_images = attack(images.to(device), labels).detach().to(device)

        # Classification itself needs no autograd graph.
        with torch.no_grad():
            outputs = model(adv_images)
        _, predicted = torch.max(outputs, 1)

        hits = predicted == labels
        total_samples += labels.size(0)
        total_correct += hits.sum().item()

        # Record results by class; one host transfer per batch rather than per sample.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_total[label] += 1
            class_correct[label] += int(hit)

    # Calculate accuracy for each class
    class_accuracies = {}
    for class_idx in range(num_classes):
        if class_total[class_idx] > 0:
            class_accuracies[class_idx] = class_correct[class_idx] / class_total[class_idx]
        else:
            class_accuracies[class_idx] = None

    # Calculate overall average accuracy (an empty dataloader yields NaN, not an exception)
    avg_accuracy = total_correct / total_samples if total_samples else float('nan')

    # Calculate class-balanced accuracy over the classes that actually occurred
    observed = [acc for acc in class_accuracies.values() if acc is not None]
    balanced_accuracy = np.mean(observed) if observed else float('nan')

    return class_accuracies, avg_accuracy, balanced_accuracy


def save_result_to_csv(attack_name, class_accuracies, avg_accuracy, balanced_accuracy, filename='attack_results.csv'):
    """
    Append the results of one attack to a CSV file and print a one-line summary.

    Parameters:
    - attack_name: Name of the attack, used as the prefix of every row label
    - class_accuracies: Mapping class index -> accuracy
    - avg_accuracy: Overall average accuracy
    - balanced_accuracy: Class-balanced accuracy
    - filename: File to append to (opened in append mode)
    """
    summary_rows = (
        [f'{attack_name} - Avg Accuracy', avg_accuracy, ''],
        [f'{attack_name} - Balanced Accuracy', balanced_accuracy, ''],
    )
    with open(filename, mode='a', newline='') as file:
        out = csv.writer(file)
        # One two-field row per class, in the mapping's iteration order.
        out.writerows(
            [f'{attack_name} - Class {class_idx}', accuracy]
            for class_idx, accuracy in class_accuracies.items()
        )
        # The two summary rows carry a trailing empty field.
        out.writerows(summary_rows)
        print(f'{attack_name} - Avg Accuracy: {avg_accuracy:.4f}, Balanced Accuracy: {balanced_accuracy:.4f} saved to {filename}')


In [None]:
import torch
import os
import time
import csv
import matplotlib.pyplot as plt
import pytz
import datetime
from tqdm import tqdm
from collections import defaultdict

def train_model(model, dataloaders, optimizer, scheduler, num_epochs=100):
    """
    Adversarially train the model and log progress to disk.

    Each training batch is turned into a loss by ``pgd_loss`` (using the
    notebook-level ``samples_per_cls``, ``step_size``, ``epsilon`` and
    ``perturb_steps``), followed by one optimizer step. The scheduler is stepped
    once per epoch. Every 10th epoch ``evaluate_and_save_results`` is run on the
    notebook-level ``test_loader``.

    Files written under the current working directory:
    - train_loss.csv: one row per epoch (epoch number, mean training loss)
    - attack_results.csv: header here, rows appended by ``evaluate_and_save_results``
    - parm/model_epoch_<n>.pt: model state dict after every epoch
    - model_final.pt: model state dict after the last epoch

    Parameters:
    - model: network to train
    - dataloaders: dict whose 'train' entry yields (inputs, targets) batches
    - optimizer: optimizer updating the model parameters
    - scheduler: learning-rate scheduler, stepped once per epoch
    - num_epochs: number of epochs to run
    """
    train_loss_history = []

    save_dir = os.getcwd()
    param_dir = os.path.join(save_dir, "parm")
    os.makedirs(param_dir, exist_ok=True)  # Create directory for saving parameters

    # Initialize training loss CSV file
    csv_path = os.path.join(save_dir, "train_loss.csv")
    with open(csv_path, "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Epoch', 'Train Loss'])

    # Initialize attack results CSV file
    attack_results_path = os.path.join(save_dir, "attack_results.csv")
    with open(attack_results_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Attack', 'Class Index', 'Accuracy'])  # Header row with Epoch column

    tz = pytz.timezone('Asia/Shanghai')

    # Send batches to wherever the model's parameters live.
    model_device = next(model.parameters()).device

    for epoch in range(num_epochs):
        start_time = time.time()

        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # The periodic evaluation below leaves the model in eval mode, so reset it here.
        model.train()
        running_loss = 0.0

        for inputs, targets in tqdm(dataloaders['train'], desc=f'Epoch {epoch + 1}/{num_epochs}', leave=False):
            inputs = inputs.to(model_device)
            # reshape(-1) flattens (N, 1) labels and, unlike squeeze(), keeps a
            # batch of one as a 1-D tensor instead of a 0-d scalar.
            targets = targets.to(model_device).reshape(-1)

            optimizer.zero_grad()
            # pgd_loss runs the only forward pass the update needs (on the
            # adversarial batch); no separate clean forward pass is made.
            loss = pgd_loss(model, inputs, targets, samples_per_cls, optimizer, step_size, epsilon, perturb_steps)

            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch value is a per-sample mean.
            running_loss += loss.item() * inputs.size(0)

        scheduler.step()

        epoch_loss = running_loss / len(dataloaders['train'].dataset)
        print('Train Loss: {:.4f}'.format(epoch_loss))

        end_time = time.time()
        epoch_time = end_time - start_time
        print('Time taken: {:.0f}m {:.0f}s'.format(epoch_time // 60, epoch_time % 60))

        current_time_beijing = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        print('Current time (Beijing):', current_time_beijing)

        train_loss_history.append(epoch_loss)

        with open(csv_path, "a", newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([epoch + 1, epoch_loss])

        # Save parameters for current epoch
        param_file_path = os.path.join(param_dir, f"model_epoch_{epoch + 1}.pt")
        torch.save(model.state_dict(), param_file_path)

        # Call evaluation function and save results
        if (epoch + 1) % 10 == 0:  # Check if current epoch is multiple of 10
            # Uses the notebook-level test_loader, not dataloaders['test'].
            evaluate_and_save_results(model, test_loader, attack_results_path, epoch + 1)  # Pass current epoch

        torch.cuda.empty_cache()

    # Save final model parameters
    torch.save(model.state_dict(), os.path.join(save_dir, "model_final.pt"))

    plt.plot(range(1, num_epochs + 1), train_loss_history, label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss')
    plt.legend()
    plt.show()


def evaluate_and_save_results(model, test_loader, results_path, epoch):
    """
    Run the periodic attack evaluation and append its rows to the results CSV.

    Parameters:
    - model: model under evaluation
    - test_loader: loader passed to evaluate_model_with_attack_by_class
    - results_path: CSV file the rows are appended to
    - epoch: epoch number written in the first column of every row
    """
    # Attacks to evaluate, keyed by the name written to the CSV.
    # 'nature' is FGSM with eps=0 — presumably this leaves inputs unperturbed; confirm
    # against the torchattacks documentation.
    attack_suite = {
        'nature': FGSM(model, eps=0),
        'PGD-20': PGD(model, eps=8/255, alpha=1/255, steps=20),
    }

    for attack_name, attack in attack_suite.items():
        print(f'Evaluating with {attack_name}...')
        class_accuracies, avg_accuracy, balanced_accuracy = evaluate_model_with_attack_by_class(
            model, test_loader, attack, NUM_CLASSES
        )

        with open(results_path, mode='a', newline='') as file:
            # Per-class rows first, then the two summary rows.
            rows = [
                [epoch, attack_name, class_idx, accuracy]
                for class_idx, accuracy in class_accuracies.items()
            ]
            rows.append([epoch, attack_name, 'Avg Accuracy', avg_accuracy])
            rows.append([epoch, attack_name, 'Balanced Accuracy', balanced_accuracy])
            csv.writer(file).writerows(rows)
            print(f'{attack_name} - Avg Accuracy: {avg_accuracy:.4f}, Balanced Accuracy: {balanced_accuracy:.4f} saved to {results_path}')

# NOTE: evaluate_model_with_attack_by_class and NUM_CLASSES used above are defined in an earlier cell.


In [None]:
import torch
import torch.nn.functional as F
from torch.autograd import Variable

# Kick off training with the model, loaders, optimizer and scheduler built above.
train_model(
    model=model,
    dataloaders=dataloaders,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=100,
)

In [None]:
import torch
import numpy as np
from sklearn.metrics import roc_auc_score

def evaluate_model(model, dataloader):
    """
    Compute accuracy and macro one-vs-one ROC AUC of the model over a dataloader.

    Parameters:
    - model: model to evaluate (switched to eval mode); batches are moved to the
      device of its first parameter
    - dataloader: iterable of (inputs, targets) batches

    Returns:
    - accuracy: fraction of samples whose arg-max prediction equals the target
    - roc_auc: ``roc_auc_score`` of the softmax probabilities
      (multi_class='ovo', average='macro')
    """
    device = next(model.parameters()).device

    model.eval()  # Set model to evaluation mode

    total_correct = 0
    total_samples = 0
    label_batches = []
    prob_batches = []

    with torch.no_grad():  # No gradients needed for evaluation
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            # reshape(-1) flattens (N, 1) labels and, unlike squeeze(), keeps a
            # batch of one as a 1-D tensor so size(0) below stays valid.
            targets = targets.to(device).reshape(-1)
            outputs = model(inputs)  # Forward pass
            _, predicted = torch.max(outputs, 1)  # Get predicted class
            total_samples += targets.size(0)
            total_correct += (predicted == targets).sum().item()  # Count correct predictions
            # Keep true labels and class probabilities for the ROC AUC calculation
            label_batches.append(targets.cpu().numpy())
            prob_batches.append(torch.softmax(outputs, dim=1).cpu().numpy())

    # Calculate metrics
    accuracy = total_correct / total_samples
    y_true = np.concatenate(label_batches)
    y_scores = np.concatenate(prob_batches)
    # Calculate ROC AUC for multi-class classification
    roc_auc = roc_auc_score(y_true, y_scores, multi_class='ovo', average='macro')

    return accuracy, roc_auc

# Metrics on the training loader. NOTE(review): train_loader appears to be the
# shuffled, augmented loader at this point — confirm that is the intended input.
train_metrics = evaluate_model(model=model, dataloader=train_loader)
accuracy, roc_auc = train_metrics
print(f"Train Accuracy: {accuracy:.4f}")
print(f"Train ROC AUC: {roc_auc:.4f}")

# Metrics on the test loader; accuracy and roc_auc are rebound to the test values.
test_metrics = evaluate_model(model=model, dataloader=test_loader)
accuracy, roc_auc = test_metrics
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test ROC AUC: {roc_auc:.4f}")


In [None]:
import torchattacks
from torchattacks import PGD, FGSM

# Batch size used for the test loader rebuilt below.
BATCH_SIZE = 64

# Rebind test_loader to an unshuffled loader over test_dataset with that batch size.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Restore the weights saved as model_final.pt in the current directory.
weights_path = './model_final.pt'
# map_location remaps the stored tensors onto the device chosen earlier, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
state_dict = torch.load(weights_path, map_location=device)
# Update the model's parameters with the loaded state dictionary
model.load_state_dict(state_dict)

In [None]:
import torch
import numpy as np
import csv
from tqdm import tqdm  # Import tqdm for progress bar
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader
from torchattacks import FGSM, PGD, CW, DeepFool, AutoAttack
from collections import defaultdict

# Relies on test_dataset (and model) created in earlier cells.
NUM_CLASSES = 10  # number of classes reported by the per-class evaluation
BATCH_SIZE = 32
# Rebinds test_loader to an unshuffled loader using the batch size above.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

def evaluate_model_with_attack_by_class(model, dataloader, attack, num_classes):
    """
    Evaluate model accuracy, overall and per class, on inputs produced by an attack.

    Parameters:
    - model: The model to evaluate (switched to eval mode)
    - dataloader: Iterable of (images, labels) batches
    - attack: Callable ``attack(images, labels)`` returning the perturbed images
    - num_classes: Number of classes to report (class indices 0..num_classes-1)

    Returns:
    - class_accuracies: dict class index -> accuracy, or None for a class with no samples
    - avg_accuracy: correct / total over all samples (NaN if there were no samples)
    - balanced_accuracy: mean of the per-class accuracies that are not None
      (NaN if there are none)
    """
    model.eval()

    # Use the device the model lives on instead of assuming CUDA is available.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    class_correct = defaultdict(int)
    class_total = defaultdict(int)

    total_correct = 0
    total_samples = 0

    # Add a progress bar around the dataloader
    for images, labels in tqdm(dataloader, desc=f'Evaluating {attack}'):
        # Flatten so (N, 1)-shaped labels compare element-wise with the (N,) predictions.
        labels = labels.to(device).reshape(-1)
        # The attack may need gradients, so it runs outside no_grad.
        adv_images = attack(images.to(device), labels).detach().to(device)

        # Classification itself needs no autograd graph.
        with torch.no_grad():
            outputs = model(adv_images)
        _, predicted = torch.max(outputs, 1)

        hits = predicted == labels
        total_samples += labels.size(0)
        total_correct += hits.sum().item()

        # Record results by class; one host transfer per batch rather than per sample.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_total[label] += 1
            class_correct[label] += int(hit)

    # Calculate accuracy for each class
    class_accuracies = {}
    for class_idx in range(num_classes):
        if class_total[class_idx] > 0:
            class_accuracies[class_idx] = class_correct[class_idx] / class_total[class_idx]
        else:
            class_accuracies[class_idx] = None

    # Calculate overall average accuracy (an empty dataloader yields NaN, not an exception)
    avg_accuracy = total_correct / total_samples if total_samples else float('nan')

    # Calculate class-balanced accuracy over the classes that actually occurred
    observed = [acc for acc in class_accuracies.values() if acc is not None]
    balanced_accuracy = np.mean(observed) if observed else float('nan')

    return class_accuracies, avg_accuracy, balanced_accuracy


def save_result_to_csv(attack_name, class_accuracies, avg_accuracy, balanced_accuracy, filename='attack_results.csv'):
    """
    Append the results of one attack to a CSV file and print a one-line summary.

    Parameters:
    - attack_name: Name of the attack, used as the prefix of every row label
    - class_accuracies: Mapping class index -> accuracy
    - avg_accuracy: Overall average accuracy
    - balanced_accuracy: Class-balanced accuracy
    - filename: File to append to (opened in append mode)
    """
    summary_rows = (
        [f'{attack_name} - Avg Accuracy', avg_accuracy, ''],
        [f'{attack_name} - Balanced Accuracy', balanced_accuracy, ''],
    )
    with open(filename, mode='a', newline='') as file:
        out = csv.writer(file)
        # One two-field row per class, in the mapping's iteration order.
        out.writerows(
            [f'{attack_name} - Class {class_idx}', accuracy]
            for class_idx, accuracy in class_accuracies.items()
        )
        # The two summary rows carry a trailing empty field.
        out.writerows(summary_rows)
        print(f'{attack_name} - Avg Accuracy: {avg_accuracy:.4f}, Balanced Accuracy: {balanced_accuracy:.4f} saved to {filename}')


# All results of this final evaluation go to a single file: the header is written
# here and the same path is passed to save_result_to_csv below (its default
# filename is a different file, 'attack_results.csv').
FINAL_RESULTS_FILE = 'attack_results_final.csv'

# Initialize CSV file with header
with open(FINAL_RESULTS_FILE, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Attack', 'Accuracy'])

# Define attacks to evaluate
attacks = [
    ('nature', FGSM(model, eps=0)),
    ('FGSM', FGSM(model, eps=8/255)),
    ('PGD-20', PGD(model, eps=8/255, alpha=1/255, steps=20)),
    ('PGD-100', PGD(model, eps=8/255, alpha=1/255, steps=100)),
    ('CW', CW(model, c=10, kappa=0, steps=100, lr=0.01)),
    ('DeepFool', DeepFool(model)),
    ('AutoAttack', AutoAttack(model, norm='Linf', eps=8/255, version='standard', n_classes=NUM_CLASSES, seed=None, verbose=False))
]

# Evaluate each attack and save results in real-time
for attack_name, attack in attacks:
    print(f'Evaluating with {attack_name}...')
    class_accuracies, avg_accuracy, balanced_accuracy = evaluate_model_with_attack_by_class(model, test_loader, attack, NUM_CLASSES)
    save_result_to_csv(attack_name, class_accuracies, avg_accuracy, balanced_accuracy, filename=FINAL_RESULTS_FILE)


In [None]:
import torch
import numpy as np
import csv
from tqdm import tqdm  # Import tqdm for progress bar
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader
from torchattacks import FGSM, PGD, CW, DeepFool, AutoAttack
from collections import defaultdict

# Relies on test_dataset (and model) created in earlier cells.
NUM_CLASSES = 10  # number of classes reported by the per-class evaluation
BATCH_SIZE = 32
# Rebinds test_loader to an unshuffled loader using the batch size above.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

def evaluate_model_with_attack_by_class(model, dataloader, attack, num_classes):
    """
    Evaluate model accuracy, overall and per class, on inputs produced by an attack.

    Parameters:
    - model: The model to be evaluated (switched to eval mode)
    - dataloader: Iterable of (images, labels) batches
    - attack: Callable ``attack(images, labels)`` returning the perturbed images
    - num_classes: Number of classes to report (class indices 0..num_classes-1)

    Returns:
    - class_accuracies: dict class index -> accuracy, or None for a class with no samples
    - avg_accuracy: correct / total over all samples (NaN if there were no samples)
    - balanced_accuracy: mean of the per-class accuracies that are not None
      (NaN if there are none)
    """
    model.eval()

    # Use the device the model lives on instead of assuming CUDA is available.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    class_correct = defaultdict(int)
    class_total = defaultdict(int)

    total_correct = 0
    total_samples = 0

    # Add a progress bar around the dataloader
    for images, labels in tqdm(dataloader, desc=f'Evaluating {attack}'):
        # Flatten so (N, 1)-shaped labels compare element-wise with the (N,) predictions.
        labels = labels.to(device).reshape(-1)
        # The attack may need gradients, so it runs outside no_grad.
        adv_images = attack(images.to(device), labels).detach().to(device)

        # Classification itself needs no autograd graph.
        with torch.no_grad():
            outputs = model(adv_images)
        _, predicted = torch.max(outputs, 1)

        hits = predicted == labels
        total_samples += labels.size(0)
        total_correct += hits.sum().item()

        # Record results by class; one host transfer per batch rather than per sample.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_total[label] += 1
            class_correct[label] += int(hit)

    # Calculate accuracy for each class
    class_accuracies = {}
    for class_idx in range(num_classes):
        if class_total[class_idx] > 0:
            class_accuracies[class_idx] = class_correct[class_idx] / class_total[class_idx]
        else:
            class_accuracies[class_idx] = None

    # Calculate overall average accuracy (an empty dataloader yields NaN, not an exception)
    avg_accuracy = total_correct / total_samples if total_samples else float('nan')

    # Calculate class-balanced accuracy over the classes that actually occurred
    observed = [acc for acc in class_accuracies.values() if acc is not None]
    balanced_accuracy = np.mean(observed) if observed else float('nan')

    return class_accuracies, avg_accuracy, balanced_accuracy


def save_result_to_csv(attack_name, class_accuracies, avg_accuracy, balanced_accuracy, filename='attack_results.csv'):
    """
    Append the results of one attack to a CSV file and print a one-line summary.

    Parameters:
    - attack_name: Name of the attack, used as the prefix of every row label
    - class_accuracies: Mapping class index -> accuracy
    - avg_accuracy: Overall average accuracy
    - balanced_accuracy: Class-balanced accuracy
    - filename: File to append to (opened in append mode)
    """
    summary_rows = (
        [f'{attack_name} - Avg Accuracy', avg_accuracy, ''],
        [f'{attack_name} - Balanced Accuracy', balanced_accuracy, ''],
    )
    with open(filename, mode='a', newline='') as file:
        out = csv.writer(file)
        # One two-field row per class, in the mapping's iteration order.
        out.writerows(
            [f'{attack_name} - Class {class_idx}', accuracy]
            for class_idx, accuracy in class_accuracies.items()
        )
        # The two summary rows carry a trailing empty field.
        out.writerows(summary_rows)
        print(f'{attack_name} - Avg Accuracy: {avg_accuracy:.4f}, Balanced Accuracy: {balanced_accuracy:.4f} saved to {filename}')
