<a href="https://colab.research.google.com/github/TheS1n233/Distributed-Learning-Project5/blob/experiments/hyperparams_finding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!pip install torch torchvision matplotlib
!pip install --upgrade torch torchvision



In [6]:
import torch
import torchvision
import torchvision.transforms as transforms
import random
import numpy as np
import json
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import matplotlib.pyplot as plt
import time
from torch.amp import GradScaler, autocast
import os
from google.colab import drive
import math

In [7]:
drive.mount('/content/drive')
if not os.path.exists('/content/drive/MyDrive'):
    raise RuntimeError("Google Drive not mounted correctly!")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
BATCH_SIZE = 64

# Preprocessing new

In [9]:
# Define the transform to only convert the images to tensors (without normalization yet)
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Load the CIFAR-100 training dataset
train_dataset = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform
)


train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=False, num_workers=2)

# Initialize sums for calculating mean and std
mean = torch.zeros(3)
std = torch.zeros(3)

for images, _ in train_loader:
    # Compute mean and std for each channel
    mean += images.mean(dim=[0, 2, 3])  # Mean per channel (R, G, B)
    std += images.std(dim=[0, 2, 3])  # Std per channel (R, G, B)

mean /= len(train_loader)
std /= len(train_loader)

print("Mean: ", mean)
print("Std: ", std)

transform_train = transforms.Compose([
    transforms.RandomCrop((32,32),padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[mean[0].item(), mean[1].item(), mean[2].item()],
                         std=[std[0].item(), std[1].item(), std[2].item()])
])

transform_test = transforms.Compose([
    transforms.CenterCrop(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[mean[0].item(), mean[1].item(), mean[2].item()],
                         std=[std[0].item(), std[1].item(), std[2].item()])
])

# Load CIFAR-100 dataset
start_time = time.time()
train_dataset = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform_train
)
test_dataset = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform_test
)
print(f"Dataset loading time: {time.time() - start_time:.2f} seconds")

# Split training and validation sets
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

# Data loaders
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# Debugging: Check DataLoader outputs
for i, (inputs, labels) in enumerate(train_loader):
    print(f"Batch {i}: inputs shape: {inputs.shape}, labels shape: {labels.shape}")
    if i == 10:  # Test first 10 batches
        break
print(f"Data loading for 10 batches completed.")


Files already downloaded and verified
Mean:  tensor([0.5071, 0.4865, 0.4409])
Std:  tensor([0.2667, 0.2558, 0.2754])
Files already downloaded and verified
Files already downloaded and verified
Dataset loading time: 1.76 seconds
Batch 0: inputs shape: torch.Size([64, 3, 32, 32]), labels shape: torch.Size([64])
Batch 1: inputs shape: torch.Size([64, 3, 32, 32]), labels shape: torch.Size([64])
Batch 2: inputs shape: torch.Size([64, 3, 32, 32]), labels shape: torch.Size([64])
Batch 3: inputs shape: torch.Size([64, 3, 32, 32]), labels shape: torch.Size([64])
Batch 4: inputs shape: torch.Size([64, 3, 32, 32]), labels shape: torch.Size([64])
Batch 5: inputs shape: torch.Size([64, 3, 32, 32]), labels shape: torch.Size([64])
Batch 6: inputs shape: torch.Size([64, 3, 32, 32]), labels shape: torch.Size([64])
Batch 7: inputs shape: torch.Size([64, 3, 32, 32]), labels shape: torch.Size([64])
Batch 8: inputs shape: torch.Size([64, 3, 32, 32]), labels shape: torch.Size([64])
Batch 9: inputs shape: to

# Define LeNet-5

In [10]:
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()

        # Layer convolutivi
        self.conv1 = nn.Conv2d(3, 64, kernel_size=5)  # 3 input channels, 64 output channels
        self.conv2 = nn.Conv2d(64, 64, kernel_size=5)  # 64 input channels, 64 output channels

        # Layer fully connected
        self.fc1 = nn.Linear(64 * 5 * 5, 384)  # Dimensione calcolata per input 32x32 con due conv e max-pooling
        self.fc2 = nn.Linear(384, 192)
        self.fc3 = nn.Linear(192, 100)  # Classificatore lineare per CIFAR-100

    def forward(self, x):
        # Layer convolutivi con ReLU e max-pooling
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)  # Max pooling 2x2
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)  # Max pooling 2x2

        # Flatten per i layer fully connected
        x = torch.flatten(x, 1)

        # Layer fully connected con ReLU
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # Classificatore lineare
        x = self.fc3(x)

        # Softmax per probabilità
        x = F.log_softmax(x, dim=1)
        return x

# Searching

In [11]:
def train_and_evaluate_random(model, optimizer, scheduler, criterion, train_loader, val_loader, num_epochs, device, type_optimizer):

    model.train()
    accumulation_steps = 4  # Gradient accumulation

    for epoch in range(num_epochs):
        epoch_start = time.time()

        train_loss_total = 0
        train_correct, train_total = 0, 0

        # Training loop
        for batch_idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
            # inputs, labels_a, labels_b, lam = mixup_data(inputs, labels, alpha)
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels) / accumulation_steps

            loss.backward()

            if (batch_idx + 1) % accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
                optimizer.step()

            train_loss_total += loss.item() * accumulation_steps
            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()

        train_loss = train_loss_total / len(train_loader)
        train_acc = 100. * train_correct / train_total

        # Validation loop
        model.eval()
        val_loss, correct, total = 0, 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        val_acc = 100. * correct / total
        val_loss /= len(val_loader)

        if epoch == 2 and train_acc < 1.0:
            print(f"Epoch {epoch+1}: Train accuracy {train_acc:.2f}% too low, skipping to next trial...")
            return None, None, None
        if epoch == 3 and train_acc < 5.0:
            print(f"Epoch {epoch+1}: Train accuracy {train_acc:.2f}% too low, skipping to next trial...")
            return None, None, None

        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

        # Adjust learning rate
        scheduler.step()

    return train_loss, val_loss, val_acc


# Random search

In [12]:
def random_search(train_loader, val_loader, model_class, device, num_trials, num_epochs, type_optimizer):
    if type_optimizer == 'AdamW':
        param_space = {
            'lr': [8e-4, 1e-3, 2e-3, 3e-3, 5e-3],
            'weight_decay': [1e-2, 5e-2, 1e-1],
            'eps': [1e-8]
        }
    elif type_optimizer == 'SGDM':
        param_space = {
            'lr': [1e-4, 5e-4, 1e-3, 1e-2, 5e-2],
            'momentum': [0.9, 0.95],
            'weight_decay': [1e-6, 1e-5, 1e-4, 1e-3, 1e-2]
        }

    else:
        raise ValueError(f"Invalid optimizer type: {type_optimizer}")

    results = []
    used_combinations = set()
    for trial in range(num_trials):
        model = model_class().to(device)

        if type_optimizer == 'AdamW':
            while True:
                lr = random.choice(param_space['lr'])
                weight_decay = random.choice(param_space['weight_decay'])
                eps = random.choice(param_space['eps'])
                params = (lr, weight_decay, eps)
                if params not in used_combinations:
                    used_combinations.add(params)
                    break

            optimizer = optim.AdamW(
                model.parameters(),
                lr=lr,
                weight_decay=weight_decay,
                eps=eps
            )

            print(f"Trial {trial + 1}/{num_trials} | lr: {lr} | weight_decay: {weight_decay} | eps: {eps}")

        elif type_optimizer == 'SGDM':
            while True:
                lr = random.choice(param_space['lr'])
                weight_decay = random.choice(param_space['weight_decay'])
                momentum = random.choice(param_space['momentum'])
                params = (lr, weight_decay, momentum)
                if params not in used_combinations:
                    used_combinations.add(params)
                    break

            optimizer = optim.SGD(
                model.parameters(),
                lr=lr,
                momentum=momentum,
                weight_decay=weight_decay
            )

            print(f"Trial {trial + 1}/{num_trials} | lr: {lr} | weight_decay: {weight_decay} | momentum: {momentum} ")

        else:
            raise ValueError(f"Invalid optimizer type: {type_optimizer}")

        scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
        criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

        train_loss, val_loss, val_acc = train_and_evaluate_random(
            model, optimizer, scheduler, criterion, train_loader, val_loader, num_epochs, device=device,type_optimizer=type_optimizer
        )

        if train_loss is None:
            print(f"Skipping trial {trial + 1} due to low accuracy.")
            continue

        if type_optimizer == 'AdamW':
            results.append({
                'lr': lr,
                'weight_decay': weight_decay,
                'eps': eps,
                'val_loss': val_loss,
                'val_acc': val_acc
            })


        elif type_optimizer == 'SGDM':
            results.append({
                'lr': lr,
                'weight_decay': weight_decay,
                'momentum': momentum,
                'val_loss': val_loss,
                'val_acc': val_acc
            })

    results.sort(key=lambda x: x['val_acc'], reverse=True)
    best_hyperparams = results[0]

    save_path = 'best_hyperparams_'+ type_optimizer
    with open('/content/drive/MyDrive/' + save_path+ '.json', 'w') as f:
                json.dump(best_hyperparams, f)
    print('\nBest Hyperparameters saved to /content/drive/MyDrive/' + save_path +'.json')

    return best_hyperparams

# Grid search

In [13]:
def check_initial_loss(model, train_loader, device):
    """Step 1: Check initial loss at initialization"""
    model.eval()
    with torch.no_grad():
        inputs, labels = next(iter(train_loader))
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        expected_loss = math.log(100)
        print(f"Initial loss: {loss.item():.4f}")
        print(f"Expected loss: {expected_loss:.4f}")
        return abs(loss.item() - expected_loss) < 1.0

def overfit_small_sample(model, train_loader, device, type_optimizer, max_iterations=1000):
    """Step 2: Overfit a small sample"""
    # Get small sample (5 minibatches)
    small_dataset = []
    for i, (inputs, labels) in enumerate(train_loader):
        if i >= 5:
            break
        small_dataset.extend(list(zip(inputs, labels)))

    small_loader = torch.utils.data.DataLoader(small_dataset, batch_size=64, shuffle=True)

    if type_optimizer == 'AdamW':
        optimizer = optim.AdamW(model.parameters(), lr=0.001)
    elif type_optimizer == 'SGDM':
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)


    model.train()

    for iteration in range(max_iterations):
        total_loss = 0
        correct = 0
        total = 0

        for inputs, labels in small_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        accuracy = 100. * correct / total
        avg_loss = total_loss / len(small_loader)

        if iteration % 50 == 0:
            print(f"Iteration {iteration}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

        if accuracy > 99:
            print("Successfully overfit small sample!")
            return True

    return False

def find_initial_lr(model, train_loader, device, type_optimizer, lrs=[8e-4, 1e-3, 2e-3, 3e-3, 5e-3]):
    """Step 3: Find LR that makes loss go down"""
    best_lr = None
    min_loss_decrease = float('inf')

    for lr in lrs:
        model.apply(lambda m: m.reset_parameters() if hasattr(m, 'reset_parameters') else None)

        if type_optimizer == 'AdamW':
            optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
        elif type_optimizer == 'SGDM':
            optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

        initial_loss = None
        final_loss = None

        print(f"\nTrying learning rate: {lr}")

        for iteration in range(100):
            inputs, labels = next(iter(train_loader))
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = F.cross_entropy(outputs, labels)

            if iteration == 0:
                initial_loss = loss.item()
            final_loss = loss.item()

            loss.backward()
            optimizer.step()

            if iteration % 20 == 0:
                print(f"Iteration {iteration}, Loss: {loss.item():.4f}")

        loss_decrease = initial_loss - final_loss
        print(f"Loss decrease: {loss_decrease:.4f}")

        if loss_decrease > 0 and loss_decrease < min_loss_decrease:
            min_loss_decrease = loss_decrease
            best_lr = lr

    return best_lr

def coarse_grid_search(model, train_loader, val_loader, device, best_lr, type_optimizer):
    """Step 4: Coarse grid search"""


    lrs = [best_lr / 2, best_lr, best_lr * 2]
    weight_decays = [5e-3, 7e-3, 1e-2, 5e-2, 1e-1]
    # momentum_arr = [0.9, 0.95]
    eps =  [1e-8]
    results = []




    for lr in lrs:
        for wd in weight_decays:
            model.apply(lambda m: m.reset_parameters() if hasattr(m, 'reset_parameters') else None)

            if type_optimizer == 'AdamW':
                eps = random.choice(eps)
                print(f"\nTrying LR: {lr}, Weight Decay: {wd}, Eps: {eps}")
                optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd, eps=eps)
            elif type_optimizer == 'SGDM':
                print(f"\nTrying LR: {lr}, Weight Decay: {wd}, Momentum: {momentum}")
                momentum = random.choice(momentum_arr)
                optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)



            train_losses = []
            val_accuracies = []

            for epoch in range(10):
                # Training
                model.train()
                total_loss = 0
                for inputs, labels in train_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = F.cross_entropy(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()

                # Validation
                model.eval()
                correct = 0
                total = 0
                with torch.no_grad():
                    for inputs, labels in val_loader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs)
                        _, predicted = outputs.max(1)
                        total += labels.size(0)
                        correct += predicted.eq(labels).sum().item()

                train_losses.append(total_loss / len(train_loader))
                val_accuracies.append(100. * correct / total)

                print(f"Epoch {epoch + 1}, Loss: {train_losses[-1]:.4f}, Val Acc: {val_accuracies[-1]:.2f}%")


            if type_optimizer == 'AdamW':
                results.append({
                    'lr': lr,
                    'weight_decay': wd,
                    'eps': eps,
                    'final_loss': train_losses[-1],
                    'final_val_acc': val_accuracies[-1],
                    'momentum': momentum
                })
            elif type_optimizer == 'SGDM':
                results.append({
                    'lr': lr,
                    'weight_decay': wd,
                    'momentum': momentum,
                    'final_loss': train_losses[-1],
                    'final_val_acc': val_accuracies[-1],
                    'momentum': momentum
                })

    results.sort(key=lambda x: x['final_val_acc'], reverse=True)
    return results[0]

def train_model_with_early_stopping(model, optimizer, criterion, scheduler, train_loader, val_loader, num_epochs, device, patience=5):
    """Train model with early stopping"""
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    best_val_loss = float('inf')
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss, correct, total = 0, 0, 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        train_losses.append(train_loss / len(train_loader))
        train_accuracies.append(100. * correct / total)

        # Validation phase
        model.eval()
        val_loss, correct, total = 0, 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        val_losses.append(val_loss / len(val_loader))
        val_accuracies.append(100. * correct / total)

        scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, "
              f"Train Acc: {train_accuracies[-1]:.2f}%, "
              f"Val Loss: {val_losses[-1]:.4f}, "
              f"Val Acc: {val_accuracies[-1]:.2f}%")

        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

    return train_losses, val_losses, train_accuracies, val_accuracies

In [14]:
def use_grid_search(model, device, train_loader, val_loader, test_loader, type_optimizer='AdamW'):

    if type_optimizer != 'AdamW' and type_optimizer != 'SGDM':
        raise ValueError(f"Invalid optimizer type: {type_optimizer}")

    print("Starting grid search...")
    # Step 1: Check initial loss
    if check_initial_loss(model, train_loader, device):
        print("Initial loss check passed.")

    # Step 2: Overfit small sample
    if overfit_small_sample(model, train_loader, device, type_optimizer):
        print("Successfully overfit small sample.")

    # Step 3: Find learning rate
    best_lr = find_initial_lr(model, train_loader, device, type_optimizer)
    print(f"Best learning rate: {best_lr}")

    # Step 4: Coarse grid search for LR and weight decay
    best_model_params = coarse_grid_search(model, train_loader, val_loader, device, best_lr, type_optimizer)
    print(f"Best model parameters: {best_model_params}")

    for improvement in range(1, 3):
        # Step 5: Refine grid search and train for longer
        # Train with the best parameters found
        best_lr = best_model_params['lr']
        best_weight_decay = best_model_params['weight_decay']
        model.apply(lambda m: m.reset_parameters() if hasattr(m, 'reset_parameters') else None)
        if type_optimizer == 'AdamW':
            optimizer = optim.AdamW(model.parameters(), lr=best_lr, weight_decay=best_weight_decay, eps=best_model_params['eps'])
        elif type_optimizer == 'SGDM':
            optimizer = optim.SGD(model.parameters(), lr=best_lr, weight_decay=best_weight_decay, momentum=best_model_params['momentum'])
        scheduler = CosineAnnealingLR(optimizer, T_max=20*improvement)


        # Train the model for 10-20 epochs
        model, train_losses, val_losses, train_accuracies, val_accuracies = train_model_with_early_stopping(
            model, optimizer, F.cross_entropy, scheduler, train_loader, val_loader, num_epochs=20*improvement, device=device, patience=5
        )

    # Step 6: Look at learning curves
    # Plot training and validation losses and accuracies
    epochs = range(1, len(train_losses) + 1)

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, label="Train Loss")
    plt.plot(epochs, val_losses, label="Validation Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_accuracies, label="Train Accuracy")
    plt.plot(epochs, val_accuracies, label="Validation Accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend()

    plt.tight_layout()
    plt.show()

In [15]:
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    find_alg = 'random_search' # grid_search or random_search
    type_optimizer = 'AdamW'  # AdamW or SGDM

    model = LeNet5().to(device)
    if find_alg == 'grid_search':
        best_hyperparams = use_grid_search(model, device, train_loader, val_loader, test_loader, type_optimizer=type_optimizer)
    elif find_alg == 'random_search':
        best_hyperparams = random_search(train_loader, val_loader, model_class=LeNet5, device=device, num_trials=25, num_epochs=5, type_optimizer=type_optimizer)


Using device: cuda
Trial 1/25 | lr: 0.002 | weight_decay: 0.01 | eps: 1e-08
Epoch 1/10 | Train Loss: 4.4101 | Train Acc: 3.46% | Val Loss: 4.3152 | Val Acc: 4.75%
Epoch 2/10 | Train Loss: 4.1956 | Train Acc: 6.77% | Val Loss: 4.1132 | Val Acc: 7.78%


Exception in thread Thread-17 (_pin_memory_loop):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/pin_memory.py", line 59, in _pin_memory_loop
    do_one_step()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/pin_memory.py", line 35, in do_one_step
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
  File "/usr/local/lib/python3.10/dist-packages/torch/multiprocessing/reductions.py", line 541, in rebuild_storage_fd
    fd = df.detach()
  File "/usr/lib/python3.10/multiprocessing/resource_sharer.py", line 57, in detach
    with _resource_sharer.get_connection(self._id) as conn:
  File "/usr/lib/python3

KeyboardInterrupt: 