In [177]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

In [178]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Custom Swish activation function with inplace option
class Swish(nn.Module):
    def __init__(self, inplace=False):
        super(Swish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return x.mul_(torch.sigmoid(x)) if self.inplace else x * torch.sigmoid(x)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.swish = Swish(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample


        if downsample is not None:
            self.down_bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.silu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)
            identity = self.down_bn(identity)  
        out += identity
        out = F.silu(out)

        return out


class ResNetCIFAR(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNetCIFAR, self).__init__()
        self.in_channels = 32
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        #self.swish = Swish(inplace=True)
        self.layer1 = self._make_layer(block, 32, layers[0])
        self.layer2 = self._make_layer(block, 64, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 128, layers[2], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(128 * block.expansion, num_classes)
        )


        self._initialize_weights()

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu', a=0.1)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.silu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

# Define ResNet-20 model with optional num_classes
def resnet20(num_classes=10):
    return ResNetCIFAR(BasicBlock, [3, 3, 3], num_classes=num_classes)


In [171]:
# useful libraries
#############################################
# your code here
# specify preprocessing function
import torchvision
import torchvision.transforms as transforms

# Define data augmentation for training

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # 增强颜色
    transforms.RandomRotation(15),  # 旋转 15 度
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
])


# No augmentation for validation
transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)),
])

#############################################

In [172]:
# do NOT change these
from tools.dataset import CIFAR10
from torch.utils.data import DataLoader

# a few arguments, do NOT change these
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 128
VAL_BATCH_SIZE = 100

#############################################
# your code here
# construct dataset
train_set = CIFAR10(
    root=DATA_ROOT, 
    mode='train', 
    download=True,
    transform=transform_train    # your code
)
val_set = CIFAR10(
    root=DATA_ROOT, 
    mode='val', 
    download=True,
    transform=transform_val    # your code
)

# construct dataloader
train_loader = DataLoader(
    train_set, 
    batch_size=TRAIN_BATCH_SIZE,  # your code
    shuffle=True,     # your code
    num_workers=4
)
val_loader = DataLoader(
    val_set, 
    batch_size=VAL_BATCH_SIZE,  # your code
    shuffle=False,     # your code
    num_workers=4
)
#############################################

Using downloaded and verified file: ./data\cifar10_trainval_F22.zip
Extracting ./data\cifar10_trainval_F22.zip to ./data
Files already downloaded and verified
Using downloaded and verified file: ./data\cifar10_trainval_F22.zip
Extracting ./data\cifar10_trainval_F22.zip to ./data
Files already downloaded and verified


In [173]:
# specify the device for computation
#############################################
# your code here
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device =='cuda':
    print(f"Run on GPU...\n{torch.cuda.get_device_name(0)}")
else:
    print("Run on CPU...")

# Model Definition  
net = resnet20()
net = net.to(device)
    
#############################################

Run on GPU...
NVIDIA GeForce RTX 4080 SUPER


In [174]:
import torch.nn as nn
import torch.optim as optim

# hyperparameters, do NOT change right now
# initial learning rate
INITIAL_LR = 0.01

# momentum for optimizer
MOMENTUM = 0.9

# L2 regularization strength
REG = 1e-4

#############################################
# your code here
# create loss function
criterion = nn.CrossEntropyLoss()

# Add optimizer
optimizer = optim.SGD(
    net.parameters(),
    lr=INITIAL_LR,
    momentum=MOMENTUM,
    nesterov=True
)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=len(train_loader), epochs=200)
#############################################

In [176]:
# some hyperparameters
# total number of training epochs
EPOCHS = 50

# the folder where the trained model is saved
CHECKPOINT_FOLDER = "./saved_model"

# start the training/validation process
# the process should take about 5 minutes on a GTX 1070-Ti
# if the code is written efficiently.
best_val_acc = 0
current_learning_rate = INITIAL_LR
L1_lambda = 1e-4


print("==> Training starts!")
print("="*50)
for i in range(0, EPOCHS):    
    #######################
    # your code here
    # switch to train mode
    net.train()
    
    #######################
    
    print("Epoch %d:" %i)
    # this help you compute the training accuracy
    total_examples = 0
    correct_examples = 0

    train_loss = 0 # track training loss if you want
    
    # Train the model for 1 epoch.
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        ####################################
        # your code here
        # copy inputs to device
        inputs, targets = inputs.to(device), targets.to(device).long()
        
        # compute the output and loss
        outputs = net(inputs)  # Forward pass
        loss = criterion(outputs, targets)  # Compute loss with logits
        
        # Add L1 penalty (sum of absolute values of all model parameters)
        l1_penalty = 0
        for param in net.parameters():
            l1_penalty += torch.sum(torch.abs(param))  # Sum of absolute values
        
        loss = loss + L1_lambda * l1_penalty
        # zero the gradient
        optimizer.zero_grad()
        
        # backpropagation
        loss.backward()
        
        # apply gradient and update the weights
        optimizer.step()
        train_loss += loss.item() # Update the training loss
        
        # count the number of correctly predicted samples in the current batch
        _, predicted = outputs.max(1)  # Get predicted classes
        total_examples += targets.size(0)  # Increment total examples
        correct_examples += predicted.eq(targets).sum().item()  # Increment correct predictions
        ####################################
                
    avg_loss = train_loss / len(train_loader)
    avg_acc = correct_examples / total_examples
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))

    # Validate on the validation dataset
    #######################
    # your code here
    # switch to eval mode
    net.eval()
    
    #######################

    # this help you compute the validation accuracy
    total_examples = 0
    correct_examples = 0
    
    val_loss = 0 # again, track the validation loss if you want

    # disable gradient during validation, which can save GPU memory
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            ####################################
            # your code here
            # copy inputs to device
            inputs, targets = inputs.to(device), targets.to(device).long()
            
            # compute the output and loss
            outputs = net(inputs)  # Forward pass
            loss = criterion(outputs, targets)  # Compute loss with logits
            val_loss += loss.item()
            
            # count the number of correctly predicted samples in the current batch
            _, predicted = outputs.max(1)
            total_examples += targets.size(0)
            correct_examples += predicted.eq(targets).sum().item()
            ####################################

    avg_loss = val_loss / len(val_loader)
    avg_acc = correct_examples / total_examples
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))
    
    # save the model checkpoint
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        if not os.path.exists(CHECKPOINT_FOLDER):
            os.makedirs(CHECKPOINT_FOLDER)
        print("Saving ...")
        state = {'state_dict': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'resnet.pth'))
        
    print('')

print("="*50)
print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")

==> Training starts!
Epoch 0:
Training loss: 0.9104, Training accuracy: 0.8777
Validation loss: 0.3759, Validation accuracy: 0.8806
Saving ...

Epoch 1:
Training loss: 0.9039, Training accuracy: 0.8792
Validation loss: 0.3610, Validation accuracy: 0.8836
Saving ...

Epoch 2:
Training loss: 0.9029, Training accuracy: 0.8806
Validation loss: 0.3540, Validation accuracy: 0.8796

Epoch 3:
Training loss: 0.9007, Training accuracy: 0.8788
Validation loss: 0.3447, Validation accuracy: 0.8820

Epoch 4:
Training loss: 0.8996, Training accuracy: 0.8795
Validation loss: 0.3446, Validation accuracy: 0.8810

Epoch 5:
Training loss: 0.8982, Training accuracy: 0.8795
Validation loss: 0.3339, Validation accuracy: 0.8876
Saving ...

Epoch 6:
Training loss: 0.9077, Training accuracy: 0.8765
Validation loss: 0.3435, Validation accuracy: 0.8852

Epoch 7:
Training loss: 0.9010, Training accuracy: 0.8799
Validation loss: 0.4102, Validation accuracy: 0.8654

Epoch 8:
Training loss: 0.8915, Training accuracy: