#EXAMPLE OLD RESNET20

In [2]:
import torch
import torch.nn as nn

class ResNet20(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet20, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        # First residual block
        self.layer1 = self._make_layer(in_channels=32, out_channels=32, num_blocks=3, stride=1)
        
        # Second residual block
        self.layer2 = self._make_layer(in_channels=32, out_channels=64, num_blocks=3, stride=2)
        
        # Third residual block
        self.layer3 = self._make_layer(in_channels=64, out_channels=128, num_blocks=3, stride=2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc_out = nn.Linear(128, num_classes)

        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, in_channels, out_channels, num_blocks, stride):
        layers = []
        layers.append(ResidualBlock(in_channels, out_channels, stride))
        for _ in range(1, num_blocks):
            layers.append(ResidualBlock(out_channels, out_channels, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc_out(x)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        shortcut = self.shortcut(x)
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.bn2(self.conv2(x))
        x += shortcut
        x = self.relu(x)
        return x


#EXAMPLE OLD TRAIN AND TEST WITH DATA CIFAR 10 

In [3]:
def ResNet20():
    block = ResidualBlock
    model = ResNet20
    (5, block)
    return model

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from ResNet20 import ResNet20  # assuming ResNet20 is defined in ResNet20.py

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_epochs = 10
batch_size = 128
learning_rate = 0.1

# CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model
model = ResNet20(num_classes=10).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Training loop
def train(epoch):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Forward pass
        outputs = model(inputs.clone())  # Clone inputs to avoid inplace operation
        
        # Compute loss
        loss = criterion(outputs, targets)
        
        # Backward and optimize
        optimizer.zero_grad()
        #loss.backward()
        optimizer.step()
        
        # Statistics
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss/len(train_loader):.4f}, Accuracy: {100.*correct/total:.2f}%')

# Testing loop
def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            
            # Forward pass
            outputs = model(inputs)
            
            # Compute loss
            loss = criterion(outputs, targets)
            
            # Statistics
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    print(f'Test Loss: {test_loss/len(test_loader):.4f}, Accuracy: {100.*correct/total:.2f}%')

# Main training and testing loop
for epoch in range(num_epochs):
    train(epoch)
    test(epoch)


Files already downloaded and verified
Epoch [1/10], Loss: 2.6783, Accuracy: 8.73%
Test Loss: 2.6695, Accuracy: 8.80%
Epoch [2/10], Loss: 2.6797, Accuracy: 8.75%
Test Loss: 2.6706, Accuracy: 8.74%
Epoch [3/10], Loss: 2.6825, Accuracy: 8.58%
Test Loss: 2.6639, Accuracy: 8.75%
Epoch [4/10], Loss: 2.6802, Accuracy: 8.64%
Test Loss: 2.6692, Accuracy: 8.80%
Epoch [5/10], Loss: 2.6803, Accuracy: 8.62%
Test Loss: 2.6691, Accuracy: 8.79%
Epoch [6/10], Loss: 2.6804, Accuracy: 8.62%
Test Loss: 2.6685, Accuracy: 8.85%
Epoch [7/10], Loss: 2.6797, Accuracy: 8.56%
Test Loss: 2.6760, Accuracy: 8.74%
Epoch [8/10], Loss: 2.6798, Accuracy: 8.82%
Test Loss: 2.6690, Accuracy: 8.72%
Epoch [9/10], Loss: 2.6806, Accuracy: 8.63%
Test Loss: 2.6618, Accuracy: 8.85%
Epoch [10/10], Loss: 2.6795, Accuracy: 8.64%
Test Loss: 2.6688, Accuracy: 8.72%


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from ResNet20 import ResNet20  # assuming ResNet20 is defined in ResNet20.py
from torchvision import transforms

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_epochs = 100
batch_size = 128
learning_rate = 0.001

# CIFAR-10 dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform_train, download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform_test)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Model
model = ResNet20(num_classes=10).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)


# Training loop
def train(epoch):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Forward pass
        outputs = model(inputs.clone())  # Clone inputs to avoid inplace operation
        
        # Compute loss
        loss = criterion(outputs, targets)
        
        # Backward and optimize
        optimizer.zero_grad()
        #loss.backward()
        optimizer.step()
        
        # Statistics
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss/len(train_loader):.4f}, Accuracy: {100.*correct/total:.2f}%')

# Testing loop
def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            
            # Forward pass
            outputs = model(inputs)
            
            # Compute loss
            loss = criterion(outputs, targets)
            
            # Statistics
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    print(f'Test Loss: {test_loss/len(test_loader):.4f}, Accuracy: {100.*correct/total:.2f}%')

# Main training and testing loop
for epoch in range(num_epochs):
    train(epoch)
    test(epoch)

Files already downloaded and verified
Epoch [1/100], Loss: 2.6143, Accuracy: 9.42%
Test Loss: 2.5721, Accuracy: 9.17%
Epoch [2/100], Loss: 2.6132, Accuracy: 9.40%
Test Loss: 2.5646, Accuracy: 9.28%
Epoch [3/100], Loss: 2.6128, Accuracy: 9.37%
Test Loss: 2.5678, Accuracy: 9.19%
Epoch [4/100], Loss: 2.6121, Accuracy: 9.46%
Test Loss: 2.5623, Accuracy: 9.13%
Epoch [5/100], Loss: 2.6128, Accuracy: 9.33%
Test Loss: 2.5708, Accuracy: 9.20%
Epoch [6/100], Loss: 2.6125, Accuracy: 9.57%
Test Loss: 2.5669, Accuracy: 9.13%
Epoch [7/100], Loss: 2.6126, Accuracy: 9.53%
Test Loss: 2.5639, Accuracy: 9.23%
Epoch [8/100], Loss: 2.6179, Accuracy: 9.19%
Test Loss: 2.5678, Accuracy: 9.22%
Epoch [9/100], Loss: 2.6121, Accuracy: 9.65%
Test Loss: 2.5653, Accuracy: 9.25%
Epoch [10/100], Loss: 2.6090, Accuracy: 9.50%
Test Loss: 2.5666, Accuracy: 9.16%
Epoch [11/100], Loss: 2.6123, Accuracy: 9.48%
Test Loss: 2.5745, Accuracy: 9.24%
Epoch [12/100], Loss: 2.6155, Accuracy: 9.24%
Test Loss: 2.5720, Accuracy: 8.97%

OLD MODEL RESNET 20 #EXAMPLE

In [None]:
#reference from kuangliu resnet.py
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        #self.in_channels = in_channels
        #self.out_channels = out_channels
        #self.kernel_size = kernel_size
        #self.stride = stride
        #self.padding = padding
        #self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        #self.bn = nn.BatchNorm2d(out_channels) #for stabilize the training process by reducing internal covariate shift
        #self.relu = nn.ReLU()

#    def forward(self, x):
#        x = self.conv(x)
#        x = self.bn1(x)
#        x = self.relu(x)
#        x = self.bn2(x)
#        x = self.conv2(x)
#        return x
#class ConvBlock(nn.Module):
#    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
#        super(ConvBlock, self).__init__()
#        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
#        self.bn = nn.BatchNorm2d(out_channels)  # Batch normalization layer
#        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)  # Apply batch normalization
        x = self.relu(x)
        return x

        if self.downsample is not None:
            identity = self.downsample #for dimention matching, ex input 64x64 become 32x32 for clearly image
        x += identity #current set feature
        x = self.relu(x) #final activation function

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride,first=False):
        super(ResidualBlock, self).__init__()
        in_channels//4
        self.conv1 = ConvBlock(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.conv2 = ConvBlock(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.shortcut = nn.Sequential()

        if in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels,
                kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        residual = self.shortcut(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = x + residual #because imagine if input + residual = output with high level
        x = F.relu(x)
        return x


class ResNet20(nn.Module):
    def __init__(self):
        super(ResNet20, self).__init__()
        self.conv1 = ConvBlock(3, 16, kernel_size=3, stride=1, padding=1)
        self.layer1 = nn.Sequential(
            ResidualBlock(16, 16, stride=1),
            ResidualBlock(16, 16, stride=1),
            ResidualBlock(16, 16, stride=1)
        )
        self.layer2 = nn.Sequential(
            ResidualBlock(16, 32, stride=2),
            ResidualBlock(32, 32, stride=1),
            ResidualBlock(32, 32, stride=1),
            ResidualBlock(32, 32, stride=1),
            ResidualBlock(32, 32, stride=1)
        )
        self.layer3 = nn.Sequential(
            ResidualBlock(32, 64, stride=2),
            ResidualBlock(64, 64, stride=1),
            ResidualBlock(64, 64, stride=1),
            ResidualBlock(64, 64, stride=1)
        )
        self.avg_pool = nn.AvgPool2d(kernel_size=3)
        self.fc = nn.Linear(64, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

model = ResNet20()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

EXAMPLE TEST

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.batch_norm1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion * out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channels)
            )

    def forward(self, x):
        residual = x
        out = F.relu(self.batch_norm1(self.conv1(x)))
        out = self.batch_norm2(self.conv2(out))
        out += self.shortcut(residual)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.batch_norm = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, channels, stride))
            self.in_channels = channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.batch_norm(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

def ResNet20():
    return ResNet(BasicBlock, [3, 3, 3])

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from ResNet20 import ResNet20  # assuming ResNet20 is defined in ResNet20.py
from torchvision import transforms

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
num_epochs = 100
batch_size = 128
learning_rate = 0.001

# CIFAR-10 dataset
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform_train, download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform_test)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Model
model = ResNet20(num_classes=10).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)


# Training loop
def train(epoch):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Forward pass
        outputs = model(inputs.clone())  # Clone inputs to avoid inplace operation
        
        # Compute loss
        loss = criterion(outputs, targets)
        
        # Backward and optimize
        optimizer.zero_grad()
        #loss.backward()
        optimizer.step()
        
        # Statistics
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss/len(train_loader):.4f}, Accuracy: {100.*correct/total:.2f}%')

# Testing loop
def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            
            # Forward pass
            outputs = model(inputs)
            
            # Compute loss
            loss = criterion(outputs, targets)
            
            # Statistics
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    
    print(f'Test Loss: {test_loss/len(test_loader):.4f}, Accuracy: {100.*correct/total:.2f}%')

# Main training and testing loop
for epoch in range(num_epochs):
    train(epoch)
    test(epoch)

Files already downloaded and verified
Epoch [1/100], Loss: 2.4623, Accuracy: 10.89%
Test Loss: 2.4523, Accuracy: 10.31%
Epoch [2/100], Loss: 2.4653, Accuracy: 10.78%
Test Loss: 2.4529, Accuracy: 10.36%
Epoch [3/100], Loss: 2.4613, Accuracy: 10.93%
Test Loss: 2.4515, Accuracy: 10.39%
Epoch [4/100], Loss: 2.4619, Accuracy: 11.00%
Test Loss: 2.4493, Accuracy: 10.33%
Epoch [5/100], Loss: 2.4591, Accuracy: 11.01%
Test Loss: 2.4527, Accuracy: 10.32%
Epoch [6/100], Loss: 2.4639, Accuracy: 10.85%
Test Loss: 2.4527, Accuracy: 10.54%
Epoch [7/100], Loss: 2.4618, Accuracy: 10.72%
Test Loss: 2.4512, Accuracy: 10.21%
Epoch [8/100], Loss: 2.4651, Accuracy: 10.63%
Test Loss: 2.4536, Accuracy: 10.34%
Epoch [9/100], Loss: 2.4622, Accuracy: 10.87%
Test Loss: 2.4522, Accuracy: 10.27%
Epoch [10/100], Loss: 2.4605, Accuracy: 10.97%
Test Loss: 2.4506, Accuracy: 10.39%
Epoch [11/100], Loss: 2.4581, Accuracy: 10.64%
Test Loss: 2.4502, Accuracy: 10.36%
Epoch [12/100], Loss: 2.4611, Accuracy: 10.82%
Test Loss: 

TEST WITH NEW RESNET20 MODEL

In [1]:
import torch
import torch.nn as nn

class ResNet20(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet20, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)

        # Residual blocks
        self.layer1 = self._make_layer(in_channels=16, out_channels=16, num_blocks=3, stride=1)
        self.layer2 = self._make_layer(in_channels=16, out_channels=32, num_blocks=3, stride=2)
        self.layer3 = self._make_layer(in_channels=32, out_channels=64, num_blocks=3, stride=2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc_out = nn.Linear(64, num_classes)

        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, in_channels, out_channels, num_blocks, stride):
        layers = []
        layers.append(ResidualBlock(in_channels, out_channels, stride))
        for _ in range(1, num_blocks):
            layers.append(ResidualBlock(out_channels, out_channels, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fc_out(x)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        shortcut = self.shortcut(x)
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.bn2(self.conv2(x))
        x += shortcut
        x = self.relu(x)
        return x

# Instantiate the model
model = ResNet20()
print(model)


ResNet20(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), s

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from ResNet20 import ResNet20

# Define data preprocessing
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Define data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=4)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Instantiate the model
ResNet20 = ResNet20()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(ResNet20.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# Training the model
num_epochs = 100
for epoch in range(num_epochs):
    # Train the model
    ResNet20.train()
    train_loss = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = ResNet20(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()  # Accumulate the loss

    # Evaluate the model
    ResNet20.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = ResNet20(images)
            loss = criterion(outputs, labels)  # Compute the loss
            test_loss += loss.item()  # Accumulate the loss
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    train_loss /= len(train_loader)
    test_loss /= len(test_loader)
    accuracy = 100 * correct / total
    
    # Print the progress
    #if (epoch+1) % 5 == 0:  # Print every 5 epochs
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%')


ResNet20(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), s

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from ResNet20 import ResNet20

# Define data preprocessing
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Define data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=4)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Instantiate the model
model = ResNet20()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# Training and Evaluation function
def train_and_evaluate(model, criterion, optimizer, train_loader, test_loader, num_epochs=100):
    for epoch in range(num_epochs):
        # Train the model
        model.train()
        train_loss = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()  # Accumulate the loss

        # Evaluate the model
        model.eval()
        test_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                outputs = model(images)
                loss = criterion(outputs, labels)  # Compute the loss
                test_loss += loss.item()  # Accumulate the loss
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        train_loss /= len(train_loader)
        test_loss /= len(test_loader)
        accuracy = 100 * correct / total

        # Print the progress
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%')

# Train and evaluate the model
train_and_evaluate(model, criterion, optimizer, train_loader, test_loader)


ResNet20(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), s

100%|█████████████████████████████████████████████████████████████████| 170498071/170498071 [35:59<00:00, 78934.45it/s]


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch [1/100], Train Loss: 1.5910, Test Loss: 1.4299, Accuracy: 49.94%
Epoch [2/100], Train Loss: 1.0700, Test Loss: 0.9924, Accuracy: 64.68%
Epoch [3/100], Train Loss: 0.8607, Test Loss: 1.2143, Accuracy: 61.98%
Epoch [4/100], Train Loss: 0.7593, Test Loss: 1.0093, Accuracy: 68.14%
Epoch [5/100], Train Loss: 0.7030, Test Loss: 1.0152, Accuracy: 68.58%
Epoch [6/100], Train Loss: 0.6599, Test Loss: 0.7666, Accuracy: 75.05%
Epoch [7/100], Train Loss: 0.6352, Test Loss: 1.0176, Accuracy: 66.69%
Epoch [8/100], Train Loss: 0.6128, Test Loss: 0.8250, Accuracy: 72.29%
Epoch [9/100], Train Loss: 0.5967, Test Loss: 0.9211, Accuracy: 71.29%
Epoch [10/100], Train Loss: 0.5812, Test Loss: 1.0391, Accuracy: 67.49%
Epoch [11/100], Train Loss: 0.5633, Test Loss: 0.7686, Accuracy: 74.77%
Epoch [12/100], Train Loss: 0.5534, Test Loss: 0.6772, Accuracy: 78.12%
Epoch [13/100], Train Loss: 0.5527, Test Loss: 0.6534, A

OSError: [Errno 22] Invalid argument

In [6]:
# Initiate the model
model = ResNet20()

# Count the total number of parameters in the model
total_params = sum(p.numel() for p in model.parameters())
print(f'Total parameters in ResNet20: {total_params}')

Total parameters in ResNet20: 272474


In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from ResNet20 import ResNet20

# Define data preprocessing
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Define data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=4)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Instantiate the model
model = ResNet20()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# Training and Evaluation function
def train_and_evaluate(model, criterion, optimizer, train_loader, test_loader, num_epochs=50):
    for epoch in range(num_epochs):
        # Train the model
        model.train()
        train_loss = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()  # Accumulate the loss

        # Evaluate the model
        model.eval()
        test_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                outputs = model(images)
                loss = criterion(outputs, labels)  # Compute the loss
                test_loss += loss.item()  # Accumulate the loss
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        train_loss /= len(train_loader)
        test_loss /= len(test_loader)
        accuracy = 100 * correct / total

        # Print the progress
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%')

# Train and evaluate the model
train_and_evaluate(model, criterion, optimizer, train_loader, test_loader)


Files already downloaded and verified
Files already downloaded and verified
Epoch [1/50], Train Loss: 1.5742, Test Loss: 1.4986, Accuracy: 48.73%
Epoch [2/50], Train Loss: 1.0804, Test Loss: 1.0815, Accuracy: 62.09%
Epoch [3/50], Train Loss: 0.8963, Test Loss: 1.2693, Accuracy: 62.35%
Epoch [4/50], Train Loss: 0.7750, Test Loss: 0.9539, Accuracy: 67.24%
Epoch [5/50], Train Loss: 0.7102, Test Loss: 0.8339, Accuracy: 72.65%
Epoch [6/50], Train Loss: 0.6717, Test Loss: 0.8267, Accuracy: 73.10%
Epoch [7/50], Train Loss: 0.6336, Test Loss: 0.8490, Accuracy: 71.96%
Epoch [8/50], Train Loss: 0.6067, Test Loss: 0.7637, Accuracy: 74.62%
Epoch [9/50], Train Loss: 0.5956, Test Loss: 1.0533, Accuracy: 67.65%
Epoch [10/50], Train Loss: 0.5806, Test Loss: 1.1974, Accuracy: 65.27%
Epoch [11/50], Train Loss: 0.5670, Test Loss: 0.8245, Accuracy: 74.52%
Epoch [12/50], Train Loss: 0.5499, Test Loss: 0.9779, Accuracy: 68.69%
Epoch [13/50], Train Loss: 0.5412, Test Loss: 0.7624, Accuracy: 74.62%
Epoch [14/

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from ResNet20 import ResNet20

# Define data preprocessing
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Define data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=4)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Instantiate the model
model = ResNet20()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# Training and Evaluation function
def train_and_evaluate(model, criterion, optimizer, train_loader, test_loader, num_epochs=50):
    for epoch in range(num_epochs):
        # Train the model
        model.train()
        train_loss = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()  # Accumulate the loss

        # Evaluate the model
        model.eval()
        test_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                outputs = model(images)
                loss = criterion(outputs, labels)  # Compute the loss
                test_loss += loss.item()  # Accumulate the loss
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        train_loss /= len(train_loader)
        test_loss /= len(test_loader)
        accuracy = 100 * correct / total

        # Print the progress
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%')

# Train and evaluate the model
train_and_evaluate(model, criterion, optimizer, train_loader, test_loader)

ResNet20(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): ResidualBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), s