In [97]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [98]:
batch_size = 64
learning_rate = 3e-4
epochs = 20

In [99]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [100]:
class MiniResNet(nn.Module):
    def __init__(self, in_channels=3, num_classes=10):
        super(MiniResNet, self).__init__()
        
        # Initial convolutional layer
        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=3, stride=1, padding=1) #64*16*32*32
        self.bn1 = nn.BatchNorm2d(16) #64*16*32*32
        
        # Residual block
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1) #64*16*32*32
        self.bn2 = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1) #64*16*32*32
        self.bn3 = nn.BatchNorm2d(16)
        self.conv4 = nn.Conv2d(16, 8, kernel_size=3, stride=1, padding=1)
        # Output layer
        self.fc = nn.Linear(8 * 16 * 16, num_classes)
        
    def forward(self, x):
        # Initial convolutional layer
        out = F.relu(self.bn1(self.conv1(x)))
        
        # Residual block
        residual = out
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.conv3(out)
        out = self.bn3(out)
        out += residual
        out = F.relu(out)
        # Max pooling
        out = F.max_pool2d(out, kernel_size=3, stride=2, padding = 1)
        out = self.conv4(out) #64, 8, 16, 16
        # Flatten and output
        out = out.view(-1, 8 * 16 * 16)
        out = self.fc(out)
        
        return out

In [101]:
transform = transforms.Compose([
    transforms.ToTensor(),
])

In [102]:
cifar10_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_set_size = int(0.8 * len(cifar10_dataset))
val_set_size = int(0.1 * len(cifar10_dataset))
test_set_size = len(cifar10_dataset) - train_set_size - val_set_size
train_set, val_set, test_set = torch.utils.data.random_split(cifar10_dataset, [train_set_size, val_set_size, test_set_size])

Files already downloaded and verified


In [103]:
trainloader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
valloader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
testloader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [104]:
model = MiniResNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

In [108]:
best_val_accuracy = 0
train_losses = []
val_losses = []
for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        
        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        running_loss+=loss
        optimizer.step()
    
    # Validation loop
    model.eval()
    val_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data in valloader:
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss
            
    train_losses.append(running_loss/len(trainloader))
    val_losses.append(val_loss/len(valloader))
    print(f"Epoch: {epoch + 1}, Training Loss: {train_losses[-1]}, Validation Loss: {val_losses[-1]}")
    
    # Save the best model
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), '/kaggle/working/best_model.pth')
    
    model.train()

Epoch: 1, Training Loss: 1.1575847864151, Validation Loss: 1.1975332498550415
Epoch: 2, Training Loss: 1.1361098289489746, Validation Loss: 1.1464864015579224
Epoch: 3, Training Loss: 1.059550404548645, Validation Loss: 1.0932111740112305
Epoch: 4, Training Loss: 1.0196338891983032, Validation Loss: 1.0534148216247559
Epoch: 5, Training Loss: 0.9831543564796448, Validation Loss: 1.0475348234176636
Epoch: 6, Training Loss: 0.9538805484771729, Validation Loss: 1.0449857711791992
Epoch: 7, Training Loss: 0.9334816336631775, Validation Loss: 1.048587441444397
Epoch: 8, Training Loss: 0.9149734973907471, Validation Loss: 1.001442313194275
Epoch: 9, Training Loss: 0.8976526260375977, Validation Loss: 1.0061023235321045
Epoch: 10, Training Loss: 0.8810766339302063, Validation Loss: 0.9920404553413391
Epoch: 11, Training Loss: 0.8716145157814026, Validation Loss: 0.9926345944404602
Epoch: 12, Training Loss: 0.8582288026809692, Validation Loss: 0.9888817071914673
Epoch: 13, Training Loss: 0.845

In [109]:
model_parameters = {}
for name, param in model.named_parameters():
    name = name.replace('.', '_')
    model_parameters[name] = param.cpu().detach().numpy()
    print(f'name is : {name} || shape is {param.shape}')

name is : conv1_weight || shape is torch.Size([16, 3, 3, 3])
name is : conv1_bias || shape is torch.Size([16])
name is : bn1_weight || shape is torch.Size([16])
name is : bn1_bias || shape is torch.Size([16])
name is : conv2_weight || shape is torch.Size([16, 16, 3, 3])
name is : conv2_bias || shape is torch.Size([16])
name is : bn2_weight || shape is torch.Size([16])
name is : bn2_bias || shape is torch.Size([16])
name is : conv3_weight || shape is torch.Size([16, 16, 3, 3])
name is : conv3_bias || shape is torch.Size([16])
name is : bn3_weight || shape is torch.Size([16])
name is : bn3_bias || shape is torch.Size([16])
name is : conv4_weight || shape is torch.Size([8, 16, 3, 3])
name is : conv4_bias || shape is torch.Size([8])
name is : fc_weight || shape is torch.Size([10, 2048])
name is : fc_bias || shape is torch.Size([10])


In [110]:
with open('/kaggle/working/model_params_resnet.h', 'w') as f:
    f.write('#ifndef MODEL_PARAMS_H\n')
    f.write('#define MODEL_PARAMS_H\n\n')
    f.write('#include <vector>\n\n')
    f.write('using namespace std;\n\n')
    for name, param in model_parameters.items():
        if "weight" in name and "conv" in name:
            shape = param.shape
            f.write(f'const vector<vector<vector<vector<float>>>> {name} = ')
            f.write('{\n')
            for item in param:
                f.write('{')
                for inner_item in item:
                    f.write('{')
                    for sub_item in inner_item:
                        f.write('{')
                        for sub_sub_item in sub_item:
                            f.write(f'{sub_sub_item}, ')
                        f.write('}, ')
                    f.write('}, ')
                f.write('}, ')
            f.write('};\n')
            
        elif "fc" in name and "weight" in name:
            shape = param.shape
            f.write(f'const vector<vector<float>> {name} = ')
            f.write('{\n')
            for item in param:
                f.write('{')
                for inner_item in item:
                    f.write(f'{inner_item}, ')
                f.write('}, ')
            f.write('};\n')
            
        else:
            shape = param.shape
            f.write(f'const vector<float> {name} = ')
            f.write('{\n')
            for item in param:
                f.write(f'{item}, ')
            f.write('};\n')
            
    f.write('#endif // MODEL_PARAMS_H\n')