In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from tqdm import tqdm

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [17]:
# Define the VGG model
class VGG(nn.Module):
    def __init__(self, num_classes=10):
        super(VGG, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Conv2d(256, 10, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1))
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x.squeeze(2).squeeze(2)

In [18]:
# x = torch.rand(32, 1, 28, 28).to(device)
# model = VGG().to(device)
# model(x).shape

In [19]:
# Load the MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

In [20]:
# Initialize the model, loss function, and optimizer

model = VGG().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-4)

In [21]:
# Train the model
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader):
        optimizer.zero_grad()
        outputs = model(images.to(device))
        loss = criterion(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_dataset)}")

# Save the trained model weights
torch.save(model.state_dict(), '/kaggle/working/vgg_mnist.pth')
print("Model weights saved successfully.")

100%|██████████| 938/938 [00:22<00:00, 41.87it/s]


Epoch 1/20, Loss: 1.5314216805140177


100%|██████████| 938/938 [00:22<00:00, 41.68it/s]


Epoch 2/20, Loss: 1.4335334891637166


100%|██████████| 938/938 [00:22<00:00, 41.42it/s]


Epoch 3/20, Loss: 1.424823322614034


100%|██████████| 938/938 [00:22<00:00, 41.38it/s]


Epoch 4/20, Loss: 1.4214899389266968


100%|██████████| 938/938 [00:22<00:00, 41.96it/s]


Epoch 5/20, Loss: 1.4183476135253905


100%|██████████| 938/938 [00:22<00:00, 41.68it/s]


Epoch 6/20, Loss: 1.4176272205352782


100%|██████████| 938/938 [00:22<00:00, 41.82it/s]


Epoch 7/20, Loss: 1.4161494490941366


100%|██████████| 938/938 [00:22<00:00, 41.59it/s]


Epoch 8/20, Loss: 1.4143956875483195


100%|██████████| 938/938 [00:22<00:00, 41.15it/s]


Epoch 9/20, Loss: 1.4143116863250733


100%|██████████| 938/938 [00:22<00:00, 41.54it/s]


Epoch 10/20, Loss: 1.41233095925649


100%|██████████| 938/938 [00:22<00:00, 41.69it/s]


Epoch 11/20, Loss: 1.4129822092692057


100%|██████████| 938/938 [00:22<00:00, 41.73it/s]


Epoch 12/20, Loss: 1.4119608183542887


100%|██████████| 938/938 [00:22<00:00, 41.58it/s]


Epoch 13/20, Loss: 1.4125634253819783


100%|██████████| 938/938 [00:22<00:00, 41.77it/s]


Epoch 14/20, Loss: 1.4123299784978232


100%|██████████| 938/938 [00:22<00:00, 41.55it/s]


Epoch 15/20, Loss: 1.4116923143386841


100%|██████████| 938/938 [00:22<00:00, 41.94it/s]


Epoch 16/20, Loss: 1.410939383506775


100%|██████████| 938/938 [00:22<00:00, 41.70it/s]


Epoch 17/20, Loss: 1.2974825867335003


100%|██████████| 938/938 [00:22<00:00, 41.16it/s]


Epoch 18/20, Loss: 1.1853390628178915


100%|██████████| 938/938 [00:22<00:00, 42.06it/s]


Epoch 19/20, Loss: 1.1831222778320312


100%|██████████| 938/938 [00:22<00:00, 41.91it/s]

Epoch 20/20, Loss: 1.1843793947855632
Model weights saved successfully.





In [22]:
model_parameters = {}
for name, param in model.named_parameters():
    name = name.replace('.', '_')
    model_parameters[name] = param.cpu().detach().numpy()
    print(f'name is : {name} || shape is {param.shape}')

name is : features_0_weight || shape is torch.Size([32, 1, 3, 3])
name is : features_0_bias || shape is torch.Size([32])
name is : features_2_weight || shape is torch.Size([32, 32, 3, 3])
name is : features_2_bias || shape is torch.Size([32])
name is : features_5_weight || shape is torch.Size([64, 32, 3, 3])
name is : features_5_bias || shape is torch.Size([64])
name is : features_7_weight || shape is torch.Size([64, 64, 3, 3])
name is : features_7_bias || shape is torch.Size([64])
name is : features_10_weight || shape is torch.Size([128, 64, 3, 3])
name is : features_10_bias || shape is torch.Size([128])
name is : features_12_weight || shape is torch.Size([128, 128, 3, 3])
name is : features_12_bias || shape is torch.Size([128])
name is : features_15_weight || shape is torch.Size([256, 128, 3, 3])
name is : features_15_bias || shape is torch.Size([256])
name is : features_17_weight || shape is torch.Size([256, 256, 3, 3])
name is : features_17_bias || shape is torch.Size([256])
name i

In [24]:
with open('/kaggle/working/model_params.h', 'w') as f:
    f.write('#ifndef MODEL_PARAMS_H\n')
    f.write('#define MODEL_PARAMS_H\n\n')
    f.write('#include <vector>\n\n')
    f.write('using namespace std;\n\n')
    for name, param in model_parameters.items():
        if "weight" in name and "classifier" not in name:
            shape = param.shape
            f.write(f'const vector<vector<vector<vector<float>>>> {name} = ')
            f.write('{\n')
            for item in param:
                f.write('{')
                for inner_item in item:
                    f.write('{')
                    for sub_item in inner_item:
                        f.write('{')
                        for sub_sub_item in sub_item:
                            f.write(f'{sub_sub_item}, ')
                        f.write('}, ')
                    f.write('}, ')
                f.write('}, ')
            f.write('};\n')
        
        elif "classifier" in name and "bias" not in name:
            f.write(f'const vector<vector<vector<vector<float>>>> {name} = ')
            f.write('{\n')
            for item in param:
                f.write('{')
                for inner_item in item:
                    f.write('{')
                    for sub_item in inner_item:
                        f.write('{')
                        for sub_sub_item in sub_item:
                            f.write(f'{sub_sub_item}, ')
                        f.write('}, ')
                    f.write('}, ')
                f.write('}, ')
            f.write('};\n')
            
        elif "bias" in name:
            shape = param.shape
            f.write(f'const vector<float> {name} = ')
            f.write('{\n')
            for item in param:
                f.write(f'{item}, ')
            f.write('};\n')
    f.write('#endif // MODEL_PARAMS_H\n')