In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

#

In [None]:
# VGG-16 layout: an integer is the output-channel count of a 3x3 convolution,
# 'M' is a 2x2 max-pool. The 13 convolutions listed here plus the 3 Linear
# layers of the classifier (4096 -> 4096 -> num_classes) give 16 weight layers.
VGG_16 = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']


class VGG_net(nn.Module):
  """VGG-style classifier: conv/batch-norm/ReLU stack followed by three Linear layers.

  Args:
    in_channels: number of channels of the input images.
    num_classes: size of the final Linear layer's output.
    architecture: optional layer specification (ints and 'M' entries, see
      `VGG_16`). Defaults to `VGG_16`.

  The feature maps are adaptively average-pooled to 7x7 before being flattened,
  so the classifier no longer requires the conv stack to emit exactly 7x7 maps.
  When the maps are already 7x7 the pooling leaves them unchanged.
  """

  def __init__(self, in_channels=3,num_classes=1000,architecture=None):
    super(VGG_net, self).__init__()
    if architecture is None:
      architecture = VGG_16
    self.in_channels = in_channels
    self.conv_layers = self.create_conv_layers(architecture)

    # Channel count leaving the conv stack: the last conv width, or the input
    # channels if the architecture contains no convolution at all.
    conv_widths = [entry for entry in architecture if isinstance(entry, int)]
    feature_channels = conv_widths[-1] if conv_widths else in_channels

    self.avgpool = nn.AdaptiveAvgPool2d((7,7))
    self.fcs = nn.Sequential(
        nn.Linear(feature_channels*7*7,4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(4096,4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(4096,num_classes)
    )

  def forward(self,x):
    """Return the class scores, shape (batch, num_classes), for a batch of images."""
    x = self.conv_layers(x)
    x = self.avgpool(x)
    x = x.reshape(x.shape[0],-1)
    x = self.fcs(x)
    return x

  def create_conv_layers(self,architecture):
    """Translate an architecture list into an `nn.Sequential`.

    Each integer becomes Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU
    with that many output channels; each 'M' becomes a 2x2 max-pool with stride 2.

    Raises:
      ValueError: if an entry is neither an integer nor 'M'.
    """
    layers = []
    in_channels = self.in_channels

    for x in architecture:
      if isinstance(x, int):
        out_channels = x

        layers += [nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=(3,3),stride=1,padding=1)]
        layers += [nn.BatchNorm2d(x)]
        layers += [nn.ReLU()]
        in_channels = x
      elif x == 'M':
        layers += [nn.MaxPool2d(kernel_size=(2,2),stride=(2,2))]
      else:
        # Previously unknown entries were dropped silently, hiding typos.
        raise ValueError(f"Unsupported architecture entry: {x!r}")

    return nn.Sequential(*layers)


In [None]:
# Smoke test: build the network on the available device, print its structure,
# then print the output shape for a single random 3x224x224 input.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

model = VGG_net(in_channels=3, num_classes=1000)
model = model.to(device)
print(model)

x = torch.randn(1, 3, 224, 224)
x = x.to(device)
print(model(x).size())

VGG_net(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3),

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
class GoogleNet(nn.Module):
  """Classifier assembled from a conv stem, nine `Inception_block`s and one Linear layer.

  Args:
    in_channels: number of channels of the input images.
    num_classes: size of the final Linear layer's output.

  The last feature maps are reduced to 1x1 with adaptive average pooling, so
  the spatial size reaching the pooling layer does not have to be exactly 7x7.
  """

  def __init__(self,in_channels=3,num_classes=1000):
    super(GoogleNet,self).__init__()
    self.conv1 = conv_block(in_channels=in_channels,out_channels=64,kernel_size=(7,7),stride=(2,2),padding=(3,3))
    self.maxpool1 = nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
    self.conv2 = conv_block(64,192,kernel_size=3,stride=1,padding=1)
    self.maxpool2 = nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
    # Argument order: in_channels,out_1x1,red_3x3,out_3x3,red_5x5,out_5x5,out_1x1pool.
    # Each block's in_channels equals the previous block's
    # out_1x1 + out_3x3 + out_5x5 + out_1x1pool (the branches are concatenated).
    self.inception3a = Inception_block(192,64,96,128,16,32,32)
    self.inception3b = Inception_block(256,128,128,192,32,96,64)
    self.maxpool3 = nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
    self.inception4a = Inception_block(480,192,96,208,16,48,64)
    self.inception4b = Inception_block(512,160,112,224,24,64,64)
    self.inception4c = Inception_block(512,128,128,256,24,64,64)
    self.inception4d = Inception_block(512,112,144,288,32,64,64)
    self.inception4e = Inception_block(528,256,160,320,32,128,128)
    self.maxpool4 = nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
    self.inception5a = Inception_block(832,256,160,320,32,128,128)
    self.inception5b = Inception_block(832,384,192,384,48,128,128)
    # Was AvgPool2d(kernel_size=7, stride=1), which only yields a 1x1 map when
    # the incoming map is exactly 7x7; adaptive pooling gives the same mean there.
    self.avgpool = nn.AdaptiveAvgPool2d((1,1))
    self.dropout = nn.Dropout(p=0.4)
    # Was hard-coded to 1000 outputs, silently ignoring `num_classes`.
    self.fc1 = nn.Linear(1024,num_classes)

  def forward(self,x):
    """Return the class scores, shape (batch, num_classes), for a batch of images."""
    x = self.conv1(x)
    x = self.maxpool1(x)
    x = self.conv2(x)
    x = self.maxpool2(x)

    x = self.inception3a(x)
    x = self.inception3b(x)
    x = self.maxpool3(x)

    x = self.inception4a(x)
    x = self.inception4b(x)
    x = self.inception4c(x)
    x = self.inception4d(x)
    x = self.inception4e(x)
    x = self.maxpool4(x)

    x = self.inception5a(x)
    x = self.inception5b(x)

    x = self.avgpool(x)

    # Flatten (batch, 1024, 1, 1) to (batch, 1024) for the Linear layer.
    x = x.reshape(x.shape[0],-1)
    x = self.dropout(x)
    x = self.fc1(x)
    return x


class Inception_block(nn.Module):
  """Four parallel branches over the same input, concatenated along dim 1.

  Branches: a 1x1 conv; a 1x1 reduction followed by a 3x3 conv; a 1x1 reduction
  followed by a 5x5 conv; a 3x3 max-pool (stride 1) followed by a 1x1 conv.
  The output therefore has out_1x1 + out_3x3 + out_5x5 + out_1x1pool channels.
  """

  def __init__(self,in_channels,out_1x1,red_3x3,out_3x3,red_5x5,out_5x5,out_1x1pool):
    super().__init__()

    # Branch 1: plain 1x1 convolution.
    self.branch1 = conv_block(in_channels, out_1x1, kernel_size=1)

    # Branch 2: 1x1 channel reduction, then a padded 3x3 convolution.
    reduce_3x3 = conv_block(in_channels, red_3x3, kernel_size=1)
    expand_3x3 = conv_block(red_3x3, out_3x3, kernel_size=3, padding=1)
    self.branch2 = nn.Sequential(reduce_3x3, expand_3x3)

    # Branch 3: 1x1 channel reduction, then a padded 5x5 convolution.
    reduce_5x5 = conv_block(in_channels, red_5x5, kernel_size=1)
    expand_5x5 = conv_block(red_5x5, out_5x5, kernel_size=5, padding=2)
    self.branch3 = nn.Sequential(reduce_5x5, expand_5x5)

    # Branch 4: size-preserving max-pool, then a 1x1 convolution.
    pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
    pool_projection = conv_block(in_channels, out_1x1pool, kernel_size=1)
    self.branch4 = nn.Sequential(pool, pool_projection)

  def forward(self,x):
    """Apply every branch to `x` and concatenate the results on the channel axis."""
    branches = (self.branch1, self.branch2, self.branch3, self.branch4)
    branch_outputs = [branch(x) for branch in branches]
    return torch.cat(branch_outputs, 1)

class conv_block(nn.Module):
  """Conv2d followed by BatchNorm2d and ReLU.

  Args:
    in_channels: channels of the incoming tensor.
    out_channels: channels produced by the convolution.
    **kwargs: forwarded unchanged to `nn.Conv2d` (kernel_size, stride, padding, ...).
  """

  def __init__(self,in_channels,out_channels,**kwargs):
    super().__init__()
    # Submodules are registered in the order relu, conv, batchnorm.
    self.relu = nn.ReLU()
    self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
    self.batchnorm = nn.BatchNorm2d(out_channels)

  def forward(self,x):
    """Return relu(batchnorm(conv(x)))."""
    convolved = self.conv(x)
    normalized = self.batchnorm(convolved)
    return self.relu(normalized)


In [None]:
# Shape check: a random batch of three 3x224x224 inputs through a default GoogleNet.
x = torch.randn(3, 3, 224, 224)
model = GoogleNet()
print(model(x).size())

torch.Size([3, 1000])


In [None]:
import torch
import torch.nn as nn



class block(nn.Module):
  """Residual unit: 1x1 conv -> 3x3 conv -> 1x1 conv, each followed by batch norm.

  Args:
    in_channels: channels of the incoming tensor.
    out_channels: width of the first two convolutions; the last convolution
      emits `out_channels * 4` channels.
    identity_downsample: optional module applied to the skip connection so it
      matches the main path's shape before the addition.
    stride: stride of the 3x3 convolution.
  """

  def __init__(self, in_channels,out_channels,identity_downsample=None,stride=1):
    super().__init__()
    self.expansion = 4
    widened = out_channels * self.expansion

    self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
    self.bn2 = nn.BatchNorm2d(out_channels)
    self.conv3 = nn.Conv2d(out_channels, widened, kernel_size=1, stride=1, padding=0)
    self.bn3 = nn.BatchNorm2d(widened)
    self.relu = nn.ReLU()
    self.identity_downsample = identity_downsample

  def forward(self,x):
    """Return relu(main_path(x) + skip(x))."""
    shortcut = x

    out = self.relu(self.bn1(self.conv1(x)))
    out = self.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))

    # Project the skip connection only when a projection module was supplied.
    if self.identity_downsample is not None:
      shortcut = self.identity_downsample(shortcut)

    out += shortcut
    return self.relu(out)


class ResNet(nn.Module):
  """Residual network: conv stem, four stages of residual blocks, pooled Linear head.

  Args:
    block: residual block class, called as
      `block(in_channels, out_channels, identity_downsample, stride)`.
    layers: number of blocks in each of the four stages (indices 0-3 are read).
    image_channels: channels of the input images.
    num_classes: size of the final Linear layer's output.
  """

  def __init__(self,block,layers,image_channels,num_classes):
    super().__init__()
    # Running channel count; `_make_layer` updates it after each stage.
    self.in_channels = 64
    self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # ResNet stages layer1..layer4 as (out_channels, stride) pairs.
    stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
    for index, (width, stage_stride) in enumerate(stage_specs):
      stage = self._make_layer(block, layers[index], out_channels=width, stride=stage_stride)
      setattr(self, f"layer{index + 1}", stage)

    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512*4, num_classes)

  def forward(self,x):
    """Return the class scores, shape (batch, num_classes), for a batch of images."""
    pipeline = (
        self.conv1, self.bn1, self.relu, self.maxpool,
        self.layer1, self.layer2, self.layer3, self.layer4,
        self.avgpool,
    )
    for module in pipeline:
      x = module(x)

    # Flatten the pooled features before the Linear head.
    x = x.reshape(x.size(0), -1)
    return self.fc(x)


  def _make_layer(self,block,num_residual_blocks,out_channels,stride):
    """Build one stage of `num_residual_blocks` blocks as an `nn.Sequential`.

    Only the first block receives `stride` and, when the skip connection would
    not match the main path (stride other than 1, or a channel mismatch), a
    1x1 conv + batch-norm projection. Updates `self.in_channels` to
    `out_channels * 4` for the blocks and stages that follow.
    """
    expanded = out_channels * 4
    shapes_match = stride == 1 and self.in_channels == expanded

    projection = None
    if not shapes_match:
      projection = nn.Sequential(
          nn.Conv2d(self.in_channels, expanded, kernel_size=1, stride=stride),
          nn.BatchNorm2d(expanded),
      )

    stage = [block(self.in_channels, out_channels, projection, stride)]
    self.in_channels = expanded

    for _ in range(num_residual_blocks - 1):
      stage.append(block(self.in_channels, out_channels))
    return nn.Sequential(*stage)

def ResNet50(img_channels=3,num_classes=1000):
  """Build a `ResNet` from `block` with stage depths [3, 4, 6, 3]."""
  stage_depths = [3, 4, 6, 3]
  return ResNet(block, stage_depths, image_channels=img_channels, num_classes=num_classes)

def ResNet101(img_channels=3,num_classes=1000):
  """Build a `ResNet` from `block` with stage depths [3, 4, 23, 3]."""
  stage_depths = [3, 4, 23, 3]
  return ResNet(block, stage_depths, image_channels=img_channels, num_classes=num_classes)

def ResNet152(img_channels=3,num_classes=1000):
  """Build a `ResNet` from `block` with stage depths [3, 8, 36, 3]."""
  stage_depths = [3, 8, 36, 3]
  return ResNet(block, stage_depths, image_channels=img_channels, num_classes=num_classes)

def test():
  """Push a random batch of two 3x224x224 inputs through ResNet50 and print the output shape."""
  model = ResNet50()
  batch = torch.rand(2, 3, 224, 224)
  print(model(batch).shape)

test()

torch.Size([2, 1000])


In [None]:


# Imports
import torch
import torch.nn.functional as F  # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets  # Standard datasets
import torchvision.transforms as transforms  # Transformations we can perform on our dataset for augmentation
from torch import optim  # For optimizers like SGD, Adam, etc.
from torch import nn  # All neural network modules
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management by creating mini-batches etc.
from tqdm import tqdm  # For nice progress bar!
import torchvision


# Hyperparameters
in_channels = 3
num_classes = 10
learning_rate = 3e-4  # karpathy's constant
batch_size = 64
num_epochs = 3

# Set device: use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

import sys
#load the pretrain model and modify it
class Identity(nn.Module):
    """Pass-through module: `forward` returns its input object unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return `x` as-is."""
        return x
# Load the pretrained VGG16 and freeze every parameter it ships with. Layers
# assigned afterwards are newly created, so they stay trainable.
model = torchvision.models.vgg16(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

# With `avgpool` replaced by a pass-through, the classifier receives the
# flattened feature maps directly. For the 224x224 inputs produced by the
# transform below that is 512*7*7 values per sample (assumes the five 2x2
# poolings of the VGG16 feature extractor: 224 / 2**5 = 7).
model.avgpool = Identity()
# The previous head, Linear(512,10) -> ReLU -> Linear(100,10), could not run:
# the first layer's 10 outputs did not match the second layer's 100 inputs,
# and 512 did not match the flattened feature size.
model.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 100),
    nn.ReLU(),
    nn.Linear(100, num_classes),
)
model.to(device)

# Resize to 224x224, replicate the single channel to 3, convert to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor()
])

import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import optim, nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import torchvision

# Hyperparameters
num_classes = 10
learning_rate = 3e-4
batch_size = 64
num_epochs = 3
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# 1. Data transforms: resize to 224x224, convert to 3 channels, tensor
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),
    ]
)

# 2. Load Data: MNIST train/test splits, downloaded into dataset/ when missing.
train_dataset = datasets.MNIST(root="dataset/", train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root="dataset/", train=False, transform=transform, download=True)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 3. Load pre-trained VGG16 and modify classifier
model = torchvision.models.vgg16(pretrained=True)
for param in model.features.parameters():  # Only freeze feature extractor
    param.requires_grad = False

# Replace classifier: for 224x224 images the flattened feature maps have
# 25088 (512*7*7) values, so `avgpool` is swapped for a pass-through and the
# new head takes that size directly.
# (An intermediate head, Linear(512,10) -> ReLU -> Linear(100,10), used to be
# assigned here first; its layer sizes did not match each other and it was
# overwritten immediately, so it has been removed.)
model.avgpool = Identity()
model.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, num_classes)
)
model.to(device)

# 4. Loss and optimizer
criterion = nn.CrossEntropyLoss()
# Only the classifier's parameters are handed to the optimizer.
optimizer = optim.Adam(params=model.classifier.parameters(), lr=learning_rate)

# 5. Training loop
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
        data, targets = data.to(device), targets.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward, then parameter update
        loss.backward()
        optimizer.step()

# 6. Accuracy check
def check_accuracy(loader, model):
    """Return the fraction of samples in `loader` that `model` classifies correctly.

    Args:
        loader: iterable yielding `(inputs, labels)` batches.
        model: module mapping inputs to per-class scores of shape (batch, classes).

    Returns:
        `num_correct / num_samples` as a float in [0, 1].

    Raises:
        ZeroDivisionError: if `loader` yields no samples.

    The model is evaluated in eval mode without gradient tracking, and is put
    back into whichever mode (train or eval) it was in when the call started.
    """
    # Move batches to wherever the model's weights live; fall back to the
    # notebook-level `device` only for a model that has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    was_training = model.training
    num_correct = 0
    num_samples = 0
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(target_device)
                y = y.to(target_device)
                scores = model(x)
                # Index of the highest score along dim 1 is the predicted class.
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode even if evaluation raised.
        model.train(was_training)
    return num_correct / num_samples


# Evaluate on both splits (training set first), then report each as a percentage.
train_accuracy = check_accuracy(train_loader, model)
print(f"Accuracy on training set: {train_accuracy*100:.2f}")
test_accuracy = check_accuracy(test_loader, model)
print(f"Accuracy on test set: {test_accuracy*100:.2f}")

  0%|          | 0/938 [00:00<?, ?it/s]


RuntimeError: Given groups=1, weight of size [64, 3, 3, 3], expected input[64, 1, 28, 28] to have 3 channels, but got 1 channels instead