In [1]:
#Input for ConvNet is fixed of [224,224,3]
#Conv layer has the kernel_size = (3,3) , (1,1) with the stride fixed to 1 pixel . Padding = 1 for (3 ,3) layer
#Linear follow up by non-linearity
#Consist of five max pooling layers , followed some of the conv layers with kernel_size = (2,2) and stride of 2
#A stack of convolutional layers followed by three linear layers , first two have 4096 , third has 1000 classification
#The final is softmax layer
#The width of conv. layers (the number of channels) is rather small, starting from 64 in the first layer and then
#First two fully connected layer has dropout(p = 0.5)
#increasing by a factor of 2 after each max-pooling layer, until it reaches 512.
#optimizer = torch.optim.SGD(momentum = 0.9 , weight_decay = 5*10**-4 , lr = 10**-2)
#nn.MaxPool2d(kernel_size = 2 , stride = 1)
#Lr is decrease to a factor of 10 if validation set accuracy stop improving

In [22]:
import torch
from torch import nn
from tqdm.auto import tqdm
from going_modular import data_setup , dataloader_setup , engine

device = "cuda" if torch.cuda.is_available() else "cpu"

VGG_types = {
"VGG11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
"VGG13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
"VGG16": [64,64,"M",128,128,"M",256,256,256,"M",512,512,512,"M",512,512,512,"M",],
"VGG19": [64,64,"M",128,128,"M",256,256,256,256,"M",512,512,512,512,
          "M",512,512,512,512,"M",],}
VGGtype = "VGG16"

In [8]:
class VGG(nn.Module):
    def __init__(self , in_channels = 3 , num_classes = 1000):
        super(VGG , self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG_types["VGG16"])
        self.classifier = nn.Sequential(
            nn.Linear(in_features = 512*7*7 , out_features = 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features = 4096 , out_features = 4096),
            nn.ReLU(),
            nn.Dropout(p = 0.5),
            nn.Linear(in_features = 4096 , out_features = num_classes)
        )
    def forward(self , x):
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0],-1) #Flattening every dimension except batch_size
        x = self.classifier(x)
        return x
    def create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels
        for x in architecture:
            if type(x) == int:
                out_channels = x
                layers += [
                    nn.Conv2d(in_channels = in_channels , out_channels = out_channels, 
                              kernel_size = 3 , padding = 1 , stride = 1),
                    nn.BatchNorm2d(x),
                    nn.ReLU()
                ]
                in_channels = x
            elif x == "M":
                layers += [nn.MaxPool2d(kernel_size = 2 , stride = 2)]
        return nn.Sequential(*layers)

In [10]:
random_tensor = torch.randn(1,3,224,224).to(device)
model = VGG(in_channels = 3 , num_classes = 3).to(device)
#print(model)
print(model(random_tensor).shape)

torch.Size([1, 3])


In [19]:
import os
import torchvision
from torchvision import transforms , datasets
from torch.utils.data import DataLoader

BATCH_SIZE = 32
NUM_WORKERS = os.cpu_count()

image_path = data_setup.download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
                           destination="pizza_steak_sushi")
train_dir = image_path/"train"
test_dir = image_path/"test"

def create_dataloaders(train_dir , test_dir , transform , batch_size = BATCH_SIZE , num_workers = NUM_WORKERS):
    train_data = datasets.ImageFolder(root = train_dir , transform = transform)
    test_data = datasets.ImageFolder(root = test_dir , transform = transform)

    num_classes = train_data.classes

    train_dataloader = DataLoader(train_data , batch_size = BATCH_SIZE , num_workers = NUM_WORKERS ,
                                  pin_memory = True ,shuffle = True)
    test_dataloader = DataLoader(test_data , batch_size = BATCH_SIZE , num_workers = NUM_WORKERS ,
                                pin_memory = True , shuffle = False)
    return train_dataloader , test_dataloader , num_classes

[INFO] data\pizza_steak_sushi directory exists, skipping download.


In [24]:
def train_step(model , train_dataloader, loss_fn , optimizer , device = device):
    model.train()
    train_loss , train_acc = 0,0
    for batch , (x,y) in enumerate(train_dataloader):
        x , y = x.to(device) , y.to(device)
        y_preds = model(x)
        loss = loss_fn(y_preds, y)
        train_loss+= loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        y_preds_class = torch.argmax(torch.softmax(y_preds , dim = 1), dim = 1)
        train_acc += (y_preds_class == y).sum().item()/len(y_preds)
    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)
    return train_loss, train_acc
def test_step(model , test_dataloader , loss_fn , device = device):
    model.eval()
    test_loss , test_acc = 0,0
    with torch.inference_mode():
        for batch , (x,y) in enumerate(test_dataloader):
            x , y = x.to(device) , y.to(device)
            y_preds = model(x)
            loss = loss_fn(y_preds , y)
            test_loss += loss.item()

            y_preds_class = torch.argmax(y_preds , dim = 1)
            test_acc += (y_preds_class == y).sum().item()/len(y_preds_class)
    test_loss/=len(test_dataloader)
    test_acc/=len(test_dataloader)
    return test_loss , test_acc
def train(model , train_dataloader, test_dataloader , loss_fn , optimizer , epochs , device = device):
    results = {"train_loss":[],
              "train_acc":[],
              "test_loss":[],
              "test_acc":[]}
    for epoch in tqdm(range(epochs)):
        train_loss , train_acc = train_step(model = model , train_dataloader = train_dataloader , 
                                      loss_fn = loss_fn ,
                                      optimizer = optimizer , 
                                      device = device)
        test_loss , test_acc = test_step(model = model , test_dataloader = test_dataloader,
                                        loss_fn = loss_fn,
                                        device = device)
        print(
          f"Epoch: {epochs+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
         )
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)
    return results

In [25]:
weights = torchvision.models.ViT_B_16_Weights.DEFAULT
auto_transform = weights.transforms()

train_dataloader , test_dataloader , num_classes = create_dataloaders(train_dir = train_dir 
                                                                    , test_dir = test_dir , 
                                                                     transform = auto_transform,
                                                                     batch_size = BATCH_SIZE,
                                                                     num_workers = NUM_WORKERS)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params = model.parameters() , momentum = 0.9 , lr = 0.01)

model_results = train(model = model,
                     train_dataloader = train_dataloader,
                     test_dataloader = test_dataloader,
                     loss_fn = loss_fn,
                     optimizer = optimizer,
                     epochs = 10,
                     device = device)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 11 | train_loss: 1.3738 | train_acc: 0.3984 | test_loss: 1.0067 | test_acc: 0.5417
Epoch: 11 | train_loss: 1.9225 | train_acc: 0.3125 | test_loss: 29.2798 | test_acc: 0.3920
Epoch: 11 | train_loss: 2.0587 | train_acc: 0.4492 | test_loss: 248.3902 | test_acc: 0.5417
Epoch: 11 | train_loss: 3.4229 | train_acc: 0.3906 | test_loss: 6914.2114 | test_acc: 0.5417
Epoch: 11 | train_loss: 8.6835 | train_acc: 0.3281 | test_loss: 137.3648 | test_acc: 0.2604
Epoch: 11 | train_loss: 10.1790 | train_acc: 0.3945 | test_loss: 404.4097 | test_acc: 0.1979
Epoch: 11 | train_loss: 1.7417 | train_acc: 0.4258 | test_loss: 40.6589 | test_acc: 0.1979
Epoch: 11 | train_loss: 2.8186 | train_acc: 0.3438 | test_loss: 30.9871 | test_acc: 0.4517
Epoch: 11 | train_loss: 4.2234 | train_acc: 0.2930 | test_loss: 34.5978 | test_acc: 0.5417
Epoch: 11 | train_loss: 6.7339 | train_acc: 0.4258 | test_loss: 132.1926 | test_acc: 0.3305
