<a href="https://colab.research.google.com/github/IoannisDem/SimpleDL_models/blob/main/alexnet_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Thu Jun 23 14:32:14 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
from torchvision import transforms
import torch.optim as optim

In [4]:
!pip install torchsummary 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
from torchsummary import summary

In [6]:
import torch.nn as nn
from torch.nn.functional import softmax

In [7]:
#create convolution 1 and 2
class conv_block(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
    super(conv_block, self).__init__()
    self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
    self.relu = nn.ReLU()
    self.pool_max = nn.MaxPool2d(kernel_size=(3,3), stride=(2,2))
    #self.maths = None

  def forward(self, x):
    return self.pool_max(self.relu(self.conv(x)))

In [8]:
#x = torch.randn(4, 3, 227, 227)
#print(conv_block(3, 96, kernel_size=(11,11), stride=(4,4), padding=(0,0))(x))

In [9]:
#create AlexNet
#fiiiiiiiiiix convolution 5

class AlexNet(nn.Module):
  def __init__(self):
    super(AlexNet, self).__init__()
    self.convo = nn.Sequential(
        conv_block(3, 96, kernel_size=(11,11), stride=(4,4), padding=(0,0)),
        conv_block(96, 256, kernel_size=(5,5), stride=(1,1), padding=(2,2)),
        nn.Conv2d(256, 384, kernel_size=(3,3), stride=(1,1), padding=(1,1)), #conv3
        nn.ReLU(),
        nn.Conv2d(384, 384, kernel_size=(3,3), stride=(1,1), padding=(1,1)), #conv4
        nn.ReLU(),
        nn.Conv2d(384, 256, kernel_size=(3,3), stride=(1,1), padding=(1,1)), #conv5
        nn.MaxPool2d(kernel_size=(3,3), stride=(2,2)),
        nn.ReLU())
    self.lin = nn.Sequential(
        nn.Linear(9216, 4096), #fc1
        nn.ReLU(),
        nn.Linear(4096, 4096), #fc2
        nn.ReLU(),
        nn.Linear(4096, 10), #fc3
    )
  
  def forward(self, x):
    out = self.convo(x)
    out = out.reshape(out.size(0), -1)
    print(out.shape)
    return(softmax(self.lin(out)))

In [11]:
if torch.cuda.is_available():
  device = torch.device('cuda')
  print(1)

1


In [12]:
#initiate model
model = AlexNet().to(device)

In [14]:
#creating a dinstinct transform class for the train, validation and test dataset
tranform_train = transforms.Compose([transforms.Resize((227,227)), transforms.RandomHorizontalFlip(p=0.7), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
tranform_test = transforms.Compose([transforms.Resize((227,227)), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

#preparing the train, validation and test dataset
torch.manual_seed(43)
train_ds = CIFAR10("data/", train=True, download=True, transform=tranform_train) #40,000 original images + transforms
val_size = 10000 #there are 10,000 test images and since there are no transforms performed on the test, we keep the validation as 10,000
train_size = len(train_ds) - val_size
train_ds, val_ds = random_split(train_ds, [train_size, val_size]) #Extracting the 10,000 validation images from the train set
test_ds = CIFAR10("data/", train=False, download=True, transform=tranform_test) #10,000 images

#passing the train, val and test datasets to the dataloader
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=64, shuffle=False)
test_dl = DataLoader(test_ds, batch_size=64, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/cifar-10-python.tar.gz to data/
Files already downloaded and verified


In [15]:
## Loss and optimizer
learning_rate = 1e-4 #I picked this because it seems to be the most used by experts
load_model = True
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr= learning_rate) #Adam seems to be the most popular for deep learning

for epoch in range(5): #I decided to train the model for 50 epochs
    loss_ep = 0
    
    for batch_idx, (data, targets) in enumerate(train_dl):
        data = data.to(device=device)
        targets = targets.to(device=device)
        ## Forward Pass
        optimizer.zero_grad()
        scores = model(data)
        loss = criterion(scores,targets)
        loss.backward()
        optimizer.step()
        loss_ep += loss.item()
    print(f"Loss in epoch {epoch} :::: {loss_ep/len(train_dl)}")

    with torch.no_grad():
        num_correct = 0
        num_samples = 0
        for batch_idx, (data,targets) in enumerate(val_dl):
            data = data.to(device=device)
            targets = targets.to(device=device)
            ## Forward Pass
            scores = model(data)
            _, predictions = scores.max(1)
            num_correct += (predictions == targets).sum()
            num_samples += predictions.size(0)
        print(
            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
        )

torch.Size([64, 9216])




torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size(

In [16]:
for epoch in range(15): #I decided to train the model for 50 epochs
    loss_ep = 0
    
    for batch_idx, (data, targets) in enumerate(train_dl):
        data = data.to(device=device)
        targets = targets.to(device=device)
        ## Forward Pass
        optimizer.zero_grad()
        scores = model(data)
        loss = criterion(scores,targets)
        loss.backward()
        optimizer.step()
        loss_ep += loss.item()
    print(f"Loss in epoch {epoch} :::: {loss_ep/len(train_dl)}")

    with torch.no_grad():
        num_correct = 0
        num_samples = 0
        for batch_idx, (data,targets) in enumerate(val_dl):
            data = data.to(device=device)
            targets = targets.to(device=device)
            ## Forward Pass
            scores = model(data)
            _, predictions = scores.max(1)
            num_correct += (predictions == targets).sum()
            num_samples += predictions.size(0)
        print(
            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
        )

torch.Size([64, 9216])
torch.Size([64, 9216])




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64,

In [18]:
num_correct = 0
num_samples = 0
for batch_idx, (data,targets) in enumerate(test_dl):
    data = data.to(device)
    targets = targets.to(device)
    ## Forward Pass
    scores = model(data)
    _, predictions = scores.max(1)
    num_correct += (predictions == targets).sum()
    num_samples += predictions.size(0)
print(
    f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
)

torch.Size([64, 9216])
torch.Size([64, 9216])




torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size([64, 9216])
torch.Size(

In [43]:
#save model
path = F"/content/gdrive/My Drive/ColabNotebooks/models/alexN/alexN"
torch.save(model.state_dict(), path)