# AlexNet
In this, I will be attempting to reimplement AlexNet using PyTorch. The data loading part of this implementation is taken from https://github.com/dansuh17/alexnet-pytorch/blob/master/model.py

In [47]:
# importing torch and torchvision methods
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

# modules for dataset transformation/processing
from torch import nn, optim
from tensorboardX import SummaryWriter
from torchvision.datasets import CIFAR10
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
from torchvision import transforms
from torchvision.utils import save_image

# importing basic plotting + computation libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# file management library
import os

# checking if GPU available for training
device = ("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [48]:
# defining constants
NUM_EPOCHS = 90
BATCH_SIZE = 128

MOMENTUM = 0.9
LR_DECAY = 0.0005
LR_INIT = 0.01

IMAGE_DIM = 227
NUM_CLASSES = 1000  # 1000 classes for imagenet 2012 dataset

# modify this to point to your data directory
INPUT_ROOT_DIR = 'alexnet_data_in'
TRAIN_IMG_DIR = 'alexnet_data_in/imagenet'
OUTPUT_DIR = 'alexnet_data_out'
LOG_DIR = OUTPUT_DIR + '/tblogs'  # tensorboard logs
CHECKPOINT_DIR = OUTPUT_DIR + '/models'  # model checkpoints

In [53]:
# defining the network structure
class AlexNet(nn.Module):
    
    def __init__(self, num_classes=1000):
        """
        
        """
    
    
        # calling parent constructor        
        super().__init__()
        
        # compsition of convolutional + maxpooling + activation layers
        self.extract_features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, stride=4, kernel_size=11, padding=2),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2, stride=1),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1, stride=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1, stride=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        
        # composition of linear classification layers
        self.MLP = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=1000)
        )
        
        self.init_bias()
        
        # defining the network in terms of individual layers
        """
        self.cnv1 = nn.Conv2d(in_channels=3, out_channels=96, stride=4, kernel_size=11, padding=0)
        self.mp = nn.MaxPool2d(kernel_size=3, stride=2)
        self.cnv2 = nn.Conv2d(in_channels=96, out_channels=256,kernel_size=5, padding=2, stride=1)
        self.cnv3 = nn.Conv2d(in_channels=256, out_channels=384,kernel_size=3, padding=1, stride=1)
        self.cnv4 = nn.Conv2d(in_channels=384, out_channels=384,kernel_size=3, padding=1, stride=1)
        self.cnv5 = nn.Conv2d(in_channels=384, out_channels=256,kernel_size=3, padding=1, stride=1)
        self.fc1 = nn.Linear(in_features=9216, out_features=4096)
        self.fc2 = nn.Linear(in_features=4096, out_features=4096)
        self.fc3 = nn.Linear(in_features=4096, out_features=10)
        """
        
    def forward(self, x):
        
        x = self.extract_features(x)
        x = x.view(-1, 256 *6 * 6)
        x = self.MLP(x)
        
        return x
        
        # original definition of the forward propogation with individual layer defs
        """
        x = F.relu(self.conv1(x))
        x = self.mp(x)
        x = F.relu(self.conv2(x))
        x = self.mp(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.mp(x)
        
        x = x.reshape(x.shape[0],-1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        
        return x
        """
        
    def init_bias(self):
        for layer in self.extract_features:
            if isinstance(layer, nn.Conv2d):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0)
        # original paper = 1 for Conv2d layers 2nd, 4th, and 5th conv layers
        nn.init.constant_(self.extract_features[4].bias, 1)
        nn.init.constant_(self.extract_features[10].bias, 1)
        nn.init.constant_(self.extract_features[12].bias, 1)

In [57]:
# loading the dataset


# getting an initial seed
seed = torch.initial_seed()

tbwriter = SummaryWriter(log_dir=LOG_DIR)

# loading the model
alexnet = AlexNet(num_classes=NUM_CLASSES).to(device)

# using dataloaders
dataset = datasets.ImageFolder(TRAIN_IMG_DIR, transforms.Compose([
    transforms.CenterCrop(IMAGE_DIM),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]))

dataloader = data.DataLoader(
    dataset,
    shuffle=True,
    pin_memory=True,
    num_workers=8,
    drop_last=True,
    batch_size=BATCH_SIZE
)

FileNotFoundError: [Errno 2] No such file or directory: 'alexnet_data_in/imagenet'

In [59]:
# creating the optimizer
optimizer = optim.Adam(params=AlexNet.parameters(), lr=1e-4)

AttributeError: 'int' object has no attribute 'named_parameters'

In [None]:
# model training
total_steps=1

for epoch in range(NUM_EPOCHS):
    lr_scheduler.step()
    for imgs, classes in dataloader:
        imgs, classes = imgs.to(device), classes.to(device)
        
        output=alexnet(imgs)
        loss=F.cross_entropy(output, classes)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if total_steps % 10 == 0:
            with torch.no_grad():
                _, preds = torch.max(output, 1)
                accuracy = torch.sum(preds == classes)
                
        print('Epoch: {} \tStep: {} \tLoss: {:.4f} \tAcc: {}'
                        .format(epoch + 1, total_steps, loss.item(), accuracy.item()))
                    tbwriter.add_scalar('loss', loss.item(), total_steps)
                    tbwriter.add_scalar('accuracy', accuracy.item(), total_steps)
        
        if total_steps % 100 == 0:
                with torch.no_grad():
                    # print and save the grad of the parameters
                    # also print and save parameter values
                    print('*' * 10)
                    for name, parameter in alexnet.named_parameters():
                        if parameter.grad is not None:
                            avg_grad = torch.mean(parameter.grad)
                            print('\t{} - grad_avg: {}'.format(name, avg_grad))
                            tbwriter.add_scalar('grad_avg/{}'.format(name), avg_grad.item(), total_steps)
                            tbwriter.add_histogram('grad/{}'.format(name),
                                    parameter.grad.cpu().numpy(), total_steps)
                        if parameter.data is not None:
                            avg_weight = torch.mean(parameter.data)
                            print('\t{} - param_avg: {}'.format(name, avg_weight))
                            tbwriter.add_histogram('weight/{}'.format(name),
                                    parameter.data.cpu().numpy(), total_steps)
                            tbwriter.add_scalar('weight_avg/{}'.format(name), avg_weight.item(), total_steps)
        
        total_steps += 1
        
    checkpoint_path = os.path.join(CHECKPOINT_DIR, 'alexnet_states_e{}.pkl'.format(epoch + 1))
        state = {
            'epoch': epoch,
            'total_steps': total_steps,
            'optimizer': optimizer.state_dict(),
            'model': alexnet.state_dict(),
            'seed': seed,
        }
        torch.save(state, checkpoint_path)