# Write AlexNet from Scratch
Implement the **AlexNet** architecture in PyTorch by completing the required sections. The model should include convolutional layers, ReLU activations, pooling layers, and fully connected layers to process image data for classification tasks.

1. Define the AlexNet Architecture:
**Feature Extractor (Convolutional Base):**
* Stack convolutional layers with appropriate kernel sizes, strides, and paddings.
* Use nn.ReLU as the activation function after each convolution.
* Apply nn.MaxPool2d after selected layers to reduce spatial dimensions.
**Classifier (Fully Connected Layers):**
* Flatten the output from the convolutional base.
* Add fully connected layers with ReLU activations and dropout for regularization.
* End with a final linear layer projecting to the number of output classes.
  
2. Implement the Forward Method:
* Pass the input image through the convolutional base.
* Flatten the feature map output to a vector.
* Pass it through the fully connected classifier to produce final predictions.

3. Weight Initialization:
* Initialize weights of convolutional and linear layers using a normal distribution.
* Set biases to zero.



In [27]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

In [39]:
# Dataset: CIFAR-10 train/test splits, stored under ./data (downloaded on
# first use) and served in batches of 128.
transform = transforms.Compose(
    [
        transforms.Resize(224),  # upscale to the AlexNet input size
        transforms.ToTensor(),
        # Normalization is currently disabled:
        # transforms.Normalize((0.5,),(0.5,))
    ]
)

# Settings shared by both loaders; only `shuffle` differs between them.
_loader_options = dict(
    batch_size=128,
    num_workers=4,
    pin_memory=True,
    persistent_workers=True,
)

train_data = datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform
)
train_loader = DataLoader(train_data, shuffle=True, **_loader_options)

test_data = datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform
)
test_loader = DataLoader(test_data, shuffle=False, **_loader_options)


In [40]:
#defining Alexnet
class Alexnet_rk(nn.Module):
    """AlexNet-style convolutional classifier.

    Data flow: convolutional feature extractor -> adaptive average pooling
    to a 6x6 map -> flatten -> fully connected classifier with dropout.

    Args:
        num_classes: Number of outputs of the final linear layer (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Feature extractor: five conv + ReLU stages with two max-pool steps.
        # NOTE(review): the reference AlexNet also pools after the first
        # conv; this stack does not. Left unchanged so that behavior and
        # state_dict keys stay the same.
        conv_stack = [
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=4),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Forces a 6x6 spatial map so the first linear layer always
        # receives 256*6*6 features.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Classifier head: two hidden layers of width 4096, each preceded
        # by dropout, followed by the projection to `num_classes`.
        head = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

        self._initialize_weights()

    def forward(self, x):
        """Return the classifier outputs for a batch of images `x`."""
        pooled = self.avgpool(self.features(x))
        # Keep the batch dimension, collapse everything after it.
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Draw conv/linear weights from N(0, 0.01^2) and zero their biases."""
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            nn.init.normal_(layer.weight, mean=0.0, std=0.01)
            nn.init.constant_(layer.bias, 0)


In [41]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Cross-entropy loss over the 10 class outputs.
criterion = nn.CrossEntropyLoss()

# Build the network, then move its parameters to the chosen device.
model = Alexnet_rk(num_classes=10)
model = model.to(device)

# Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

print(device)

cuda


In [42]:
# Training loop: 50 passes over train_loader. After each epoch, print the
# sum of the per-batch losses and the accuracy on the training batches
# (measured while the model is in train mode).
for epoch in range(50):
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for imgs, labels in train_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        output = model(imgs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # Book-keeping for the epoch summary.
        running_loss += loss.item()
        predicted = output.max(1).indices
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

    print(f"Epoch:{epoch} | Loss:{running_loss:.4f} | Accuracy:{100*correct/total:.2f}%")

Epoch:0 | Loss:705.4093 | Accuracy:33.33%
Epoch:1 | Loss:523.8876 | Accuracy:51.39%
Epoch:2 | Loss:427.8716 | Accuracy:61.05%
Epoch:3 | Loss:365.9384 | Accuracy:67.02%
Epoch:4 | Loss:323.7436 | Accuracy:70.74%
Epoch:5 | Loss:287.4380 | Accuracy:74.24%
Epoch:6 | Loss:255.1913 | Accuracy:77.07%
Epoch:7 | Loss:228.1779 | Accuracy:79.58%
Epoch:8 | Loss:203.9353 | Accuracy:81.73%
Epoch:9 | Loss:181.4172 | Accuracy:83.76%
Epoch:10 | Loss:159.5619 | Accuracy:85.69%
Epoch:11 | Loss:139.4556 | Accuracy:87.51%
Epoch:12 | Loss:123.7486 | Accuracy:88.79%
Epoch:13 | Loss:108.3243 | Accuracy:90.29%
Epoch:14 | Loss:97.4408 | Accuracy:91.17%
Epoch:15 | Loss:80.3561 | Accuracy:92.84%
Epoch:16 | Loss:73.6210 | Accuracy:93.36%
Epoch:17 | Loss:64.1876 | Accuracy:94.22%
Epoch:18 | Loss:57.8729 | Accuracy:94.82%
Epoch:19 | Loss:53.3413 | Accuracy:95.29%
Epoch:20 | Loss:48.5626 | Accuracy:95.67%
Epoch:21 | Loss:44.3105 | Accuracy:96.13%
Epoch:22 | Loss:39.9445 | Accuracy:96.41%
Epoch:23 | Loss:38.7980 | Accu

In [48]:
# Evaluation: switch the model to eval mode and measure top-1 accuracy
# over test_loader without tracking gradients.
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)

        output = model(imgs)
        # Predicted class = index of the largest output per sample.
        predicted = output.argmax(dim=1)

        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(f"Test Accuracy: {100*correct/total:.2f}%")


Test Accuracy: 84.03%
