In [1]:
import torch
import numpy as np

In [23]:
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import torch.optim as optimizer
import torch.nn as nn

device = torch.device("cuda"if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
train_dataset = datasets.FashionMNIST(root='./data',train = True, transform = transform,download = True)
test_dataset = datasets.FashionMNIST(root='./data',train = False, transform = transform,download = True)


train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


class Fashion(nn.Module):
    def __init__(self):
        super().__init__()
        self.softmax = nn.Softmax(dim=1)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(64 * 3 * 3,32)
        self.output = nn.Linear(32,10)
        self.maxpool = nn.MaxPool2d((2,2),stride=2)
        self.conv1 = nn.Conv2d(1,64,kernel_size=3,padding = 1)
        self.conv2 = nn.Conv2d(64,128,kernel_size=3,padding = 1)
        self.conv3 = nn.Conv2d(128,64,kernel_size=3,padding=1)
    def forward(self,x):
        # Convolution
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.conv3(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # Classification
        x = self.fc1(x.view(x.size(0),-1))
        x = self.relu(x)

        x = self.output(x)
        x = self.softmax(x)
        return x
model = Fashion().to(device)
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
opt = optimizer.Adam(model.parameters(),lr = learning_rate)
epochs = 30

for epoch in range(epochs):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        opt.zero_grad()
        outputs = model(images)
        loss = criterion(outputs,labels)
        loss.backward()
        opt.step()
    print(f"Epoch {epoch} LOSS: {loss}")

torch.save(model.state_dict(),"model_mark_1.pth")

    

Using device: cuda
Epoch 0 LOSS: 1.5902713537216187
Epoch 1 LOSS: 1.50687575340271
Epoch 2 LOSS: 1.6562246084213257
Epoch 3 LOSS: 1.5864028930664062
Epoch 4 LOSS: 1.4916964769363403
Epoch 5 LOSS: 1.553903579711914
Epoch 6 LOSS: 1.5253033638000488
Epoch 7 LOSS: 1.6486217975616455
Epoch 8 LOSS: 1.6720936298370361
Epoch 9 LOSS: 1.588668704032898
Epoch 10 LOSS: 1.5970699787139893
Epoch 11 LOSS: 1.581365704536438
Epoch 12 LOSS: 1.490739107131958
Epoch 13 LOSS: 1.6478017568588257
Epoch 14 LOSS: 1.4737496376037598
Epoch 15 LOSS: 1.5021872520446777
Epoch 16 LOSS: 1.5007485151290894
Epoch 17 LOSS: 1.5546743869781494
Epoch 18 LOSS: 1.5549339056015015
Epoch 19 LOSS: 1.5862196683883667
Epoch 20 LOSS: 1.4620792865753174
Epoch 21 LOSS: 1.554691195487976
Epoch 22 LOSS: 1.5236507654190063
Epoch 23 LOSS: 1.523865818977356
Epoch 24 LOSS: 1.5237473249435425
Epoch 25 LOSS: 1.5861517190933228
Epoch 26 LOSS: 1.6046675443649292
Epoch 27 LOSS: 1.597828984260559
Epoch 28 LOSS: 1.492881417274475
Epoch 29 LOSS: 

In [35]:
class CNNMnist(nn.Module):
    def __init__(self):
        super().__init__()
        # Convolution layers
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        
        # Pooling and activation
        self.maxpool = nn.MaxPool2d((2,2), stride=2)
        self.relu = nn.ReLU()
        
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 3 * 3, 32)  # Adjusted input size based on convolution output
        self.output = nn.Linear(32, 10)
        
        # Softmax
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # Convolution layers
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
        
        x = self.conv3(x)
        x = self.relu(x)
        x = self.maxpool(x)
        
        # Flatten the tensor
        x = x.view(x.size(0), -1)
        
        # Fully connected layers
        x = self.fc1(x)
        x = self.relu(x)
        x = self.output(x)
        x = self.softmax(x)
        
        return x
    
model = CNNMnist().to(device)
model.load_state_dict(torch.load("/home/student/Documents/DL_220962089/Lab2/MNIST_model.pt"))
# model = torch.load("/home/student/Documents/DL_220962089/Lab2/MNIST_model.pt")
# model.to(device)
# for x in model.state_dict().keys():
#     print("x value : ",x,'\t',"size : ",model.state_dict()[x].size())
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)        
        _, predicted = torch.max(outputs.data, 1)
        # print("True label:{}".format(labels))
        # print('Predicted: {}'.format(predicted))
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")

  model.load_state_dict(torch.load("/home/student/Documents/DL_220962089/Lab2/MNIST_model.pt"))


Test Accuracy: 4.83%


# Q2

In [36]:
import torch 
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.models as models
from torchvision import datasets, transforms

train_dir = "/home/student/Documents/DL_220962089/Lab2/cats_and_dogs_filtered(2)/cats_and_dogs_filtered/train"
val_dir = "/home/student/Documents/DL_220962089/Lab2/cats_and_dogs_filtered(2)/cats_and_dogs_filtered/validation"

transform = transforms.Compose([
    transforms.Resize((224, 224)),        # AlexNet expects 224x224 input images
    transforms.ToTensor(),                # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # AlexNet normalization
])
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
val_dataset = datasets.ImageFolder(root=val_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

model = models.alexnet(pretrained = True)

for params in model.parameters():
    params.requires_grad = False

class CustomANN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(256*6*6,512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 2)            # Output layer for binary classification (cats and dogs)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the input
        x = self.relu(self.fc1(x))  # Apply the first hidden layer
        x = self.relu(self.fc2(x))  # Apply the second hidden layer
        x = self.fc3(x)             # Output layer
        return x

model.classifier = CustomANN()
model = model.to(device)



Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /home/student/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100.0%


In [39]:
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
opt = optimizer.Adam(model.parameters(),lr = learning_rate)
epochs = 20
for epoch in range(epochs):
    for image, label in train_loader:
        image = image.to(device)
        label = label.to(device)
        opt.zero_grad()
        output = model(image)
        loss = criterion(output,label)
        loss.backward()
        opt.step()
    print(f"Epoch {epoch} LOSS: {loss}")

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for image, label in val_loader:
        image = image.to(device)
        label = label.to(device)
        output = model(image)
        _,predicted = torch.max(output.data,1)
        total+=label.size(0)
        correct+=(predicted==label).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")



Epoch 0 LOSS: 8.940690321423972e-08
Epoch 1 LOSS: 0.0027074606623500586
Epoch 2 LOSS: 0.0
Epoch 3 LOSS: 3.7252892326478104e-08
Epoch 4 LOSS: 9.68567974268808e-07
Epoch 5 LOSS: 0.0
Epoch 6 LOSS: 0.0
Epoch 7 LOSS: 0.0
Epoch 8 LOSS: 0.0
Epoch 9 LOSS: 0.0
Epoch 10 LOSS: 0.0
Epoch 11 LOSS: 0.0
Epoch 12 LOSS: 0.0
Epoch 13 LOSS: 0.0
Epoch 14 LOSS: 0.0
Epoch 15 LOSS: 0.0
Epoch 16 LOSS: 0.0
Epoch 17 LOSS: 0.0
Epoch 18 LOSS: 0.0
Epoch 19 LOSS: 0.0
Test Accuracy: 94.80%


# Q3

In [41]:



# Transforms remain the same
transform = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize((0.5,), (0.5,))
])
# Load datasets
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

def save_checkpoint(model,optimizer,epoch,loss, file = "checkpoint.pt"):
    checkpoint={
        'epoch':epoch,
        'model_state_dict':model.state_dict(),
        'optimizer_state_dict':optimizer.state_dict(),
        'loss':loss
    }
    torch.save(checkpoint,file)

model = CNNMnist().to(device)
epochs = 20
opt = optimizer.Adam(model.parameters(),lr = learning_rate)
for epoch in range(epochs):
    for image, label in train_loader:
        image = image.to(device)
        label = label.to(device)
        opt.zero_grad()
        output = model(image)
        loss = criterion(output,label)
        loss.backward()
        opt.step()
    print(f"Epoch {epoch} LOSS: {loss}")
save_checkpoint(model, opt, epoch, loss)
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for image, label in val_loader:
        image = image.to(device)
        label = label.to(device)
        output = model(image)
        _,predicted = torch.max(output.data,1)
        total+=label.size(0)
        correct+=(predicted==label).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")


Epoch 0 LOSS: 1.520649790763855
Epoch 1 LOSS: 1.5860027074813843
Epoch 2 LOSS: 1.523446798324585
Epoch 3 LOSS: 1.6157256364822388
Epoch 4 LOSS: 1.6470552682876587
Epoch 5 LOSS: 1.583660364151001
Epoch 6 LOSS: 1.4913992881774902
Epoch 7 LOSS: 1.492435336112976
Epoch 8 LOSS: 1.4922144412994385
Epoch 9 LOSS: 1.4767028093338013
Epoch 10 LOSS: 1.4611507654190063
Epoch 11 LOSS: 1.4924007654190063
Epoch 12 LOSS: 1.4611507654190063
Epoch 13 LOSS: 1.4611507654190063
Epoch 14 LOSS: 1.4611507654190063
Epoch 15 LOSS: 1.466123104095459
Epoch 16 LOSS: 1.4611507654190063
Epoch 17 LOSS: 1.4611507654190063
Epoch 18 LOSS: 1.4946719408035278
Epoch 19 LOSS: 1.4611507654190063


RuntimeError: Given groups=1, weight of size [64, 1, 3, 3], expected input[32, 3, 224, 224] to have 1 channels, but got 3 channels instead