# Esfandiar Kiani - ML - HM05.8

## Imports

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from tqdm import tqdm

## Set to GPU

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Dataset loader

In [3]:
class MNISTDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        label = row[0]
        image = row[1:].values.astype('float32').reshape(28, 28)
        if self.transform:
            image = self.transform(image)
        return image, label

In [4]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

## Load data & Batching

In [21]:
train_dataset = MNISTDataset("D:/M.A/T1/ML/Assignments/HW5_ANN  KNN/MNIST_CSV/mnist_train.csv", transform=transform)
test_dataset  = MNISTDataset("D:/M.A/T1/ML/Assignments/HW5_ANN  KNN/MNIST_CSV/mnist_test.csv", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

## Train and Eval functions

In [15]:
def train_model(model, optimizer, criterion, scheduler=None, epochs=5):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            if scheduler:
                scheduler.step()

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}") 

In [10]:
def evaluate_model(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Accuracy: {accuracy:.2f}%")


# Part-A

## Multi-Layer Perceptron Architecture

In [6]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(28*28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


## Train and test

In [20]:
mlp_model = MLP()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp_model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

train_model(mlp_model, optimizer, criterion, epochs=10)
evaluate_model(mlp_model)

  label = row[0]


Epoch [1/10], Loss: 0.3157
Epoch [2/10], Loss: 0.1232
Epoch [3/10], Loss: 0.0647
Epoch [4/10], Loss: 0.0284
Epoch [5/10], Loss: 0.0369
Epoch [6/10], Loss: 0.0308
Epoch [7/10], Loss: 0.0107
Epoch [8/10], Loss: 0.0051
Epoch [9/10], Loss: 0.0751
Epoch [10/10], Loss: 0.0369


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

# Part-B

## Auto-Encoder

### Network Architecture

In [30]:
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(28*28, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 5)
        )
        self.decoder = nn.Sequential(
            nn.Linear(5, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 28*28),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = x.view(-1, 28*28)
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

### Instance creating

In [42]:
autoencoder = AutoEncoder().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

### Train part-1

In [39]:
epochs = 10
for epoch in range(epochs):
    autoencoder.train()
    for images, _ in train_loader:
        images = images.to(device)
        optimizer.zero_grad()
        outputs = autoencoder(images)
        loss = criterion(outputs, images.view(-1, 28*28))
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

  label = row[0]


Epoch [1/10], Loss: 28547.8145
Epoch [2/10], Loss: 29426.5254
Epoch [3/10], Loss: 29151.2852
Epoch [4/10], Loss: 27816.1016
Epoch [5/10], Loss: 29148.7207
Epoch [6/10], Loss: 30060.8535
Epoch [7/10], Loss: 30225.9336
Epoch [8/10], Loss: 29982.3496
Epoch [9/10], Loss: 29205.3340
Epoch [10/10], Loss: 29584.1191


## Transfer-Learning

### Network architecture

In [40]:
class TransferClassifier(nn.Module):
    def __init__(self, encoder):
        super(TransferClassifier, self).__init__()
        self.encoder = encoder
        self.classifier = nn.Linear(5, 10)

    def forward(self, x):
        with torch.no_grad():
            x = x.view(-1, 28*28)
            x = self.encoder(x)
        x = self.classifier(x)
        return x


### Train and eval

In [46]:
transfer_model = TransferClassifier(autoencoder.encoder).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(transfer_model.parameters(), lr=0.001)

train_model(transfer_model, optimizer, criterion, epochs=15)
evaluate_model(transfer_model)

  label = row[0]


Epoch [1/15], Loss: 2.6612
Epoch [2/15], Loss: 2.1548
Epoch [3/15], Loss: 2.0169
Epoch [4/15], Loss: 2.0352
Epoch [5/15], Loss: 1.9865
Epoch [6/15], Loss: 1.8146
Epoch [7/15], Loss: 1.9154
Epoch [8/15], Loss: 1.7383
Epoch [9/15], Loss: 1.9882
Epoch [10/15], Loss: 1.9067
Epoch [11/15], Loss: 1.6719
Epoch [12/15], Loss: 1.8012
Epoch [13/15], Loss: 1.8615
Epoch [14/15], Loss: 1.9388
Epoch [15/15], Loss: 1.8047


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

# Part-C

## Network

In [43]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64*7*7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64*7*7)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [47]:
cnn_model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

train_model(cnn_model, optimizer, criterion, epochs=15)
evaluate_model(cnn_model)

  label = row[0]


Epoch [1/15], Loss: 0.0857
Epoch [2/15], Loss: 0.0068
Epoch [3/15], Loss: 0.0315
Epoch [4/15], Loss: 0.0127
Epoch [5/15], Loss: 0.0198
Epoch [6/15], Loss: 0.0325
Epoch [7/15], Loss: 0.0675
Epoch [8/15], Loss: 0.0081
Epoch [9/15], Loss: 0.0065
Epoch [10/15], Loss: 0.0037
Epoch [11/15], Loss: 0.0042
Epoch [12/15], Loss: 0.0183
Epoch [13/15], Loss: 0.0000
Epoch [14/15], Loss: 0.0104
Epoch [15/15], Loss: 0.0108


RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor