# Question 1

In [1]:
import torch
from torchvision.transforms import transforms
import pandas as pd
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader   

In [2]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
device = 'cpu' # cuda brings more headaches than answers for this exam

In [3]:
class CustomDataset(Dataset):
   def __init__(self, csv_file, root_dir, transform=None):
       self.data_frame = pd.read_csv(csv_file)
       self.root_dir = root_dir
       self.transform = transform

   def __len__(self):
       return len(self.data_frame)

   def __getitem__(self, idx):
       if torch.is_tensor(idx):
           idx = idx.tolist()

       img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx, 0])
       image = Image.open(img_name)
       label = self.data_frame.iloc[idx, 1]

       if self.transform:
           image = self.transform(image)

       return image, label


In [4]:
transform = transforms.ToTensor()
train_dataset = CustomDataset('data/Data1/Data1/train.csv', "data/Data1/Data1", transform=transform)
test_dataset = CustomDataset('data/Data1/Data1/test.csv', "data/Data1/Data1", transform=transform)
train_dataset[300-1]


(tensor([[[1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
           1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
           1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
           1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
           1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
           1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
           1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
           1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
           1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
           1.0000, 1.

In [5]:

def filter_and_limit_dataset(dataset, num_samples=None, labels_list=None):
    """
    Filters and limits the dataset to num_samples and maps the labels to 0 and 1.

    dataset: The dataset to filter (e.g., MNIST).
    num_samples: The number of samples to include in the filtered dataset.
    labels_list: A list or set of two labels to include in the dataset.
    """
    if labels_list is not None:
        label_to_binary_map = {label: idx for idx, label in enumerate(labels_list)}
        filtered_dataset = [(img, label_to_binary_map[label]) for img, label in dataset if label in labels_list]
    else:
        # Create a list from the entire dataset if no labels are specified
        filtered_dataset = list(dataset)

    if num_samples is not None:
        return filtered_dataset[:num_samples]
    return filtered_dataset



In [6]:
# Filter and limit the datasets
labels_07 = {0, 7}

train_dataset_07 = filter_and_limit_dataset(train_dataset, labels_list = labels_07)
test_dataset_07 = filter_and_limit_dataset(test_dataset, labels_list = labels_07)
train_loader_07 = DataLoader(train_dataset_07, batch_size=32, shuffle=True)
test_loader_07 = DataLoader(test_dataset_07, batch_size=32)
img_size = train_dataset[0][0].shape[1]

In [7]:
class SimpleBinaryMLPMultiple(torch.nn.Module):
    def __init__(self, img_size=24,  n_classes=2):
        super().__init__()
        self.img_size = img_size
        self.l1 = torch.nn.Linear(img_size**2, 30)
        self.l2 = torch.nn.Linear(30, 15)
        self.l3 = torch.nn.Linear(15, n_classes)
        self.softmax = torch.nn.Softmax(dim=1)
    
    def forward(self, image):
        flattened_image = image.view(-1, self.img_size**2)
        x = self.l1(flattened_image)
        x = self.l2(x)
        x = self.softmax(self.l3(x))
        return x

In [8]:
def train(model, epoch, dataloader, optimizer, loss_function):
    model.train()
    size = len(dataloader.dataset)
    for batch, (image, target) in enumerate(dataloader):
        image = image.to(device)
        target = target.to(device).long()
        outputs = model(image)
        loss = loss_function(outputs, target)


        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            current = (batch + 1) * len(target)
            print(f"\rEpoch: {epoch}, loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]", end="")
    print(f"\rEpoch: {epoch}, loss: {loss.item():>7f}  [{size:>5d}/{size:>5d}]", end="")

In [9]:
def validate(dataloader, model):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No gradient needed
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            
            _, predicted = torch.max(outputs, 1)

            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = 100 * correct / total
    return accuracy


## Subquestion 1:
The performance is quite good for this relatively simple dataset with an accuracy close to 100%

In [10]:
mlp_model_07 = SimpleBinaryMLPMultiple()
mlp_model_07.to(device)

mlp_loss_fn_07 = torch.nn.CrossEntropyLoss()
mlp_optimizer_07 = torch.optim.Adam(params =  mlp_model_07.parameters(), lr=1e-05)

for epoch in range(100):
    train(mlp_model_07, epoch, train_loader_07, mlp_optimizer_07, mlp_loss_fn_07)
print()
    
# Calculate accuracy on test data
mlp_test_accuracy_07 = validate(test_loader_07, mlp_model_07)
print(f'Test Accuracy: {mlp_test_accuracy_07:.2f}%')

Epoch: 24, loss: 0.654177  [   32/  200]

Epoch: 99, loss: 0.547628  [  200/  200]
Test Accuracy: 97.83%


In [11]:
class ConvNet(torch.nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        
        # Convolutional feature extractor
        self.feature_extractor = torch.nn.Sequential(            
            torch.nn.Conv2d(in_channels=1, out_channels=8, kernel_size=9, stride=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2),
            torch.nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5, stride=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2)
        )
        
        # Fully connected classifier
        self.classifier = torch.nn.Sequential(
            torch.nn.Flatten(),
            torch.nn.Linear(16 * 2 * 2, 15),
            torch.nn.ReLU(),
            torch.nn.Linear(15, n_classes)
        )

    def forward(self, x):
        x = self.feature_extractor(x)
        logits = self.classifier(x)
        probs = torch.nn.functional.softmax(logits, dim=1)
        return probs


## Subquestion 2:
The performance is quite poor for this relatively simple dataset. It is likely that the training loss has not quite converged yet after 100 epochs. Reason could be that the fewer number of free parameters is restricting its ability too learn. Thus it needs more epochs to train. Indeed a larger number of epochs improves the model. 

In [12]:
conv_model_07 = ConvNet(2)
conv_model_07.to(device)

conv_loss_fn_07 = torch.nn.CrossEntropyLoss()
conv_optimizer_07 = torch.optim.Adam(params =  conv_model_07.parameters(), lr=1e-05)

for epoch in range(100):
    train(conv_model_07, epoch, train_loader_07, conv_optimizer_07, conv_loss_fn_07)
print()

# Calculate accuracy on test data
conv_test_accuracy_07 = validate(test_loader_07, conv_model_07)
print(f'Test Accuracy: {conv_test_accuracy_07:.2f}%')

Epoch: 3, loss: 0.693621  [   32/  200]

Epoch: 99, loss: 0.679119  [  200/  200]
Test Accuracy: 95.33%


In [13]:
# Filter and limit the datasets
labels_017 = {0, 1, 7}

train_dataset_017 = filter_and_limit_dataset(train_dataset, labels_list = labels_017)
test_dataset_017 = filter_and_limit_dataset(test_dataset, labels_list = labels_017)

train_loader_017 = DataLoader(train_dataset_017, batch_size=32, shuffle=True)
test_loader_017 = DataLoader(test_dataset_017, batch_size=32)

## Subquestion 3
Once again the performance of the MLP is quite a bit better when we use 100 epochs for both times.

In [14]:
mlp_model_017 = SimpleBinaryMLPMultiple(n_classes=3)
mlp_model_017.to(device)

mlp_loss_fn_017 = torch.nn.CrossEntropyLoss()
mlp_optimizer_017 = torch.optim.Adam(params =  mlp_model_017.parameters(), lr=1e-05)

for epoch in range(100):
    train(mlp_model_017, epoch, train_loader_017, mlp_optimizer_017, mlp_loss_fn_017)
print()

# Calculate accuracy on test data
mlp_test_accuracy_017 = validate(test_loader_017, mlp_model_017)
print(f'Test Accuracy: {mlp_test_accuracy_017:.2f}%')

Epoch: 99, loss: 0.978091  [  300/  300]
Test Accuracy: 71.22%


In [15]:
conv_model_017 = ConvNet(n_classes=3)
conv_model_017.to(device)

conv_loss_fn_017 = torch.nn.CrossEntropyLoss()
conv_optimizer_017 = torch.optim.Adam(params =  conv_model_017.parameters(), lr=1e-05)

for epoch in range(100):
    train(conv_model_017, epoch, train_loader_017, conv_optimizer_017, conv_loss_fn_017)
print()

# Calculate accuracy on test data
conv_test_accuracy_017 = validate(test_loader_017, conv_model_017)
print(f'Test Accuracy: {conv_test_accuracy_017:.2f}%')

Epoch: 99, loss: 1.076388  [  300/  300]
Test Accuracy: 63.56%


# Subquestion 4

The mlp model seem to perform better on a fewer number of epochs. However, the fact that the convolutional network has less parameters than the MLP model, it is more attractive to use the conv model. Furthermore, the same number of epochs for both models still take a different amount of computation time and power as there are less parameters. Thus, for a fairer comparison, the number of epochs may be increased for the convolutional model. In that case the convolutional model will attain similar performance and will still be cheaper to train.. Furthermore, the Conv network is shift invariant whereas the FC network is not. So you need to train of way more data samples. The MLP layer has to learn the feature for each point whereas the conv model recognizes the important of, e.g., horizontal lines.

# Question 2


In [16]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

x_train_raw = pd.read_csv("data/Data2/x_train.csv")
x_test_raw = pd.read_csv("data/Data2/x_test.csv")
y_train_raw = pd.read_csv("data/Data2/y_train.csv")
y_test_raw = pd.read_csv("data/Data2/y_test.csv")

In [17]:
y_train_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   y       1000 non-null   int64
dtypes: int64(1)
memory usage: 7.9 KB


## Subquestion 1
The means, and other statistics seem quite different in multiple variables for the different classes. It should be relatively easy to separate the classes.

In [18]:
pd.set_option('display.max_rows', 500)

# Combine x_train and y_train into a single DataFrame
combined = x_train_raw.copy()
combined['y'] = y_train_raw['y']

# Group by the class and describe
combined.groupby('y').describe().transpose()

Unnamed: 0,y,0,1
V1,count,500.0,500.0
V1,mean,-0.04594,2.98958
V1,std,1.057304,1.007417
V1,min,-2.996949,-0.23261
V1,25%,-0.790081,2.361952
V1,50%,-0.021257,2.947828
V1,75%,0.689333,3.576546
V1,max,3.055742,5.861592
V2,count,500.0,500.0
V2,mean,0.022644,2.987018


In [19]:
# Normalize features
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_raw)
x_test_scaled = scaler.transform(x_test_raw)

# Convert to PyTorch tensors
x_train = torch.tensor(x_train_scaled, dtype=torch.float32)
x_test = torch.tensor(x_test_scaled, dtype=torch.float32)
y_train = torch.tensor(y_train_raw.values, dtype=torch.long)
y_test = torch.tensor(y_test_raw.values, dtype=torch.long)

# Determine the input size and number of classes
input_size = x_train.shape[1]
num_classes = len(y_train.unique())

In [20]:
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, input_size, num_classes):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, 30) 
        self.fc2 = nn.Linear(30, 15)
        self.fc3 = nn.Linear(15, num_classes)
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.softmax(self.fc3(x))
        return x

In [21]:
def train2(model, criterion, optimizer, x_train, y_train, epochs=100):
    model.train()

    # Ensure y_train is a 1D tensor if it's not already
    if y_train.ndim > 1:
        y_train = y_train.squeeze()
    

    for epoch in range(epochs):
        # Forward pass
        optimizer.zero_grad()
        outputs = model(x_train)
        loss = criterion(outputs, y_train)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch+1) % 10 == 0:
            print(f'\rEpoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}', end="")
    print()




In [22]:
def evaluate2(model, x_test, y_test):
    model.eval()  # Set the model to evaluation mode
    if y_test.ndim > 1:
        y_test = y_test.squeeze()
    with torch.no_grad():  # Turn off gradients for validation, saves memory and computations
        correct = 0
        total = 0
        outputs = model(x_test)
        predicted = torch.argmax(outputs.data, 1)  # Get the predicted classes
        total += y_test.size(0)
        correct += (predicted == y_test).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.4f}%')




## Subquestion 2

The model is performing quite poor (and very random, initialization matters a lot), (sometimes) only around 50 percent correct meaning that the model is about as good as a random guess. With the descriptive statistics, this result is quite shocking.

In [23]:
# Create the MLP model
model = MLP(input_size, num_classes=num_classes)
model.to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params =  model.parameters(), lr=1e-05)

train2(model, loss_fn, optimizer, x_train, y_train, epochs=100)

# Call the evaluate function
evaluate2(model, x_test, y_test)

Epoch [100/100], Loss: 0.6978
Accuracy: 41.9000%


In [24]:
class DenseAutoencoder(nn.Module):
    def __init__(self, input_size):
        super(DenseAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 10),
            nn.Tanh(),
            nn.Linear(10, 6),
            nn.Tanh(),
            nn.Linear(6, 4),
            nn.Tanh()
        )
        self.decoder = nn.Sequential(
            nn.Linear(4, 6),
            nn.Tanh(),
            nn.Linear(6, 10),
            nn.Tanh(),
            nn.Linear(10, input_size)  # Output size is same as input size
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

## Subquestion 3
The model seems to learn quite well how the data is constructed. It does quite well in autoencoding the (test) data.

In [25]:
def evaluate3(model, criterion, x_test, y_test):
    model.eval()    
    with torch.no_grad():
        # Forward pass
        outputs = model(x_test)
        loss = criterion(outputs, y_test)
        print(f'Eval Loss: {loss.item():.4f}')



In [30]:
# Assuming input size of 10 (to match your layer sizes)
input_size = 10
model = DenseAutoencoder(input_size)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

train2(model, criterion, optimizer, x_train, x_train)
evaluate3(model, criterion, x_test, x_test)


Epoch [100/100], Loss: 0.2475
Eval Loss: 0.2392


In [27]:
class EncodedMLP(nn.Module):
    def __init__(self, num_classes=2):
        super(EncodedMLP, self).__init__()
        self.fc1 = nn.Linear(4, 8)  # First layer
        self.fc2 = nn.Linear(8, 4)  # Second layer
        self.fc3 = nn.Linear(4, num_classes)  # final
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.softmax(self.fc3(x))
        return x




In [28]:
def encode_data(model, x):
    # Extract the first three layers from the encoder part of the model
    with torch.no_grad():
        x = model.encoder(x)
    return x



## Subquestion 4
This model that takes the encoded form of the data seems to be performing better than the original NN. We can draw parallels with transfer learning that this data profits from understanding the data in a different way, without training the previously learned transformations. However, I must confess that I do not have a definite answer as why the encoded model performs much better than the previous model.

In [29]:
# Instantiate the model
encoded_model = EncodedMLP()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(encoded_model.parameters(), lr=1e-3)

# Transform training and testing data
x_train_transformed = encode_data(model, x_train)
x_test_transformed = encode_data(model, x_test)

# Train the third model
train2(encoded_model, criterion, optimizer, x_train_transformed, y_train)

# Evaluate the model
evaluate2(encoded_model, x_test_transformed, y_test)


Epoch [100/100], Loss: 0.5861
Accuracy: 100.0000%
