## READ DATA

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import make_grid
from sklearn import metrics

sns.set_style("whitegrid")

def accuracy(target, pred):
    """Return the fraction of entries in ``pred`` that equal ``target``.

    Both arguments are tensors; each one is detached, moved to the CPU and
    converted to a NumPy array before being handed to
    ``sklearn.metrics.accuracy_score``.
    """
    y_true = target.detach().cpu().numpy()
    y_pred = pred.detach().cpu().numpy()
    return metrics.accuracy_score(y_true, y_pred)

def compute_confusion_matrix(target, pred, normalize=None, labels=None):
    """Build a confusion matrix from target and prediction tensors.

    Args:
        target: tensor of true class ids.
        pred: tensor of predicted class ids.
        normalize: forwarded unchanged to ``sklearn.metrics.confusion_matrix``.
        labels: optional list of class ids that fixes the rows/columns of the
            result. Pass the full label set when matrices from several batches
            are summed; without it the matrix only covers the classes present
            in this particular batch, so its shape can vary between batches.

    Returns:
        The array returned by ``sklearn.metrics.confusion_matrix``.
    """
    return metrics.confusion_matrix(
        target.detach().cpu().numpy(),
        pred.detach().cpu().numpy(),
        labels=labels,
        normalize=normalize,
    )

def show_image(img):
    """Display an image tensor in an 8x8 inch figure without axes.

    The tensor is detached, moved to the CPU, rescaled with ``x / 2 + 0.5``
    and its axes are reordered with ``permute((1, 2, 0))`` before plotting.
    """
    # assumes img is channel-first (C, H, W) and normalised to [-1, 1] — TODO confirm
    unnormalized = img.detach().cpu() / 2 + 0.5
    with sns.axes_style("white"):
        plt.figure(figsize=(8, 8))
        channel_last = unnormalized.permute((1, 2, 0))
        plt.imshow(channel_last.numpy())
        plt.axis('off')
        plt.show()

In [None]:
pip install pickle5

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pickle5
  Downloading pickle5-0.0.12-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (256 kB)
[K     |████████████████████████████████| 256 kB 8.5 MB/s 
[?25hInstalling collected packages: pickle5
Successfully installed pickle5-0.0.12


In [None]:
# Mount Google Drive at /content/drive so files stored there can be opened
# with ordinary file paths.
# NOTE(review): the name `path` imported below is rebound to a plain string by
# the data-loading cell, so the os.path alias is not usable after that point.
import os.path
from os import path

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# pickle5 is a backport of pickle protocol 5 for Python 3.5-3.7. On newer
# interpreters the standard-library pickle already supports that protocol,
# so fall back to it when the backport is not installed.
try:
    import pickle5 as pickle
except ImportError:
    import pickle

import pandas as pd
print(pd.__version__)

# Location of the pickled DataFrame on the mounted Drive (one line per collaborator).
path = "/content/drive/My Drive/ColabNotebooks/projekt03/data_10000_images_v_135.pkl" #Directory Frederikke
#path = "/content/drive/My Drive/projekt03/data_10000_images_v_135.pkl" # Directory Jonathan
#path = "/content/drive/My Drive/ColabNotebooks/projekt03/data_10000_images_v_135.pkl" # Directory Johanne

# SECURITY: unpickling executes arbitrary code embedded in the file; only load
# pickles that come from a trusted source.
with open(path, "rb") as fh:
    subset = pickle.load(fh)

1.3.5


In [None]:
# Convert the `moa` labels into consecutive integer class ids, numbered in
# order of first appearance in `subset`.
labels_names = subset["moa"].unique()
label_to_label_id = {name: label_id for label_id, name in enumerate(labels_names)}

# Series.map does the dictionary lookup column-wise instead of calling a
# Python lambda once per row; the cast keeps the column a plain int64.
subset["labels_ID"] = subset["moa"].map(label_to_label_id).astype("int64")
subset["labels_ID"].dtypes

dtype('int64')

In [None]:
# Keep an independent copy of just the image and integer-label columns;
# this is the frame the Dataset class below indexes by position.
dataset_columns = ['Images', 'labels_ID']
data = subset.loc[:, dataset_columns].copy()
print(data.shape)

(10010, 2)


In [None]:
#make dataset class
#https://pytorch.org/tutorials/beginner/basics/data_tutorial.html

import os
import pandas as pd
import numpy as np
from torchvision.io import read_image
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

class BBBC(Dataset):
    """Dataset over a DataFrame whose column 0 holds images and column 1 labels.

    The frame is split by position: the first ``ceil(0.8 * n_rows)`` rows form
    the training split and the remaining rows form the test split, so the two
    splits never share a row.

    Args:
        annotations_file: DataFrame with the image in the first column and the
            integer label in the second column.
        typeFlag: ``'train'`` or ``'test'``; selects which split to expose.
        transform: optional callable applied to the image.
        target_transform: optional callable applied to the label.

    Raises:
        ValueError: if ``typeFlag`` is neither ``'train'`` nor ``'test'``.
    """

    def __init__(self, annotations_file,typeFlag, transform=None, target_transform=None):
        self.transform = transform
        self.target_transform = target_transform

        n_rows = annotations_file.shape[0]
        train_size = int(np.ceil(0.8 * n_rows))
        test_size = int(n_rows - train_size)

        if typeFlag == 'train':
            img_labels = annotations_file.head(train_size)
        elif typeFlag == 'test':
            # tail(), not head(-test_size): the latter drops the last
            # `test_size` rows and therefore returns the training rows again.
            img_labels = annotations_file.tail(test_size)
        else:
            raise ValueError(
                f"typeFlag must be 'train' or 'test', got {typeFlag!r}"
            )

        self.img_labels = img_labels

    def __len__(self):
        """Return the number of rows in the selected split."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The image is min-max scaled to [0, 1] and returned as a float tensor
        of shape (3, 68, 68); the label is returned as a Python ``int``.
        """
        image = self.img_labels.iloc[idx, 0]
        label = self.img_labels.iloc[idx, 1]

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)

        # assumes the stored image is a torch tensor with 3*68*68 elements — TODO confirm
        image = image.reshape(-1).type(torch.FloatTensor)
        lowest = image.min()
        value_range = image.max() - lowest
        if value_range > 0:
            image = (image - lowest) / value_range
        else:
            # A constant image would divide by zero and become all-NaN.
            image = torch.zeros_like(image)
        image = image.reshape(3, 68, 68).type(torch.FloatTensor)

        return image, int(label)

In [None]:
# Read data with DataLoader

from torch.utils.data import DataLoader

# Work on a random 2002-row subset. Sampling by count (capped at the frame
# length) keeps the cell re-runnable once `data` has already been reduced,
# and the fixed seed makes the subset reproducible.
data = data.sample(n=min(2002, len(data)), random_state=0).reset_index(drop=True)

n_classes = len(set(data["labels_ID"]))

dset_train = BBBC(data,"train")
dset_test = BBBC(data,"test")

batch_size = 128

# Shuffle only the training loader so every epoch sees the batches in a new
# order; evaluation order does not matter.
train_loader = DataLoader(dset_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dset_test, batch_size=batch_size)

In [None]:
# Pull a single batch from the training loader to check the tensor layout.
first_batch = next(iter(train_loader))
x, y = first_batch
print("Batch dimension (B x C x H x W):", x.shape)

Batch dimension (B x C x H x W): torch.Size([128, 3, 68, 68])


In [None]:
class PrintSize(nn.Module):
    """Pass-through module that reports the size of the first tensor it sees.

    Intended to be dropped into an ``nn.Sequential`` while debugging shapes:
    the first call prints the tensor size, every later call is silent.
    """

    # Class-level default; the first forward pass shadows it on the instance.
    first = True

    def forward(self, x):
        """Return ``x`` unchanged, printing its size on the first call only."""
        if not self.first:
            return x
        print(f"Size: {x.size()}")
        self.first = False
        return x


class Model(nn.Module):
    """Small convolutional classifier for square 3-channel images.

    Architecture: two padded 3x3 convolutions (12 channels) followed by a 2x2
    max-pool, one unpadded 3x3 convolution (64 channels) followed by a second
    2x2 max-pool, then a single linear layer producing the class logits.

    Args:
        num_classes: number of output logits.
        input_size: height/width of the square input images. The default of
            68 yields 64 * 16 * 16 = 16384 flattened features.
    """

    def __init__(self, num_classes, input_size=68):
        super().__init__()
        self.num_classes = num_classes

        # Track the spatial size through the network so the linear layer
        # matches the input resolution instead of a hard-coded 16384.
        after_first_pool = input_size // 2              # padded convs keep the size
        after_second_pool = (after_first_pool - 2) // 2  # unpadded 3x3 conv trims 2
        flat_features = 64 * after_second_pool * after_second_pool

        self.net = nn.Sequential(
            nn.Conv2d(3, 12, 3, padding=1),
            nn.BatchNorm2d(12),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Conv2d(12, 12, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.1),
            nn.Conv2d(12, 64, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(1, -1),
            nn.Linear(in_features=flat_features, out_features=num_classes),
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        return self.net(x)


# Build the classifier and place it on the chosen device before printing
# its layer summary.
device = torch.device('cpu')  # use cuda or cpu
model = Model(n_classes).to(device)
print(model)

Model(
  (net): Sequential(
    (0): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Conv2d(12, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Dropout(p=0.1, inplace=False)
    (8): Conv2d(12, 64, kernel_size=(3, 3), stride=(1, 1))
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Flatten(start_dim=1, end_dim=-1)
    (13): Linear(in_features=16384, out_features=13, bias=True)
  )
)


In [None]:
# Adam over all model parameters, paired with cross-entropy on the raw logits.
learnable_parameters = model.parameters()
optimizer = optim.Adam(learnable_parameters, lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Test the forward pass with dummy data.
# Run it in eval mode without gradients: in train mode the random input would
# update the BatchNorm running statistics and dropout would be active.
was_training = model.training
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 68, 68, device=device))
model.train(was_training)  # restore whatever mode the model was in

print("Output shape:", out.size())
print(f"Output logits:\n{out.detach().cpu().numpy()}")
print(f"Output probabilities:\n{out.softmax(1).detach().cpu().numpy()}")

Output shape: torch.Size([2, 13])
Output logits:
[[-0.4928476  -1.132153    0.8413316  -0.0532299   0.5823067   0.06716603
  -0.7126588   0.5920632   1.0905435   0.8280716   0.86114055  0.3057353
  -0.5799852 ]
 [-0.34384996 -1.6320821   0.9210721  -0.54154235  0.23263094 -0.6963913
  -1.3192508  -0.4157709   0.7289497   0.97881067  0.93925965  0.95906085
  -1.2883418 ]]
Output probabilities:
[[0.03231046 0.0170489  0.12267888 0.0501495  0.09468402 0.05656581
  0.02593464 0.09561233 0.1573987  0.1210629  0.12513325 0.07180642
  0.02961418]
 [0.04204654 0.01159467 0.14896321 0.03450433 0.07483294 0.0295545
  0.01585328 0.0391287  0.12292531 0.15781727 0.15169726 0.15473099
  0.01635094]]


In [None]:
batch_size = 128
num_epochs = 10
# NOTE(review): validation only runs when `step` reaches a multiple of this
# value; with few batches per epoch and 10 epochs that may never happen —
# lower it if intermediate accuracies are wanted.
validation_every_steps = 500

step = 0
model.train()

train_accuracies = []
valid_accuracies = []

for epoch in range(num_epochs):
    print(epoch)
    train_accuracies_batches = []

    for inputs, targets in train_loader:
        # Use the batch the loader just yielded. Re-fetching with
        # next(iter(train_loader)) here would restart the loader every
        # iteration and train on its first batch only.
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass.
        output = model(inputs)

        # Compute loss.
        loss = loss_fn(output, targets)

        # Clean up gradients from the model.
        optimizer.zero_grad()

        # Compute gradients based on the loss from the current batch (backpropagation).
        loss.backward()

        # Take one optimizer step using the gradients computed in the previous step.
        optimizer.step()

        # Increment step counter exactly once per optimizer step.
        step += 1

        # Compute accuracy.
        predictions = output.max(1)[1]
        train_accuracies_batches.append(accuracy(targets, predictions))

        if step % validation_every_steps == 0:

            # Append average training accuracy to list.
            train_accuracies.append(np.mean(train_accuracies_batches))

            train_accuracies_batches = []

            # Compute accuracies on validation set. Separate variable names
            # keep the training batch of the enclosing loop intact.
            valid_accuracies_batches = []
            model.eval()
            with torch.no_grad():
                for valid_inputs, valid_targets in test_loader:
                    valid_inputs = valid_inputs.to(device)
                    valid_targets = valid_targets.to(device)
                    valid_predictions = model(valid_inputs).max(1)[1]

                    # Weight by the batch length because the final batch of the
                    # DataLoader may be smaller (drop_last=False).
                    valid_accuracies_batches.append(
                        accuracy(valid_targets, valid_predictions) * len(valid_inputs)
                    )
            model.train()

            # Append average validation accuracy to list.
            valid_accuracies.append(np.sum(valid_accuracies_batches) / len(dset_test))

            print(f"Step {step:<5}   training accuracy: {train_accuracies[-1]}")
            print(f"             test accuracy: {valid_accuracies[-1]}")

print("Finished training.")

In [None]:
# Evaluate test set
# Fixing the label set gives every per-batch confusion matrix the full
# (n_classes, n_classes) shape; without it a batch that lacks some class
# yields a smaller matrix and the accumulation below fails.
# assumes class ids are exactly 0..n_classes-1 — TODO confirm
class_labels = np.arange(n_classes)
confusion_matrix = np.zeros((n_classes, n_classes))
test_accuracies = []

model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        output = model(inputs)

        predictions = output.max(1)[1]

        # Multiply by len(inputs) because the final batch of DataLoader may be
        # smaller (drop_last=False).
        test_accuracies.append(accuracy(targets, predictions) * len(inputs))

        confusion_matrix += metrics.confusion_matrix(
            targets.detach().cpu().numpy(),
            predictions.detach().cpu().numpy(),
            labels=class_labels,
        )

test_accuracy = np.sum(test_accuracies) / len(dset_test)

model.train()

In [None]:
# Report the overall test accuracy with three decimals.
test_accuracy_report = f"Test accuracy: {test_accuracy:.3f}"
print(test_accuracy_report)

In [None]:

from torch.utils.data import DataLoader

# Train and evaluate ten freshly initialised models, each on a newly shuffled
# split, and collect the final test accuracy of every run.
test_accuracys = []
for i in range(10):
    # Read data with dataloader: reshuffle so each run gets a different split.
    # Sampling by count (capped at the frame length) cannot request more rows
    # than exist.
    data = data.sample(n=min(2002, len(data))).reset_index(drop=True)

    n_classes = len(set(data["labels_ID"]))
    # assumes class ids are exactly 0..n_classes-1 — TODO confirm
    class_labels = np.arange(n_classes)

    dset_train = BBBC(data, "train")
    dset_test = BBBC(data, "test")

    batch_size = 128

    train_loader = DataLoader(dset_train, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dset_test, batch_size=batch_size)

    model = Model(n_classes)
    device = torch.device('cpu')  # use cuda or cpu
    model.to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    num_epochs = 50
    validation_every_steps = 500

    step = 0
    model.train()

    train_accuracies = []
    valid_accuracies = []

    for epoch in range(num_epochs):
        print(f"Model {i}: {epoch}/{num_epochs}")
        train_accuracies_batches = []

        for inputs, targets in train_loader:
            # Use the batch the loader just yielded. Re-fetching with
            # next(iter(train_loader)) would train on the first batch only.
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass.
            output = model(inputs)

            # Compute loss.
            loss = loss_fn(output, targets)

            # Clean up gradients from the model.
            optimizer.zero_grad()

            # Compute gradients based on the loss from the current batch (backpropagation).
            loss.backward()

            # Take one optimizer step using the gradients computed in the previous step.
            optimizer.step()

            # Increment step counter exactly once per optimizer step.
            step += 1

            # Compute accuracy.
            predictions = output.max(1)[1]
            train_accuracies_batches.append(accuracy(targets, predictions))

            if step % validation_every_steps == 0:

                # Append average training accuracy to list.
                train_accuracies.append(np.mean(train_accuracies_batches))

                train_accuracies_batches = []

                # Compute accuracies on validation set. Separate variable
                # names keep the training batch of the enclosing loop intact.
                valid_accuracies_batches = []
                model.eval()
                with torch.no_grad():
                    for valid_inputs, valid_targets in test_loader:
                        valid_inputs = valid_inputs.to(device)
                        valid_targets = valid_targets.to(device)
                        valid_predictions = model(valid_inputs).max(1)[1]

                        # Weight by the batch length because the final batch of
                        # the DataLoader may be smaller (drop_last=False).
                        valid_accuracies_batches.append(
                            accuracy(valid_targets, valid_predictions) * len(valid_inputs)
                        )
                model.train()

                # Append average validation accuracy to list.
                valid_accuracies.append(np.sum(valid_accuracies_batches) / len(dset_test))

                print(f"Step {step:<5}   training accuracy: {train_accuracies[-1]}")
                print(f"             test accuracy: {valid_accuracies[-1]}")

    print("Finished training.")

    # Evaluate test set
    confusion_matrix = np.zeros((n_classes, n_classes))
    test_accuracies = []
    model.eval()
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            output = model(inputs)

            predictions = output.max(1)[1]

            # Multiply by len(inputs) because the final batch of DataLoader
            # may be smaller (drop_last=False).
            test_accuracies.append(accuracy(targets, predictions) * len(inputs))

            # A fixed label set keeps every per-batch matrix at
            # (n_classes, n_classes) even when a batch lacks some class.
            confusion_matrix += metrics.confusion_matrix(
                targets.detach().cpu().numpy(),
                predictions.detach().cpu().numpy(),
                labels=class_labels,
            )

    test_accuracy = np.sum(test_accuracies) / len(dset_test)

    model.train()
    print(f"run{i}: {test_accuracy}")
    test_accuracys.append(test_accuracy)

print(test_accuracys)

Model 0: 0/50
Model 0: 1/50
Model 0: 2/50
Model 0: 3/50
Model 0: 4/50
Model 0: 5/50
Model 0: 6/50
Model 0: 7/50
Model 0: 8/50
Model 0: 9/50
Model 0: 10/50
Model 0: 11/50
Model 0: 12/50
Model 0: 13/50
Model 0: 14/50
Model 0: 15/50
Model 0: 16/50
Model 0: 17/50
Model 0: 18/50
Model 0: 19/50
Step 500     training accuracy: 1.0
             test accuracy: 0.2222222222222222
Model 0: 20/50
Model 0: 21/50
Model 0: 22/50
Model 0: 23/50
Model 0: 24/50
Model 0: 25/50
Model 0: 26/50
Model 0: 27/50
Model 0: 28/50
Model 0: 29/50
Model 0: 30/50
Model 0: 31/50
Model 0: 32/50
Model 0: 33/50
Model 0: 34/50
Model 0: 35/50
Model 0: 36/50
Model 0: 37/50
Model 0: 38/50
Step 1000    training accuracy: 1.0
             test accuracy: 0.22534332084893882
Model 0: 39/50
Model 0: 40/50
Model 0: 41/50
Model 0: 42/50
Model 0: 43/50
Model 0: 44/50
Model 0: 45/50
Model 0: 46/50
Model 0: 47/50
Model 0: 48/50
Model 0: 49/50
Finished training.
run0: 0.22846441947565543
Model 1: 0/50


KeyboardInterrupt: ignored