In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# download the data
#!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00229/Skin_NonSkin.txt

--2022-11-09 09:59:16--  https://archive.ics.uci.edu/ml/machine-learning-databases/00229/Skin_NonSkin.txt
Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3400818 (3.2M) [application/x-httpd-php]
Saving to: ‘Skin_NonSkin.txt’


2022-11-09 09:59:20 (1.05 MB/s) - ‘Skin_NonSkin.txt’ saved [3400818/3400818]



In [2]:
df = pd.read_csv('Skin_NonSkin.txt', delimiter="\t", header=None, names=['B', 'G', 'R', 'label'])

In [3]:
x_train, x_test, y_train, y_test = train_test_split(df[['R','B','G']].values, df['label'].values, test_size=0.2, random_state=0)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((196045, 3), (49012, 3), (196045,), (49012,))

In [4]:
scaler = StandardScaler().fit(x_train)

In [5]:
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [6]:
x_train_scaled.shape, x_test_scaled.shape

((196045, 3), (49012, 3))

In [7]:
from dataclasses import dataclass
from typing import Tuple
import torch
import numpy as np

@dataclass(eq=False)
class SkinDataset(torch.utils.data.Dataset):
    data: np.ndarray
    labels: np.ndarray

    def __new__(cls, *args, **kwargs):
        obj = object.__new__(cls)
        torch.utils.data.Dataset.__init__(obj)
        return obj

    def __post_init__(self):
        self.data = self.data.astype(np.float32)
        self.labels = self.labels - 1
        
    def __len__(self) -> int:
        return self.data.shape[0]
    
    def __getitem__(self, idx: int) -> Tuple[np.ndarray, int]:
        return self.data[idx, ::], self.labels[idx]

In [8]:
ds_train = SkinDataset(x_train_scaled, y_train)
ds_test = SkinDataset(x_test_scaled, y_test)

In [9]:
from torch.utils.data import DataLoader

BATCH_SIZE: int = 256

dl_train = DataLoader(ds_train, shuffle=True, batch_size=BATCH_SIZE, drop_last=False)
dl_test = DataLoader(ds_test, shuffle=False, batch_size=BATCH_SIZE, drop_last=False)

In [30]:
from typing import Any

import pytorch_lightning as pl
import torchmetrics


class SkinMLP(pl.LightningModule):

    def __init__(self, loss: callable, lr: float) -> None:
        super().__init__()
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(3, 100),
            torch.nn.ReLU(),
            torch.nn.Linear(100, 500),
            torch.nn.ReLU(),
            torch.nn.Linear(500, 2),
        )
        self.loss = loss
        self.lr = lr

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.mlp(x)

    def _step(self, batch) -> torch.Tensor:
        x, y = batch
        pred = self.forward(x)
        loss = self.loss(pred, y)
        return pred, loss

    def training_step(self, batch) -> torch.Tensor:
        pred, loss = self._step(batch)
        self.log("train/loss", loss)
        pred = torch.nn.functional.softmax(pred)
        auroc = torchmetrics.functional.auroc(pred, batch[-1], num_classes=2)
        acc = torchmetrics.functional.accuracy(pred, batch[-1], num_classes=2)
        self.log("train/auc", auroc)
        self.log("train/acc", acc)
        return loss

    def on_test_epoch_start(self) -> None:
        self.auroc = torchmetrics.AUROC(num_classes=2)
        self.acc = torchmetrics.Accuracy(num_classes=2)

    def test_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor:
        pred, loss = self._step(batch)
        self.log("test", loss)
        pred = torch.nn.functional.softmax(pred)
        self.auroc.update(pred, batch[-1])
        self.acc.update(pred, batch[-1])

    def test_epoch_end(self, outputs) -> None:
        print(f"Test AUROC: {self.auroc.compute().data}")
        print(f"Test Accuracy: {self.acc.compute().data}")

    def configure_optimizers(self) -> Any:
        optim = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optim

In [31]:
loss = torch.nn.CrossEntropyLoss()
epochs = 10

lrs = [1e-3, 1e-5]
for lr, i in enumerate(lrs):
    print(f"Training Model: {i}, with lr: {lr} for: {epochs}")
    model = SkinMLP(loss, lr)
    trainer = pl.Trainer(max_epochs=epochs, enable_progress_bar=True)
    trainer.fit(model, train_dataloaders=dl_train)
    trainer.test(model, dataloaders=dl_test)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /home/paul/workspaces/PycharmProjects/practical_pytorch_dl/sheet_03/lightning_logs

  | Name | Type             | Params
------------------------------------------
0 | mlp  | Sequential       | 51.9 K
1 | loss | CrossEntropyLoss | 0     
------------------------------------------
51.9 K    Trainable params
0         Non-trainable params
51.9 K    Total params
0.208     Total estimated model params size (MB)


Training Model: 0.001, with lr: 0 for: 10


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  pred = torch.nn.functional.softmax(pred)
`Trainer.fit` stopped: `max_epochs=10` reached.
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

  pred = torch.nn.functional.softmax(pred)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name | Type             | Params
------------------------------------------
0 | mlp  | Sequential       | 51.9 K
1 | loss | CrossEntropyLoss | 0     
------------------------------------------
51.9 K    Trainable params
0         Non-trainable params
51.9 K    Total params
0.208     Total estimated model params size (MB)


Test AUROC: 0.6158872246742249
Test Accuracy: 0.3644413650035858
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
          test              0.7193893790245056
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Training Model: 1e-05, with lr: 1 for: 10


  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  pred = torch.nn.functional.softmax(pred)
`Trainer.fit` stopped: `max_epochs=10` reached.
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

  pred = torch.nn.functional.softmax(pred)


Test AUROC: 0.7555513381958008
Test Accuracy: 0.7929690480232239
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
          test              0.3867000937461853
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


# Results of the two Train and Test runs:

- Model 1 (red): with lr: `1e-3`
- Model 2 (blue): with lr: `1e-5`

Training Loss:  
![Training Loss](results/train_loss.png)  
Training AUROC:  
![Training AUROC](results/train_auroc.png)  
Training Accuracy:  
![Training Accuracy](results/train_acc.png)  

## Final Results on the Test set: 

Model 1 : with lr: `1e-3`  
Test AUROC: 0.6158  
Test Accuracy: 0.3644  

Model 2 : with lr: `1e-5`  
Test AUROC: 0.7555  
Test Accuracy: 0.7929  
