In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# download the data
#!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00229/Skin_NonSkin.txt

--2022-11-09 09:59:16--  https://archive.ics.uci.edu/ml/machine-learning-databases/00229/Skin_NonSkin.txt
Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3400818 (3.2M) [application/x-httpd-php]
Saving to: ‘Skin_NonSkin.txt’


2022-11-09 09:59:20 (1.05 MB/s) - ‘Skin_NonSkin.txt’ saved [3400818/3400818]



In [3]:
df = pd.read_csv('Skin_NonSkin.txt', delimiter="\t", header=None, names=['B', 'G', 'R', 'label'])

In [4]:
x_train, x_test, y_train, y_test = train_test_split(df[['R','B','G']].values, df['label'].values, test_size=0.2, random_state=0)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((196045, 3), (49012, 3), (196045,), (49012,))

In [6]:
scaler = StandardScaler().fit(x_train)

In [7]:
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [8]:
x_train_scaled.shape, x_test_scaled.shape

((196045, 3), (49012, 3))

In [64]:
from dataclasses import dataclass
from typing import Tuple
import torch
import numpy as np

@dataclass(eq=False)
class SkinDataset(torch.utils.data.Dataset):
    data: np.ndarray
    labels: np.ndarray

    def __new__(cls, *args, **kwargs):
        obj = object.__new__(cls)
        torch.utils.data.Dataset.__init__(obj)
        return obj

    def __post_init__(self):
        self.data = self.data.astype(np.float32)
        self.labels = self.labels - 1
        
    def __len__(self) -> int:
        return self.data.shape[0]
    
    def __getitem__(self, idx: int) -> Tuple[np.ndarray, int]:
        return self.data[idx, ::], self.labels[idx]

In [65]:
ds_train = SkinDataset(x_train_scaled, y_train)
ds_test = SkinDataset(x_test_scaled, y_test)

In [66]:
from torch.utils.data import DataLoader

BATCH_SIZE: int = 256

dl_train = DataLoader(ds_train, shuffle=True, batch_size=BATCH_SIZE, drop_last=False)
dl_test = DataLoader(ds_test, shuffle=False, batch_size=BATCH_SIZE, drop_last=False)

In [106]:
from typing import Any
import pytorch_lightning as pl
import torchmetrics

class SkinMLP(pl.LightningModule):

    def __init__(self, loss: callable) -> None:
        super().__init__()
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(3, 100),
            torch.nn.ReLU(),
            torch.nn.Linear(100, 500),
            torch.nn.ReLU(),
            torch.nn.Linear(500, 2),
        )
        self.loss = loss


    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.mlp(x)

    def _step(self, batch) -> torch.Tensor:
        x, y = batch
        pred = self.forward(x)
        loss = self.loss(pred, y)
        return pred, loss

    def training_step(self, batch) -> torch.Tensor:
        pred, loss = self._step(batch)
        self.log("train/loss", loss)
        pred = torch.argmax(torch.nn.functional.softmax(pred), dim=1)
        auc = torchmetrics.functional.auc(pred, batch[-1], reorder=True)
        self.log("train/auc", auc)
        return loss

    def on_test_epoch_start(self) -> None:
        self.auc = torchmetrics.AUC(reorder=True)

    def test_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor:
        pred, loss = self._step(batch)
        self.log("test", loss)
        pred = torch.argmax(torch.nn.functional.softmax(pred), dim=1)
        self.auc.update(pred, batch[-1])

    def test_epoch_end(self, outputs) -> None:
        print(self.auc.compute())

    def configure_optimizers(self) -> Any:
        optim = torch.optim.Adam(self.parameters())
        return optim

In [107]:
loss = torch.nn.CrossEntropyLoss()
model = SkinMLP(loss)
x, y = next(iter(dl_train))

trainer = pl.Trainer(max_epochs=10, enable_progress_bar=True)

trainer.fit(model, train_dataloaders=dl_train)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name | Type             | Params
------------------------------------------
0 | mlp  | Sequential       | 51.9 K
1 | loss | CrossEntropyLoss | 0     
------------------------------------------
51.9 K    Trainable params
0         Non-trainable params
51.9 K    Total params
0.208     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  pred = torch.argmax(torch.nn.functional.softmax(pred), dim=1)
`Trainer.fit` stopped: `max_epochs=10` reached.


In [108]:
trainer.test(model, dataloaders=dl_test)

  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

  pred = torch.argmax(torch.nn.functional.softmax(pred), dim=1)


tensor(0.5000)
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
          test             0.0026423181407153606
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test': 0.0026423181407153606}]