In [1]:
import os

import pandas as pd
import torch
import torch.optim as optim
from sklearn.manifold import TSNE
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay,
)
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from torch.utils.data import DataLoader, TensorDataset
from torchinfo import summary
from torchvision import transforms
from xgboost import XGBClassifier

from src.datasets import IrmaDataset
from src.transforms import MinMaxScaler, Sobel, Laplacian, GammaCorrection, HistogramEqualizer
from src.models import VAE, PVAE
from src.utils.model_utils import train_val_split, get_latent, predict
from src.utils.visualization import (
    plot_reconstructions,
    plot_2d,
    sample_images,
    plot_history,
)
from src.utils.training_funcs import (
    train,
    validate,
    pred_kl_reconstruction_loss,
    kl_reconstruction_loss,
    get_metrics_str,
    pred_loss,
)

In [2]:
# Batch size handed to every DataLoader built in this notebook.
batch_size = 32

In [25]:
def features_to_df(features):
    """Turn extracted latent features into a DataFrame with one row per sample.

    Args:
        features: three parallel sequences ``(paths, latents, targets)``; they are
            zipped together, so the shortest one determines the number of rows.

    Returns:
        A ``pd.DataFrame`` with a ``path`` column, one ``x{i}`` column per latent
        component, and a ``class`` column holding the target cast to ``int``.
    """
    rows = []
    for path, latent, target in zip(*features):
        row = {'path': path}
        for i, a in enumerate(latent):
            row[f'x{i}'] = a
        row['class'] = int(target)
        rows.append(row)
    return pd.DataFrame(rows)

# Latent Features

## Simple Dataset

In [3]:
# Images are converted to tensors and rescaled by MinMaxScaler; each sample also
# carries its class label and path (see the sample printed in the next cell).
dataset = IrmaDataset(
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            MinMaxScaler()
        ]
    ),
    return_images=True,
    return_path=True
)

# Two-stage split with a fixed seed: the 'val' part of the first split becomes the
# test set, then the remaining 'train' part is split again with ratio 0.1 / 0.9 so
# that validation and test end up the same size (see the lengths printed below).
outer_split = train_val_split(dataset, 0.1, random_state=7)
inner_split = train_val_split(outer_split['train'], 0.1 / 0.9, random_state=7)
datasets = {**outer_split, 'test': outer_split['val'], **inner_split}

# These loaders only feed get_latent for descriptor extraction, and the resulting
# frames are sorted by path before being saved, so shuffling adds nothing; a fixed
# iteration order keeps repeated runs comparable.
dataloaders = {
    x: DataLoader(datasets[x], batch_size, shuffle=False)
    for x in ['train', 'test', 'val']
}

len(datasets['train']), len(datasets['test']), len(datasets['val'])

(4018, 503, 503)

In [4]:
# Peek at one test sample; it is an (image tensor, class label, path) tuple.
datasets['test'][0]

(tensor([[[0.3462, 0.3269, 0.3269,  ..., 0.2115, 0.2115, 0.1923],
          [0.3269, 0.3269, 0.3077,  ..., 0.2500, 0.1923, 0.2115],
          [0.2885, 0.3077, 0.3654,  ..., 0.2885, 0.2308, 0.2308],
          ...,
          [0.4615, 0.4423, 0.4808,  ..., 0.3269, 0.3269, 0.3462],
          [0.4423, 0.4423, 0.5000,  ..., 0.3077, 0.3462, 0.3462],
          [0.4615, 0.4808, 0.5385,  ..., 0.4231, 0.3654, 0.3654]]]),
 1,
 'p_e_right_mlo(183)')

### VAE

In [32]:
# The checkpoint stores a (constructor kwargs, state dict) pair.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict copies the values into the model's own parameters afterwards.
kwargs, state = torch.load("models/final/simple/vae.pt", map_location="cpu")

model = VAE(**kwargs)
model.load_state_dict(state)
# The model is only used for inference here. It contains BatchNorm layers (see the
# summary below), so switch to evaluation mode explicitly: get_latent's own mode
# handling is not visible from this notebook.
model.eval()

summary(model, input_size=(32, 1, 128, 128))

Layer (type:depth-idx)                   Output Shape              Param #
VAE                                      [32, 1, 128, 128]         --
├─Encoder: 1-1                           [32, 64]                  --
│    └─Sequential: 2-1                   [32, 64, 6, 6]            --
│    │    └─Conv2d: 3-1                  [32, 192, 124, 124]       4,992
│    │    └─BatchNorm2d: 3-2             [32, 192, 124, 124]       384
│    │    └─GELU: 3-3                    [32, 192, 124, 124]       --
│    │    └─MaxPool2d: 3-4               [32, 192, 62, 62]         --
│    │    └─Conv2d: 3-5                  [32, 128, 60, 60]         221,312
│    │    └─BatchNorm2d: 3-6             [32, 128, 60, 60]         256
│    │    └─GELU: 3-7                    [32, 128, 60, 60]         --
│    │    └─MaxPool2d: 3-8               [32, 128, 30, 30]         --
│    │    └─Conv2d: 3-9                  [32, 96, 28, 28]          110,688
│    │    └─BatchNorm2d: 3-10            [32, 96, 28, 28]          192

In [34]:
# Encode every split with the loaded model and save the latent descriptors as CSV,
# sorted by path so the row order does not depend on loader iteration order.
out_dir = './descriptors/simple/vae'
os.makedirs(out_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

latent_dfs = {}
for split in ('train', 'val', 'test'):
    latent_dfs[split] = features_to_df(
        get_latent(model, dataloaders[split], include_paths=True)
    ).sort_values('path')
    latent_dfs[split].to_csv(f'{out_dir}/{split}.csv', index=False)

# Keep the per-split names bound for interactive inspection.
train_df, val_df, test_df = (latent_dfs[name] for name in ('train', 'val', 'test'))

### PVAE

In [35]:
# The checkpoint stores a (constructor kwargs, state dict) pair.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict copies the values into the model's own parameters afterwards.
kwargs, state = torch.load("models/final/simple/pvae.pt", map_location="cpu")

model = PVAE(**kwargs)
model.load_state_dict(state)
# The model is only used for inference here. It contains BatchNorm layers (see the
# summary below), so switch to evaluation mode explicitly: get_latent's own mode
# handling is not visible from this notebook.
model.eval()

summary(model, input_size=(32, 1, 128, 128))

Layer (type:depth-idx)                   Output Shape              Param #
PVAE                                     [32, 1, 128, 128]         --
├─Encoder: 1-1                           [32, 64]                  --
│    └─Sequential: 2-1                   [32, 64, 6, 6]            --
│    │    └─Conv2d: 3-1                  [32, 192, 124, 124]       4,992
│    │    └─BatchNorm2d: 3-2             [32, 192, 124, 124]       384
│    │    └─GELU: 3-3                    [32, 192, 124, 124]       --
│    │    └─MaxPool2d: 3-4               [32, 192, 62, 62]         --
│    │    └─Conv2d: 3-5                  [32, 128, 60, 60]         221,312
│    │    └─BatchNorm2d: 3-6             [32, 128, 60, 60]         256
│    │    └─GELU: 3-7                    [32, 128, 60, 60]         --
│    │    └─MaxPool2d: 3-8               [32, 128, 30, 30]         --
│    │    └─Conv2d: 3-9                  [32, 96, 28, 28]          110,688
│    │    └─BatchNorm2d: 3-10            [32, 96, 28, 28]          192

In [37]:
# Encode every split with the loaded model and save the latent descriptors as CSV,
# sorted by path so the row order does not depend on loader iteration order.
out_dir = './descriptors/simple/pvae'
os.makedirs(out_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

latent_dfs = {}
for split in ('train', 'val', 'test'):
    latent_dfs[split] = features_to_df(
        get_latent(model, dataloaders[split], include_paths=True)
    ).sort_values('path')
    latent_dfs[split].to_csv(f'{out_dir}/{split}.csv', index=False)

# Keep the per-split names bound for interactive inspection.
train_df, val_df, test_df = (latent_dfs[name] for name in ('train', 'val', 'test'))

## Sobel

In [43]:
# Same pipeline as the simple dataset plus a Sobel step followed by a second
# rescaling. Judging by the sample printed in the next cell, Sobel() yields a
# two-channel tensor (the scaled image plus an edge map) - confirm in src.transforms.
dataset = IrmaDataset(
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            MinMaxScaler(),
            Sobel(),
            MinMaxScaler()
        ]
    ),
    return_images=True,
    return_path=True
)

# Two-stage split with a fixed seed: the 'val' part of the first split becomes the
# test set, then the remaining 'train' part is split again with ratio 0.1 / 0.9 so
# that validation and test end up the same size (see the lengths printed below).
outer_split = train_val_split(dataset, 0.1, random_state=7)
inner_split = train_val_split(outer_split['train'], 0.1 / 0.9, random_state=7)
datasets = {**outer_split, 'test': outer_split['val'], **inner_split}

# These loaders only feed get_latent for descriptor extraction, and the resulting
# frames are sorted by path before being saved, so shuffling adds nothing; a fixed
# iteration order keeps repeated runs comparable.
dataloaders = {
    x: DataLoader(datasets[x], batch_size, shuffle=False)
    for x in ['train', 'test', 'val']
}

len(datasets['train']), len(datasets['test']), len(datasets['val'])

(4018, 503, 503)

In [44]:
# Peek at one test sample: (image tensor, class label, path). The printed tensor
# has two channel blocks for this dataset.
datasets['test'][0]

(tensor([[[0.3462, 0.3269, 0.3269,  ..., 0.2115, 0.2115, 0.1923],
          [0.3269, 0.3269, 0.3077,  ..., 0.2500, 0.1923, 0.2115],
          [0.2885, 0.3077, 0.3654,  ..., 0.2885, 0.2308, 0.2308],
          ...,
          [0.4615, 0.4423, 0.4808,  ..., 0.3269, 0.3269, 0.3462],
          [0.4423, 0.4423, 0.5000,  ..., 0.3077, 0.3462, 0.3462],
          [0.4615, 0.4808, 0.5385,  ..., 0.4231, 0.3654, 0.3654]],
 
         [[1.0000, 1.0000, 1.0000,  ..., 0.9738, 0.8496, 0.8703],
          [1.0000, 0.0608, 0.0816,  ..., 0.3004, 0.2176, 0.8325],
          [1.0000, 0.1465, 0.0816,  ..., 0.1923, 0.3778, 1.0000],
          ...,
          [1.0000, 0.1904, 0.4903,  ..., 0.1654, 0.0608, 1.0000],
          [1.0000, 0.2507, 0.4385,  ..., 0.1923, 0.1961, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]]]),
 1,
 'p_e_right_mlo(183)')

### VAE

In [45]:
# The checkpoint stores a (constructor kwargs, state dict) pair.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict copies the values into the model's own parameters afterwards.
kwargs, state = torch.load("models/final/sobel/vae.pt", map_location="cpu")

model = VAE(**kwargs)
model.load_state_dict(state)
# The model is only used for inference here. It contains BatchNorm layers (see the
# summary below), so switch to evaluation mode explicitly: get_latent's own mode
# handling is not visible from this notebook.
model.eval()

summary(model, input_size=(32, 2, 128, 128))

Layer (type:depth-idx)                   Output Shape              Param #
VAE                                      [32, 2, 128, 128]         --
├─Encoder: 1-1                           [32, 64]                  --
│    └─Sequential: 2-1                   [32, 64, 6, 6]            --
│    │    └─Conv2d: 3-1                  [32, 192, 124, 124]       9,792
│    │    └─BatchNorm2d: 3-2             [32, 192, 124, 124]       384
│    │    └─GELU: 3-3                    [32, 192, 124, 124]       --
│    │    └─MaxPool2d: 3-4               [32, 192, 62, 62]         --
│    │    └─Conv2d: 3-5                  [32, 128, 60, 60]         221,312
│    │    └─BatchNorm2d: 3-6             [32, 128, 60, 60]         256
│    │    └─GELU: 3-7                    [32, 128, 60, 60]         --
│    │    └─MaxPool2d: 3-8               [32, 128, 30, 30]         --
│    │    └─Conv2d: 3-9                  [32, 96, 28, 28]          110,688
│    │    └─BatchNorm2d: 3-10            [32, 96, 28, 28]          192

In [46]:
# Encode every split with the loaded model and save the latent descriptors as CSV,
# sorted by path so the row order does not depend on loader iteration order.
out_dir = './descriptors/sobel/vae'
os.makedirs(out_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

latent_dfs = {}
for split in ('train', 'val', 'test'):
    latent_dfs[split] = features_to_df(
        get_latent(model, dataloaders[split], include_paths=True)
    ).sort_values('path')
    latent_dfs[split].to_csv(f'{out_dir}/{split}.csv', index=False)

# Keep the per-split names bound for interactive inspection.
train_df, val_df, test_df = (latent_dfs[name] for name in ('train', 'val', 'test'))

### PVAE

In [47]:
# The checkpoint stores a (constructor kwargs, state dict) pair.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict copies the values into the model's own parameters afterwards.
kwargs, state = torch.load("models/final/sobel/pvae.pt", map_location="cpu")

model = PVAE(**kwargs)
model.load_state_dict(state)
# The model is only used for inference here. It contains BatchNorm layers (see the
# summary below), so switch to evaluation mode explicitly: get_latent's own mode
# handling is not visible from this notebook.
model.eval()

summary(model, input_size=(32, 2, 128, 128))

Layer (type:depth-idx)                   Output Shape              Param #
PVAE                                     [32, 2, 128, 128]         --
├─Encoder: 1-1                           [32, 64]                  --
│    └─Sequential: 2-1                   [32, 64, 6, 6]            --
│    │    └─Conv2d: 3-1                  [32, 192, 124, 124]       9,792
│    │    └─BatchNorm2d: 3-2             [32, 192, 124, 124]       384
│    │    └─GELU: 3-3                    [32, 192, 124, 124]       --
│    │    └─MaxPool2d: 3-4               [32, 192, 62, 62]         --
│    │    └─Conv2d: 3-5                  [32, 128, 60, 60]         221,312
│    │    └─BatchNorm2d: 3-6             [32, 128, 60, 60]         256
│    │    └─GELU: 3-7                    [32, 128, 60, 60]         --
│    │    └─MaxPool2d: 3-8               [32, 128, 30, 30]         --
│    │    └─Conv2d: 3-9                  [32, 96, 28, 28]          110,688
│    │    └─BatchNorm2d: 3-10            [32, 96, 28, 28]          192

In [48]:
# Encode every split with the loaded model and save the latent descriptors as CSV,
# sorted by path so the row order does not depend on loader iteration order.
out_dir = './descriptors/sobel/pvae'
os.makedirs(out_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

latent_dfs = {}
for split in ('train', 'val', 'test'):
    latent_dfs[split] = features_to_df(
        get_latent(model, dataloaders[split], include_paths=True)
    ).sort_values('path')
    latent_dfs[split].to_csv(f'{out_dir}/{split}.csv', index=False)

# Keep the per-split names bound for interactive inspection.
train_df, val_df, test_df = (latent_dfs[name] for name in ('train', 'val', 'test'))

## Laplacian

In [50]:
# Same pipeline as the simple dataset plus a Laplacian step followed by a second
# rescaling. Judging by the sample printed in the next cell, Laplacian() with its
# defaults yields a two-channel tensor (the scaled image plus the filter response)
# - confirm in src.transforms.
dataset = IrmaDataset(
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            MinMaxScaler(),
            Laplacian(),
            MinMaxScaler()
        ]
    ),
    return_images=True,
    return_path=True
)

# Two-stage split with a fixed seed: the 'val' part of the first split becomes the
# test set, then the remaining 'train' part is split again with ratio 0.1 / 0.9 so
# that validation and test end up the same size (see the lengths printed below).
outer_split = train_val_split(dataset, 0.1, random_state=7)
inner_split = train_val_split(outer_split['train'], 0.1 / 0.9, random_state=7)
datasets = {**outer_split, 'test': outer_split['val'], **inner_split}

# These loaders only feed get_latent for descriptor extraction, and the resulting
# frames are sorted by path before being saved, so shuffling adds nothing; a fixed
# iteration order keeps repeated runs comparable.
dataloaders = {
    x: DataLoader(datasets[x], batch_size, shuffle=False)
    for x in ['train', 'test', 'val']
}

len(datasets['train']), len(datasets['test']), len(datasets['val'])

(4018, 503, 503)

In [51]:
# Peek at one test sample: (image tensor, class label, path). The printed tensor
# has two channel blocks for this dataset.
datasets['test'][0]

(tensor([[[0.3462, 0.3269, 0.3269,  ..., 0.2115, 0.2115, 0.1923],
          [0.3269, 0.3269, 0.3077,  ..., 0.2500, 0.1923, 0.2115],
          [0.2885, 0.3077, 0.3654,  ..., 0.2885, 0.2308, 0.2308],
          ...,
          [0.4615, 0.4423, 0.4808,  ..., 0.3269, 0.3269, 0.3462],
          [0.4423, 0.4423, 0.5000,  ..., 0.3077, 0.3462, 0.3462],
          [0.4615, 0.4808, 0.5385,  ..., 0.4231, 0.3654, 0.3654]],
 
         [[0.7639, 0.4583, 0.4861,  ..., 0.3333, 0.4167, 0.4861],
          [0.4861, 0.2639, 0.1389,  ..., 0.2639, 0.1389, 0.4028],
          [0.3889, 0.1944, 0.3889,  ..., 0.3333, 0.1667, 0.3472],
          ...,
          [0.5833, 0.1667, 0.1389,  ..., 0.1944, 0.2083, 0.5139],
          [0.5278, 0.1667, 0.2361,  ..., 0.0972, 0.2639, 0.4722],
          [0.9028, 0.5833, 0.6806,  ..., 0.7222, 0.4722, 0.7778]]]),
 1,
 'p_e_right_mlo(183)')

### VAE

In [52]:
# The checkpoint stores a (constructor kwargs, state dict) pair.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict copies the values into the model's own parameters afterwards.
kwargs, state = torch.load("models/final/laplacian/vae.pt", map_location="cpu")

model = VAE(**kwargs)
model.load_state_dict(state)
# The model is only used for inference here. It contains BatchNorm layers (see the
# summary below), so switch to evaluation mode explicitly: get_latent's own mode
# handling is not visible from this notebook.
model.eval()

summary(model, input_size=(32, 2, 128, 128))

Layer (type:depth-idx)                   Output Shape              Param #
VAE                                      [32, 2, 128, 128]         --
├─Encoder: 1-1                           [32, 64]                  --
│    └─Sequential: 2-1                   [32, 64, 6, 6]            --
│    │    └─Conv2d: 3-1                  [32, 192, 124, 124]       9,792
│    │    └─BatchNorm2d: 3-2             [32, 192, 124, 124]       384
│    │    └─GELU: 3-3                    [32, 192, 124, 124]       --
│    │    └─MaxPool2d: 3-4               [32, 192, 62, 62]         --
│    │    └─Conv2d: 3-5                  [32, 128, 60, 60]         221,312
│    │    └─BatchNorm2d: 3-6             [32, 128, 60, 60]         256
│    │    └─GELU: 3-7                    [32, 128, 60, 60]         --
│    │    └─MaxPool2d: 3-8               [32, 128, 30, 30]         --
│    │    └─Conv2d: 3-9                  [32, 96, 28, 28]          110,688
│    │    └─BatchNorm2d: 3-10            [32, 96, 28, 28]          192

In [53]:
# Encode every split with the loaded model and save the latent descriptors as CSV,
# sorted by path so the row order does not depend on loader iteration order.
out_dir = './descriptors/laplacian/vae'
os.makedirs(out_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

latent_dfs = {}
for split in ('train', 'val', 'test'):
    latent_dfs[split] = features_to_df(
        get_latent(model, dataloaders[split], include_paths=True)
    ).sort_values('path')
    latent_dfs[split].to_csv(f'{out_dir}/{split}.csv', index=False)

# Keep the per-split names bound for interactive inspection.
train_df, val_df, test_df = (latent_dfs[name] for name in ('train', 'val', 'test'))

### PVAE

In [55]:
# The checkpoint stores a (constructor kwargs, state dict) pair.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict copies the values into the model's own parameters afterwards.
kwargs, state = torch.load("models/final/laplacian/pvae.pt", map_location="cpu")

model = PVAE(**kwargs)
model.load_state_dict(state)
# The model is only used for inference here. It contains BatchNorm layers (see the
# summary below), so switch to evaluation mode explicitly: get_latent's own mode
# handling is not visible from this notebook.
model.eval()

summary(model, input_size=(32, 2, 128, 128))

Layer (type:depth-idx)                   Output Shape              Param #
PVAE                                     [32, 2, 128, 128]         --
├─Encoder: 1-1                           [32, 64]                  --
│    └─Sequential: 2-1                   [32, 64, 6, 6]            --
│    │    └─Conv2d: 3-1                  [32, 192, 124, 124]       9,792
│    │    └─BatchNorm2d: 3-2             [32, 192, 124, 124]       384
│    │    └─GELU: 3-3                    [32, 192, 124, 124]       --
│    │    └─MaxPool2d: 3-4               [32, 192, 62, 62]         --
│    │    └─Conv2d: 3-5                  [32, 128, 60, 60]         221,312
│    │    └─BatchNorm2d: 3-6             [32, 128, 60, 60]         256
│    │    └─GELU: 3-7                    [32, 128, 60, 60]         --
│    │    └─MaxPool2d: 3-8               [32, 128, 30, 30]         --
│    │    └─Conv2d: 3-9                  [32, 96, 28, 28]          110,688
│    │    └─BatchNorm2d: 3-10            [32, 96, 28, 28]          192

In [56]:
# Encode every split with the loaded model and save the latent descriptors as CSV,
# sorted by path so the row order does not depend on loader iteration order.
out_dir = './descriptors/laplacian/pvae'
os.makedirs(out_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

latent_dfs = {}
for split in ('train', 'val', 'test'):
    latent_dfs[split] = features_to_df(
        get_latent(model, dataloaders[split], include_paths=True)
    ).sort_values('path')
    latent_dfs[split].to_csv(f'{out_dir}/{split}.csv', index=False)

# Keep the per-split names bound for interactive inspection.
train_df, val_df, test_df = (latent_dfs[name] for name in ('train', 'val', 'test'))

## Laplacian Inplace

In [57]:
# Same pipeline as the simple dataset plus Laplacian('add', kernel='negative')
# followed by a second rescaling. Judging by the sample printed in the next cell,
# this variant keeps a single channel - confirm the exact semantics of the 'add'
# mode and the 'negative' kernel in src.transforms.
dataset = IrmaDataset(
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            MinMaxScaler(),
            Laplacian('add', kernel='negative'),
            MinMaxScaler()
        ]
    ),
    return_images=True,
    return_path=True
)

# Two-stage split with a fixed seed: the 'val' part of the first split becomes the
# test set, then the remaining 'train' part is split again with ratio 0.1 / 0.9 so
# that validation and test end up the same size (see the lengths printed below).
outer_split = train_val_split(dataset, 0.1, random_state=7)
inner_split = train_val_split(outer_split['train'], 0.1 / 0.9, random_state=7)
datasets = {**outer_split, 'test': outer_split['val'], **inner_split}

# These loaders only feed get_latent for descriptor extraction, and the resulting
# frames are sorted by path before being saved, so shuffling adds nothing; a fixed
# iteration order keeps repeated runs comparable.
dataloaders = {
    x: DataLoader(datasets[x], batch_size, shuffle=False)
    for x in ['train', 'test', 'val']
}

len(datasets['train']), len(datasets['test']), len(datasets['val'])

(4018, 503, 503)

In [58]:
# Peek at one test sample: (image tensor, class label, path). The printed tensor
# has a single channel block for this dataset.
datasets['test'][0]

(tensor([[[0.5812, 0.3846, 0.4017,  ..., 0.2564, 0.3077, 0.3419],
          [0.4017, 0.2650, 0.1795,  ..., 0.2308, 0.1282, 0.2991],
          [0.3248, 0.2137, 0.3590,  ..., 0.2906, 0.1624, 0.2735],
          ...,
          [0.5214, 0.2564, 0.2564,  ..., 0.2222, 0.2308, 0.4274],
          [0.4786, 0.2564, 0.3248,  ..., 0.1538, 0.2735, 0.4017],
          [0.7179, 0.5299, 0.6154,  ..., 0.5897, 0.4103, 0.5983]]]),
 1,
 'p_e_right_mlo(183)')

### VAE

In [59]:
# The checkpoint stores a (constructor kwargs, state dict) pair.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict copies the values into the model's own parameters afterwards.
kwargs, state = torch.load("models/final/laplacian_inplace/vae.pt", map_location="cpu")

model = VAE(**kwargs)
model.load_state_dict(state)
# The model is only used for inference here. It contains BatchNorm layers (see the
# summary below), so switch to evaluation mode explicitly: get_latent's own mode
# handling is not visible from this notebook.
model.eval()

summary(model, input_size=(32, 1, 128, 128))

Layer (type:depth-idx)                   Output Shape              Param #
VAE                                      [32, 1, 128, 128]         --
├─Encoder: 1-1                           [32, 64]                  --
│    └─Sequential: 2-1                   [32, 64, 6, 6]            --
│    │    └─Conv2d: 3-1                  [32, 192, 124, 124]       4,992
│    │    └─BatchNorm2d: 3-2             [32, 192, 124, 124]       384
│    │    └─GELU: 3-3                    [32, 192, 124, 124]       --
│    │    └─MaxPool2d: 3-4               [32, 192, 62, 62]         --
│    │    └─Conv2d: 3-5                  [32, 128, 60, 60]         221,312
│    │    └─BatchNorm2d: 3-6             [32, 128, 60, 60]         256
│    │    └─GELU: 3-7                    [32, 128, 60, 60]         --
│    │    └─MaxPool2d: 3-8               [32, 128, 30, 30]         --
│    │    └─Conv2d: 3-9                  [32, 96, 28, 28]          110,688
│    │    └─BatchNorm2d: 3-10            [32, 96, 28, 28]          192

In [60]:
# Encode every split with the loaded model and save the latent descriptors as CSV,
# sorted by path so the row order does not depend on loader iteration order.
out_dir = './descriptors/laplacian_inplace/vae'
os.makedirs(out_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

latent_dfs = {}
for split in ('train', 'val', 'test'):
    latent_dfs[split] = features_to_df(
        get_latent(model, dataloaders[split], include_paths=True)
    ).sort_values('path')
    latent_dfs[split].to_csv(f'{out_dir}/{split}.csv', index=False)

# Keep the per-split names bound for interactive inspection.
train_df, val_df, test_df = (latent_dfs[name] for name in ('train', 'val', 'test'))

### PVAE

In [61]:
# The checkpoint stores a (constructor kwargs, state dict) pair.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine;
# load_state_dict copies the values into the model's own parameters afterwards.
kwargs, state = torch.load("models/final/laplacian_inplace/pvae.pt", map_location="cpu")

model = PVAE(**kwargs)
model.load_state_dict(state)
# The model is only used for inference here. It contains BatchNorm layers (see the
# summary below), so switch to evaluation mode explicitly: get_latent's own mode
# handling is not visible from this notebook.
model.eval()

summary(model, input_size=(32, 1, 128, 128))

Layer (type:depth-idx)                   Output Shape              Param #
PVAE                                     [32, 1, 128, 128]         --
├─Encoder: 1-1                           [32, 64]                  --
│    └─Sequential: 2-1                   [32, 64, 6, 6]            --
│    │    └─Conv2d: 3-1                  [32, 192, 124, 124]       4,992
│    │    └─BatchNorm2d: 3-2             [32, 192, 124, 124]       384
│    │    └─GELU: 3-3                    [32, 192, 124, 124]       --
│    │    └─MaxPool2d: 3-4               [32, 192, 62, 62]         --
│    │    └─Conv2d: 3-5                  [32, 128, 60, 60]         221,312
│    │    └─BatchNorm2d: 3-6             [32, 128, 60, 60]         256
│    │    └─GELU: 3-7                    [32, 128, 60, 60]         --
│    │    └─MaxPool2d: 3-8               [32, 128, 30, 30]         --
│    │    └─Conv2d: 3-9                  [32, 96, 28, 28]          110,688
│    │    └─BatchNorm2d: 3-10            [32, 96, 28, 28]          192

In [62]:
# Encode every split with the loaded model and save the latent descriptors as CSV,
# sorted by path so the row order does not depend on loader iteration order.
out_dir = './descriptors/laplacian_inplace/pvae'
os.makedirs(out_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

latent_dfs = {}
for split in ('train', 'val', 'test'):
    latent_dfs[split] = features_to_df(
        get_latent(model, dataloaders[split], include_paths=True)
    ).sort_values('path')
    latent_dfs[split].to_csv(f'{out_dir}/{split}.csv', index=False)

# Keep the per-split names bound for interactive inspection.
train_df, val_df, test_df = (latent_dfs[name] for name in ('train', 'val', 'test'))

# Texture Descriptors

In [74]:
import pyfeats

In [108]:
def to_df(features, labels):
    """Build a DataFrame from feature rows and their column labels.

    Args:
        features: iterable of rows, each a sequence of values.
        labels: column names, paired positionally with the values of every row
            (pairing stops at the shorter of the two).

    Returns:
        A ``pd.DataFrame`` with one row per entry of ``features``.
    """
    records = [dict(zip(labels, feats)) for feats in features]
    return pd.DataFrame(records)

In [77]:
# No transform is passed here: the texture descriptors are computed on the images
# exactly as IrmaDataset returns them.
dataset = IrmaDataset(return_images=True, return_path=True)

# Two-stage split with a fixed seed: the 'val' part of the first split becomes the
# test set, then the remaining 'train' part is split again with ratio 0.1 / 0.9.
first_split = train_val_split(dataset, 0.1, random_state=7)
second_split = train_val_split(first_split['train'], 0.1 / 0.9, random_state=7)
datasets = {**first_split, 'test': first_split['val'], **second_split}

tuple(len(datasets[name]) for name in ('train', 'test', 'val'))

(4018, 503, 503)

In [198]:
def extract_features(data, feature_extractor):
    """Run ``feature_extractor`` on every sample and collect one row per sample.

    Args:
        data: iterable of ``(image, label, path)`` samples.
        feature_extractor: callable taking an image and returning
            ``(values, names)`` - the feature values and their column names.

    Returns:
        ``(rows, columns)`` where each row is ``[path, *values, label]`` and
        ``columns`` is ``['path', *names, 'class']``. The names come from the last
        sample processed; for empty ``data`` the result is ``([], ['path', 'class'])``
        instead of an UnboundLocalError.
    """
    rows = []
    feature_names = []  # stays empty when `data` yields no samples
    for image, label, path in data:
        values, feature_names = feature_extractor(image)
        rows.append([path, *values, label])

    return rows, ['path', *feature_names, 'class']

In [208]:
def get_features(image):
    """Compute the texture descriptor vector of one image with pyfeats.

    The families are concatenated in this order: elements 6:8 of the ``fos``
    output (the FOS_Energy / FOS_Entropy columns of the saved tables), LBP
    features for the given P / R lists, ``fps`` features, the first feature set
    returned by ``glcm_features`` (presumably the mean over directions - confirm
    in the pyfeats docs), GLDS features and SFM features. ``None`` is passed
    wherever pyfeats takes a mask argument.

    Returns:
        ``(features, labels)``: two parallel lists of values and their names.
    """
    families = []

    fos_values, fos_names = pyfeats.fos(image, None)
    families.append((fos_values[6:8], fos_names[6:8]))

    families.append(pyfeats.lbp_features(image, None, P=[8,16,24,64], R=[1,2,4,8]))
    families.append(pyfeats.fps(image, None))

    # glcm_features returns four items; only the first values/names pair is kept.
    glcm_values, _, glcm_names, _ = pyfeats.glcm_features(image)
    families.append((glcm_values, glcm_names))

    families.append(pyfeats.glds_features(image, None))
    families.append(pyfeats.sfm_features(image, None))

    features, labels = [], []
    for values, names in families:
        features.extend(values)
        labels.extend(names)

    return features, labels


In [209]:
# Compute the texture descriptors of every split and save them as CSV, sorted by path.
out_dir = './descriptors/texture'
os.makedirs(out_dir, exist_ok=True)  # DataFrame.to_csv does not create missing directories

texture_dfs = {}
for split in ('train', 'val', 'test'):
    features, labels = extract_features(datasets[split], get_features)
    texture_dfs[split] = to_df(features, labels).sort_values('path')
    texture_dfs[split].to_csv(f'{out_dir}/{split}.csv', index=False)

# Keep the per-split names bound; the next cell displays test_df.
train_df, val_df, test_df = (texture_dfs[name] for name in ('train', 'val', 'test'))

In [210]:
# Display the texture descriptor table of the test split (pandas truncates the rendering).
test_df

Unnamed: 0,path,FOS_Energy,FOS_Entropy,LBP_R_1_P_8_energy,LBP_R_1_P_8_entropy,LBP_R_2_P_16_energy,LBP_R_2_P_16_entropy,LBP_R_4_P_24_energy,LBP_R_4_P_24_entropy,LBP_R_8_P_64_energy,...,GLDS_Homogeneity,GLDS_Contrast,GLDS_ASM,GLDS_Entopy,GLDS_Mean,SFM_Coarseness,SFM_Contrast,SFM_Periodicity,SFM_Roughness,class
135,p_d_left_cc(103),0.071268,2.836237,6.708676,-1.834438,14.257563,-2.596759,23.236863,-3.111787,63.082404,...,7873.754451,57555.25,69923019.0,-134422.171361,23178.75,41.657795,2.667277,0.463210,2.475731,0
295,p_d_left_cc(149),0.077834,2.723237,6.904665,-1.866315,14.754985,-2.640230,23.825054,-3.147492,63.972742,...,7795.203972,57734.00,69474394.5,-134373.333699,23381.00,45.544470,2.671315,0.443302,2.574935,0
38,p_d_left_cc(170),0.055384,3.034087,6.586726,-1.814970,13.942689,-2.569728,22.966288,-3.095781,63.035492,...,7923.298578,56217.75,70622543.5,-134599.706331,22937.75,40.428775,2.636226,0.534805,2.442729,0
146,p_d_left_cc(175),0.097196,2.513424,6.820831,-1.856192,14.318797,-2.603616,23.163462,-3.109378,63.168503,...,8802.155546,40653.50,80663884.5,-136800.153819,19141.50,51.221857,2.241675,0.458887,2.497748,0
493,p_d_left_cc(219),0.059191,3.031652,6.876430,-1.860569,14.634610,-2.629343,23.558677,-3.130950,63.322400,...,7602.755802,65114.50,67063379.5,-133607.847153,24587.00,41.727714,2.836977,0.452811,2.541767,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277,p_g_right_mlo(59),0.010156,4.730197,6.349571,-1.773225,13.083351,-2.497096,20.943987,-2.980388,56.100328,...,6479.987887,117780.00,53931514.5,-129549.951167,32666.00,24.773904,3.815952,0.617525,2.331944,3
73,p_g_right_mlo(65),0.052171,3.204647,6.813388,-1.860062,13.907658,-2.572900,21.675032,-3.026236,57.824152,...,10180.987062,25370.25,96755232.5,-139894.379773,14435.25,61.739432,1.770811,0.512116,2.419177,3
312,p_g_right_mlo(67),0.044875,3.293979,6.581122,-1.818890,13.402076,-2.527633,21.386783,-3.007933,58.747791,...,8920.472602,42236.75,81794590.0,-136914.781990,18910.25,45.413112,2.284979,0.533465,2.376018,3
247,p_g_right_mlo(88),0.031007,3.664337,6.836917,-1.854426,14.124339,-2.585852,22.462586,-3.067460,59.983002,...,7905.735427,54712.25,70877868.0,-134728.519635,22781.75,42.459766,2.600461,0.474790,2.499901,3
