# Defining dataset

In [1]:
#| default_exp nb_03_dataset

In [2]:
#|export
from pathlib import Path 

In [3]:
#|export
import pandas as pd

In [4]:
#| export
import matplotlib.pyplot as plt

In [5]:
#|export
from ml.nb_02_patching import *

## Data

In [6]:
# Load the pickled dataframe and display its (rows, columns) shape.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files from a trusted source.
df = pd.read_pickle("data/df_all_2022_10_06.pkl")
df.shape

(1180, 95)

In [7]:
#| export
import torch.utils.data as data_utils

In [8]:
#| export
from PIL import Image

In [9]:
#| export
import torchvision.transforms as transforms

In [10]:
#| export
import numpy as np

In [11]:
#| export
import torch

In [12]:
#| export
import torchvision

In [13]:
#| export
import tqdm

## Dataset

In [14]:
#| export
class PatchedDataSet(data_utils.Dataset):

    """Dataset yielding, per dataframe row, a stack of `N` patches and the row's target.

    Patch files are looked up as `<img_path>/<TMA_ID>_<TMASpot>_<i>.png` for `i` in `range(N)`.
    """

    def __init__(self,
                 img_path, # Directory with the patch images (`Path` or `str`)
                 df, # pandas dataframe with `TMA_ID`, `TMASpot` and `y_col` columns
                 y_col, # df column for target
                 stime_col, # df column with survival time (stored, not read by `__getitem__`)
                 N, # number of patches per image
                 mean, # mean for normalization
                 std,  # std for normalization
                 trfms=None # optional transform: a callable, or a list/tuple of callables applied in order
                ):
        self.img_path = img_path
        # `reset_index` returns a new frame, so the result has to be kept. Keeping our own
        # positionally-indexed copy also guarantees that rows stay aligned with `img_ids`
        # even if the caller modifies its dataframe afterwards.
        self.df = df.reset_index(drop=True)
        self.y_col = y_col
        self.stime_col = stime_col
        self.N = N
        self.mean, self.std = mean, std
        # The transform is *called* on each patch tensor, so a list has to be composed first.
        if isinstance(trfms, (list, tuple)):
            trfms = transforms.Compose(list(trfms))
        self.trfm = trfms
        self.img_ids = self.get_img_ids()

    def get_img_ids(self):
        "Path prefix (everything before `_<i>.png`) of each row's patches, in row order"
        # `str(...)` works for both `Path` and `str`; `Path.str()` is not part of pathlib
        # and only exists if some imported module monkey-patches it.
        col = str(self.img_path) + "/" + self.df.TMA_ID + "_" + self.df.TMASpot
        return col.tolist()

    def __getitem__(self, idx):
        "Return `(patches, target)`: the `N` normalized patches stacked on dim 0, and the target as a tensor"
        img_id = self.img_ids[idx]

        # Built once per item instead of once per patch
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(self.mean, self.std)

        patches = []
        for i in range(self.N):
            # The context manager closes the file handle as soon as the pixels are converted
            with Image.open(f"{img_id}_{i}.png") as img:
                patch = to_tensor(img)

            if self.trfm is not None:
                patch = self.trfm(patch)

            patches.append(normalize(patch))

        # Select the column first: going through a whole row (`df.iloc[idx]`) builds a
        # mixed-dtype Series for every item and can change the target's dtype.
        target = self.df[self.y_col].iloc[idx]

        # `torch.stack` already allocates a new tensor, no extra clone needed
        return torch.stack(patches, dim=0), torch.tensor(target)

    def __len__(self):
        "Number of images (dataframe rows)"
        return len(self.img_ids)

In [15]:
#| export
# Directories with the patch files; `p_outx` is the `img_path` handed to `PatchedDataSet` below.
# NOTE(review): absolute, machine-specific paths - consider deriving them from a configurable data root.
p_outx = Path("/media/dimi/TOSHIBA EXT/patched_images")
p_outy = Path("/media/dimi/TOSHIBA EXT/patched_masks")

In [16]:
#| export
# Normalization statistics as (mean, std) lists with one value per channel.
# `mean_img` / `std_img` are passed to `PatchedDataSet` below.
# NOTE(review): how these numbers were computed is not shown in this notebook - record their provenance.
mean_img, std_img = [0.8868493, 0.7803772, 0.87521], [0.07292725, 0.09504553, 0.05757239]
mean_mask, std_mask = [0.04432359, 0.04432359, 0.04432359], [0.02483896, 0.02483896, 0.02483896]

In [17]:
# Clean the grade column `G`: the string "2" becomes the integer 2 and
# rows whose grade is the string "1,2" are dropped.
df.loc[df["G"]=="2", "G"] = 2
# `.copy()` gives an independent frame, so the column assignment in the next cell
# does not trigger pandas' SettingWithCopyWarning.
df = df.loc[df["G"]!="1,2"].copy()
print(df.shape)

(1177, 95)


In [18]:
# Shift the grades down by one so that labels start at 0.
# NOTE(review): this cell is not idempotent - every re-run subtracts 1 again.
df["G"] = df["G"]-1

In [19]:
# Sanity check: which labels remain after cleaning and shifting
df.G.unique()

array([1, 0, 2], dtype=object)

In [20]:
# Work on a random 200-row subset (presumably to keep experiments quick). The fixed
# `random_state` makes the subset - and every output below - reproducible after a kernel restart.
df = df.sample(n=200, random_state=42)
# Target column "G", 12 patches per image, normalized with the image statistics.
ds = PatchedDataSet(p_outx, df, "G", "stime_10y_dom", 12, mean_img, std_img)

In [21]:
# Each item is a 2-tuple: (patches, target)
len(ds[0])

2

In [22]:
# Shape of the first item's patch stack (the patches are stacked on dim 0)
ds[0][0].shape

torch.Size([12, 3, 128, 128])

In [23]:
# Target of the first item
ds[0][1]

tensor(0)

In [24]:
# Shape of the target tensor
ds[0][1].shape

torch.Size([])

In [25]:
from torch.utils.data import DataLoader

In [26]:
# Batch `bs` dataset items together; no `shuffle` argument is passed here.
bs = 4
dl = DataLoader(ds, batch_size=bs) 

In [27]:
# Peek at the first batch only, to check shapes and dtypes.
# Each dataset item is already a stack of patches, so a batch of X is 5-D:
# [batch, patches, channels, height, width].
for X, y in dl:
    print(f"Shape of X [B, N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([4, 12, 3, 128, 128])
Shape of y: torch.Size([4]) torch.int64


In [28]:
from torch import nn

In [29]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [30]:
class AdaptiveConcatPool2d(nn.Module):
    """Apply `AdaptiveMaxPool2d` and `AdaptiveAvgPool2d` and concatenate both results.

    The outputs are joined along dim 1, max-pooled features first and
    average-pooled features second.
    """

    def __init__(self, size=None):
        super().__init__()
        # A missing (or otherwise falsy) target size falls back to 1
        self.size = 1 if not size else size
        self.ap = nn.AdaptiveAvgPool2d(self.size)
        self.mp = nn.AdaptiveMaxPool2d(self.size)

    def forward(self, x):
        max_pooled = self.mp(x)
        avg_pooled = self.ap(x)
        return torch.cat((max_pooled, avg_pooled), dim=1)

In [31]:
# Define model
class NeuralNetwork(nn.Module):
    """Classifier over stacks of patches.

    Every patch goes through the same convolutional encoder; the feature maps of
    the patches that belong to one sample are then laid out next to each other
    and reduced by a pooling + linear head to `n` scores per sample.
    """
    def __init__(self, arch='resnext50_32x4d_ssl', n=3, pre=True):
        """Build the encoder from a torch.hub model and attach a new head.

        arch: entry-point name handed to `torch.hub.load`
        n: number of outputs of the final linear layer
        pre: accepted but not used anywhere in this constructor
        """
        super().__init__()
        # NOTE(review): `torch.hub.load` may need network access on first use - confirm for offline runs
        m = torch.hub.load('facebookresearch/semi-supervised-ImageNet1K-models', arch)
        # Encoder: all children of the hub model except the last two
        self.enc = nn.Sequential(*list(m.children())[:-2])       
        # Input width of the hub model's last child, used to size the new head
        nc = list(m.children())[-1].in_features
        # The concat-pooling layer joins max- and average-pooled features, hence `2*nc` inputs
        self.head = nn.Sequential(AdaptiveConcatPool2d(),nn.Flatten(),nn.Linear(2*nc,512),
                            nn.Mish(),nn.BatchNorm1d(512), nn.Dropout(0.5),nn.Linear(512,n))

    def forward(self, *x):
        """Score a batch of patch stacks.

        The shape is read from the first positional argument, which is unpacked as
        (bs, N, c, sz_x, sz_y). Returns the head's output with `n` values per sample.
        """
        shape = x[0].shape  # e.g. 4, 12, 3, 128, 128
        bs, N, c, sz_x, sz_y = shape[0], shape[1], shape[2], shape[3], shape[4]
        # Stack the positional inputs on a new dim 1, then flatten to a plain batch of single patches
        x = torch.stack(x,1).view(-1, c, sz_x, sz_y)
        x = self.enc(x) #x: bs*N x C x 4 x 4 (the 4 x 4 presumably holds for 128 x 128 patches - confirm)
        shape = x.shape 
        # Regroup the patches per sample, move channels ahead of the patch dim and merge the
        # patch dim into the height dim, so each sample becomes one tall feature map
        x = x.view(-1,N,shape[1],shape[2],shape[3]).permute(0,2,1,3,4).contiguous()\
             .view(bs,shape[1],shape[2]*N,shape[3])  #x: bs x C x N*4 x 4
        x = self.head(x) #x: bs x n (one value per output of the final linear layer)
        return x

In [32]:
# Build the network with its default arguments and move it to the selected device
model = NeuralNetwork().to(device)

Using cache found in /home/dimi/.cache/torch/hub/facebookresearch_semi-supervised-ImageNet1K-models_master


In [33]:
# Cross-entropy loss on the model outputs; plain SGD over all model parameters with lr = 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [34]:
from fastai.data.core import DataLoaders

In [None]:
# NOTE(review): `train_ds` and `valid_ds` are not defined anywhere in the visible notebook,
# so this cell fails on a fresh kernel - define the two datasets first or remove the cell.
dls = DataLoaders.from_dsets(train_ds, valid_ds)

In [35]:
def train(dataloader, model, loss_fn, optimizer, device=None):
    """Run one training epoch over `dataloader`, printing the loss every 10 batches.

    dataloader: yields `(X, y)` batches and exposes `.dataset` with a length
    model: module called as `model(X)`; switched to training mode
    loss_fn: called as `loss_fn(pred, y)`, with `y` converted to int64
    optimizer: optimizer that updates `model`'s parameters
    device: device the batches are moved to before the forward pass; when omitted,
        the device of the model's first parameter is used (CPU if it has none),
        so the function no longer depends on a notebook-level `device` variable
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    size = len(dataloader.dataset)
    seen = 0  # number of samples processed so far in this epoch
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X = X.to(device)
        # Class-index targets must be int64 for the loss
        y = y.to(device=device, dtype=torch.long)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Count the batch that was just processed, so the first report is not "0 / size"
        # and a smaller final batch is counted correctly
        seen += len(X)
        if batch % 10 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

In [None]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            y = y.type(torch.LongTensor) 
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [36]:
# Train for `epochs` passes over `dl`.
# NOTE(review): the saved output of this cell contains tensor dumps that the current `train`
# does not print, so it comes from an earlier version of the function - re-run before sharing.
epochs = 1
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(dl, model, loss_fn, optimizer)
print("Done!")

Epoch 1
-------------------------------
tensor([1, 1, 1, 1], device='cuda:0')
tensor([[ 0.2069,  0.4046,  0.0559],
        [-0.7266,  0.1618,  0.4810],
        [-1.0261, -0.3195, -0.1811],
        [ 0.7072, -0.5115,  0.4948]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
tensor([1, 2, 1, 1], device='cuda:0')
tensor([[ 0.6951, -0.5888, -0.5273],
        [ 0.2523, -0.5889,  0.6086],
        [-0.7350, -0.0066, -0.8416],
        [ 0.6509, -0.7908,  0.6060]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
tensor([1, 1, 1, 1], device='cuda:0')
tensor([[-1.0739, -0.8324,  2.7159],
        [ 0.1001,  0.7186, -1.5184],
        [ 0.1861, -0.1318, -0.4231],
        [-0.3501, -0.2506, -0.4150]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
tensor([1, 1, 0, 1], device='cuda:0')
tensor([[ 0.7810, -1.1832,  0.6785],
        [ 0.4082,  1.2784,  0.3549],
        [-1.1485,  0.1416,  0.1205],
        [ 1.2448,  0.3804,  0.3426]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
tensor([1, 1

tensor([1, 0, 1, 0], device='cuda:0')
tensor([[-0.1176,  0.8290, -0.4827],
        [ 0.3556, -0.0141, -1.3136],
        [ 0.3518,  0.9605, -0.6741],
        [-0.3703,  0.4548, -0.2588]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
tensor([1, 1, 1, 0], device='cuda:0')
tensor([[-0.4375,  0.7729,  0.8238],
        [ 0.1329, -0.5159, -0.5805],
        [ 0.1083,  0.1209,  0.3634],
        [-0.3977, -0.5174,  1.5060]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
tensor([1, 0, 1, 1], device='cuda:0')
tensor([[-1.2716,  0.4035,  0.9005],
        [ 0.3969, -1.0425,  0.8369],
        [-0.9764,  0.8475,  0.3792],
        [ 0.7822,  0.2189,  0.0409]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
tensor([1, 1, 1, 1], device='cuda:0')
tensor([[ 0.4296, -0.2620,  0.4962],
        [ 0.1883,  0.1316, -0.1055],
        [ 1.3895, -1.4807,  0.8120],
        [-1.0143,  1.0145, -1.2136]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
tensor([1, 1, 1, 1], device='cuda:0')
tensor([[ 0.80

In [17]:
#| hide
# Run nbdev's export step for the project's notebooks
import nbdev; nbdev.nbdev_export()