In [193]:
import torch
import pandas as pd
import os
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from pathlib import Path


class MimicCxrJpg(Dataset):
    """
    Episodic few-shot dataset built from the "novel" split of Mimic-CXR-JPG.
    Todo: Insert references to the database here!

    All episodes are sampled once, at construction time, from the rows of
    ``path_csv`` whose ``split`` column equals ``"novel"``. Each episode picks
    ``n_way`` classes and, per class, ``k_shot`` support rows and ``k_query``
    query rows (disjoint within the episode).

    Args:
        root: Directory that the relative image paths in the CSV are joined to.
        path_csv: CSV file with a ``labels`` column, a ``split`` column, and the
            relative image path in its first column.
        n_way: Number of classes per episode (at least 1).
        k_shot: Support samples per class.
        k_query: Query samples per class.
        num_episodes: Number of episodes to pre-sample; also ``len(dataset)``.
        resize: Images are resized to ``(resize, resize)``.

    Raises:
        KeyError: A label in the novel split is neither a known class name nor
            a known class id.
        ValueError: ``n_way`` is below 1, or a selected class has fewer than
            ``k_shot + k_query`` rows.

    Indexing returns ``(support_imgs, support_labels, query_imgs, query_labels)``
    where the image entries are lists of transformed images and the label
    entries are lists of integer class ids.
    """

    def __init__(self, root, path_csv, n_way, k_shot, k_query, num_episodes, resize=224):
        self.dict_labels = {
            'Enlarged Cardiomediastinum': 0,
            'Fracture': 1,
            'Lung Lesion': 2,
            'Lung Opacity': 3,
            'Pleural Effusion': 4,
            'Pneumothorax': 5
        }
        if n_way < 1:
            raise ValueError(f"n_way must be at least 1, got {n_way}")

        self.root = root
        csv_data = pd.read_csv(path_csv)  # Raw CSV data
        data = csv_data[csv_data["split"] == "novel"]  # Filters for novel classes
        # Encode through the instance's own mapping; relying on a module-level
        # `dict_labels` only works when some other cell happened to define it.
        self.data = data.assign(labels=data["labels"].apply(self._encode_label))
        self.n_way = n_way
        self.k_shot = k_shot
        self.k_query = k_query
        self.num_episodes = num_episodes
        self.resize = resize
        self.transform = transforms.Compose([self._load_grayscale,
                                             transforms.Resize((self.resize, self.resize)),
                                             transforms.ToTensor()
                                             ])

        # Create Episodes
        self.support_episodes = []  # List of training episodes (support set)
        self.query_episodes = []  # List of testing episodes (query set)
        per_class = self.k_shot + self.k_query
        # NOTE: the order of np.random calls below is kept as it always was, so
        # a given np.random.seed() still yields the same episodes.
        for _ in range(self.num_episodes):
            # 1. select n_way classes randomly (no duplicates)
            selected_cls = np.random.choice(len(self.dict_labels), self.n_way, False)
            np.random.shuffle(selected_cls)
            support_parts = []
            query_parts = []
            for cls in selected_cls:
                df_cls = self.data[self.data["labels"] == cls]
                if len(df_cls) < per_class:
                    raise ValueError(
                        f"class {cls} has {len(df_cls)} novel samples but "
                        f"k_shot + k_query = {per_class} are required"
                    )
                # 2. select k_shot + k_query rows for the class
                selected_idx = np.random.choice(len(df_cls), per_class, False)
                np.random.shuffle(selected_idx)

                # First k_shot positions form the support set, the rest the query set
                support_parts.append(df_cls.iloc[selected_idx[:self.k_shot]])
                query_parts.append(df_cls.iloc[selected_idx[self.k_shot:]])

            # One concat per episode (DataFrame.append no longer exists in pandas 2),
            # then shuffle so rows are no longer ordered by class.
            self.support_episodes.append(pd.concat(support_parts).sample(frac=1))
            self.query_episodes.append(pd.concat(query_parts).sample(frac=1))

    def _encode_label(self, label):
        """Return the integer class id for a class name or an already-encoded id."""
        if label in self.dict_labels:
            return self.dict_labels[label]
        if label in self.dict_labels.values():
            return int(label)
        raise KeyError(label)

    @staticmethod
    def _load_grayscale(path):
        """Open the image at ``path`` and return it as a single-channel ('L') image."""
        # The context manager releases the file handle once the pixels are converted.
        with Image.open(path) as img:
            return img.convert('L')

    def _load_images(self, episode):
        """Apply ``self.transform`` to every image path in the episode's first column."""
        return [self.transform(os.path.join(self.root, rel_path))
                for rel_path in episode.iloc[:, 0]]

    def __len__(self):
        return self.num_episodes

    def __getitem__(self, idx):
        support_set = self.support_episodes[idx]
        query_set = self.query_episodes[idx]

        support_labels = support_set["labels"].tolist()
        query_labels = query_set["labels"].tolist()

        support_imgs = self._load_images(support_set)
        query_imgs = self._load_images(query_set)

        return support_imgs, support_labels, query_imgs, query_labels

In [194]:
# Data locations (relative to the notebook's working directory).
root = '../../../../scratch/rl80/mimic-cxr-jpg-2.0.0.physionet.org/files/'
path_csv = '../splits/splits.csv'

# Episode configuration: classes per episode, support/query samples per class,
# and how many episodes to pre-sample.
n_way, k_shot, k_query = 3, 5, 16
num_episodes = 5

mimic_dataset = MimicCxrJpg(
    root=root,
    path_csv=path_csv,
    n_way=n_way,
    k_shot=k_shot,
    k_query=k_query,
    num_episodes=num_episodes,
)

# Fetch the third episode: support images/labels (a_*) and query images/labels (b_*).
a_x, a_y, b_x, b_y = mimic_dataset[2]

In [205]:
a_x

[tensor([[[0.9647, 0.9647, 0.9608,  ..., 0.7804, 0.7843, 0.7882],
          [0.9647, 0.9647, 0.9608,  ..., 0.7686, 0.7765, 0.7804],
          [0.9647, 0.9608, 0.9608,  ..., 0.7608, 0.7647, 0.7647],
          ...,
          [0.9255, 0.9176, 0.9059,  ..., 0.0196, 0.0275, 0.0235],
          [0.9255, 0.9176, 0.9059,  ..., 0.0196, 0.0235, 0.0235],
          [0.9255, 0.9176, 0.9020,  ..., 0.0157, 0.0275, 0.0235]]]),
 tensor([[[0.0078, 0.0039, 0.0039,  ..., 0.0000, 0.0000, 0.0000],
          [0.0118, 0.0039, 0.0078,  ..., 0.0000, 0.0000, 0.0000],
          [0.0118, 0.0235, 0.0431,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]]),
 tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0314, 0.0314, 0.0235],
          [0.0000, 0.0000, 0.0000,  ..., 0.0314, 0.0314, 0.0235],
          [0.0000, 0.0000, 0.0000,  ...,

In [178]:
test = []

In [184]:
# Scratch check of list.append: every run of this cell adds another 1 to the
# existing `test` list, so the displayed list depends on how often it was run.
test.append(1)
test

[1, 1, 1, 1, 1]

In [3]:
# Class name -> integer id; ids follow the order in which the names are listed.
dict_labels = {
    class_name: class_id
    for class_id, class_name in enumerate([
        'Enlarged Cardiomediastinum',
        'Fracture',
        'Lung Lesion',
        'Lung Opacity',
        'Pleural Effusion',
        'Pneumothorax',
    ])
}

In [27]:
import torch
import pandas as pd
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from pathlib import Path
# Load the split table and keep only the rows assigned to the "novel" split.
path_csv = '../splits/splits.csv'
csvdata = pd.read_csv(path_csv)
is_novel = csvdata['split'] == 'novel'
data = csvdata.loc[is_novel]

In [110]:
data

Unnamed: 0,file_path,labels,split
4,p10/p10000935/s51178377/9b314ad7-fbcb0422-6db6...,3,novel
9,p10/p10001884/s51817555/d5f12914-f14ffd99-3ff2...,3,novel
16,p10/p10002131/s52823782/051b7911-cb00aec9-0b30...,4,novel
17,p10/p10002428/s58851198/7254cc41-a1055fd0-f387...,4,novel
19,p10/p10002428/s59414737/d999236f-95dcb8b7-a4d2...,4,novel
...,...,...,...
41721,p19/p19997367/s52970039/c1ccde76-401595e8-8491...,4,novel
41722,p19/p19997367/s54627229/3cdb5c10-c5739070-049e...,3,novel
41723,p19/p19997367/s55871455/95e0d765-6f99aff8-f618...,4,novel
41724,p19/p19997367/s57837670/d4bde55e-a2cd4d30-a733...,4,novel


In [112]:
test = []

In [119]:
test.append(data)

In [120]:
test[0]

Unnamed: 0,file_path,labels,split
4,p10/p10000935/s51178377/9b314ad7-fbcb0422-6db6...,3,novel
9,p10/p10001884/s51817555/d5f12914-f14ffd99-3ff2...,3,novel
16,p10/p10002131/s52823782/051b7911-cb00aec9-0b30...,4,novel
17,p10/p10002428/s58851198/7254cc41-a1055fd0-f387...,4,novel
19,p10/p10002428/s59414737/d999236f-95dcb8b7-a4d2...,4,novel
...,...,...,...
41721,p19/p19997367/s52970039/c1ccde76-401595e8-8491...,4,novel
41722,p19/p19997367/s54627229/3cdb5c10-c5739070-049e...,3,novel
41723,p19/p19997367/s55871455/95e0d765-6f99aff8-f618...,4,novel
41724,p19/p19997367/s57837670/d4bde55e-a2cd4d30-a733...,4,novel


In [143]:
df = pd.DataFrame()

In [148]:
df

In [151]:
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
# (original index kept, columns unsorted) and works on old and new pandas.
df = pd.concat([df, data])

In [154]:
df

Unnamed: 0,file_path,labels,split
4,p10/p10000935/s51178377/9b314ad7-fbcb0422-6db6...,3,novel
9,p10/p10001884/s51817555/d5f12914-f14ffd99-3ff2...,3,novel
16,p10/p10002131/s52823782/051b7911-cb00aec9-0b30...,4,novel
17,p10/p10002428/s58851198/7254cc41-a1055fd0-f387...,4,novel
19,p10/p10002428/s59414737/d999236f-95dcb8b7-a4d2...,4,novel
...,...,...,...
41721,p19/p19997367/s52970039/c1ccde76-401595e8-8491...,4,novel
41722,p19/p19997367/s54627229/3cdb5c10-c5739070-049e...,3,novel
41723,p19/p19997367/s55871455/95e0d765-6f99aff8-f618...,4,novel
41724,p19/p19997367/s57837670/d4bde55e-a2cd4d30-a733...,4,novel


In [153]:
# Preview a row-shuffled copy of df (frac=1 draws every row once); the result
# is only displayed here, it is not assigned back to df.
df.sample(frac=1)

Unnamed: 0,file_path,labels,split
7931,p11/p11932181/s55708104/8894a073-a8fc7130-d4c1...,5,novel
30236,p17/p17244595/s50621269/8367f554-5e2539cc-1c68...,5,novel
24020,p15/p15743237/s53994422/3f76bc58-6f89f293-63d6...,0,novel
10922,p12/p12631015/s58165263/f77faadf-3aabf3c3-ec4b...,5,novel
24358,p15/p15831913/s56552894/89baf81c-aeaf21ac-3a69...,4,novel
...,...,...,...
26552,p16/p16365360/s58741074/aee8c658-1b822a71-b18d...,3,novel
38903,p19/p19296173/s54092537/91b14024-ed12c56c-a79e...,3,novel
10550,p12/p12542450/s56557285/83711845-a20b5408-f42c...,3,novel
30765,p17/p17360908/s57748144/de89d6cc-774c5ffa-9e9b...,4,novel
