In [28]:
from collections import OrderedDict

import yaml
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
# from torchvision.datasets import CIFAR10
import flwr as fl
from datetime import datetime
import importlib
import os
from pathlib import Path
from tqdm import tqdm
import numpy as np
import pickle
import sys
import pandas as pd
sys.path.append(str(Path().absolute().parent.parent))

from src.models import nets
from src.data_loader import ALLDataset
torch.manual_seed(42)

<torch._C.Generator at 0x7f2e181bc930>

In [29]:
with open('image_ids.pkl', 'rb') as handle:
    image_ids = pickle.load(handle)

In [30]:
len(image_ids['jarv']['train'])

6704

In [4]:
with open('image_ids_per_center/centralized_image_ids.pkl', 'rb') as handle:
    centralized_image_ids_dict = pickle.load(handle)
with open('image_ids_per_center/federated_image_ids.pkl', 'rb') as handle:
    federated_image_ids_dict = pickle.load(handle)

In [5]:
len(federated_image_ids_dict['jarv']['val'][1])

1676

In [6]:
len(federated_image_ids_dict['jarv']['train'][1])

6704

In [7]:
len(federated_image_ids_dict['jarv']['val'][1])+len(federated_image_ids_dict['jarv']['train'][1])

8380

In [8]:
'1.2.826.0.1.3680043.9.3218.1.1.1405750.1758.1511969095974.2523.0' in federated_image_ids_dict['jarv']['val'][1]

True

In [9]:
len(centralized_image_ids_dict['jarv']['val'][1])

1676

In [10]:
centralized_image_ids_dict['jarv']['val'][1][:5]

['1.2.826.0.1.3680043.9.3218.1.1.3863114.4093.1511396668137.8431.0',
 '1.2.826.0.1.3680043.9.3218.1.1.11559392.1924.1547344198622.108.0',
 '1.2.826.0.1.3680043.9.3218.1.1.12104711.2124.1573168534259.420.0',
 '1.2.826.0.1.3680043.9.3218.1.1.123573073.9584.1573193793839.58.0',
 '1.2.826.0.1.3680043.9.3218.1.1.12374673.1803.1573195530496.236.0']

In [11]:
jarv_csv="/home/akis-linardos/Datasets/OPTIMAM/jarv_info.csv"
stge_csv="/home/akis-linardos/Datasets/OPTIMAM/stge_info.csv"
inbreast_csv="/home/lidia-garrucho/datasets/INBREAST/INbreast_updated_cropped_breast.csv"
cmmd_csv='/home/akis-linardos/Datasets/CMMD/info.csv'
bcdr_d01_csv="/home/lidia-garrucho/datasets/BCDR/cropped/BCDR-D01_dataset/dataset_info.csv"
bcdr_d02_csv="/home/lidia-garrucho/datasets/BCDR/cropped/BCDR-D02_dataset/dataset_info.csv"
bcdr_dn01_csv="/home/lidia-garrucho/datasets/BCDR/cropped/BCDR-DN01_dataset/dataset_info.csv"

j_csv=pd.read_csv(jarv_csv)
s_csv=pd.read_csv(stge_csv)
i_csv=pd.read_csv(inbreast_csv)
c_csv=pd.read_csv(cmmd_csv)
b_d01_csv=pd.read_csv(bcdr_d01_csv)
b_d02_csv=pd.read_csv(bcdr_d02_csv)
bcdr_csv = pd.concat([b_d01_csv, b_d02_csv])
center_to_csv = {'jarv': j_csv, 'stge': s_csv, 'inbreast': i_csv, 'cmmd': c_csv, 'bcdr':bcdr_csv}

In [12]:
j_csv['study_id']=='1.2.826.0.1.3680043.9.3218.1.1.1447924.2040.1511973313408.4682.0'

0        False
1        False
2        False
3        False
4        False
         ...  
25962    False
25963    False
25964    False
25965    False
25966    False
Name: study_id, Length: 25967, dtype: bool

In [13]:
bcdr_csv

Unnamed: 0,patient_id,study_id,series,scan_path,laterality,view,density,age,classification,scan_width,...,lesion_id,segmentation_id,lesion_pathologies,mask_path,lesion_x1,lesion_x2,lesion_y1,lesion_y2,lw_x_points,lw_y_points
0,105,143,1,patient_105/study_143/img_105_143_1_LCC.png,LEFT,CC,1,51,Normal,3328,...,,,,,,,,,,
1,105,143,1,patient_105/study_143/img_105_143_1_LO.png,LEFT,MLO,1,51,Normal,3328,...,,,,,,,,,,
2,105,143,1,patient_105/study_143/img_105_143_1_RCC.png,RIGHT,CC,1,51,Malign,3328,...,161.0,312.0,"['nodule', 'microcalcification', 'stroma_disto...",patient_105/study_143/img_105_143_1_RCC_mask_i...,658.0,1292.0,1415.0,2013.0,2275 2276 2277 2278 2279 2280 2281 2281 2281 ...,2174 2174 2173 2172 2171 2170 2169 2168 2167 ...
3,105,143,1,patient_105/study_143/img_105_143_1_RO.png,RIGHT,MLO,1,51,Malign,3328,...,161.0,311.0,"['nodule', 'microcalcification', 'stroma_disto...",patient_105/study_143/img_105_143_1_RO_mask_id...,423.0,1015.0,1752.0,2444.0,1826 1825 1824 1823 1822 1821 1820 1819 1818 ...,1813 1812 1812 1812 1812 1813 1814 1815 1816 ...
4,108,146,1,patient_108/study_146/img_108_146_1_LCC.png,LEFT,CC,1,55,Normal,3328,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
747,95,129,1,patient_95/study_129/img_95_129_1_RO.png,RIGHT,MLO,4,46,Normal,3328,...,,,,,,,,,,
748,98,133,1,patient_98/study_133/img_98_133_1_LCC.png,LEFT,CC,3,71,Benign,3328,...,156.0,300.0,['calcification'],patient_98/study_133/img_98_133_1_LCC_mask_id_...,1208.0,1223.0,2269.0,2285.0,1217 1216 1215 1214 1213 1212 1211 1210 1209 ...,2744 2744 2744 2745 2746 2747 2748 2749 2750 ...
749,98,133,1,patient_98/study_133/img_98_133_1_LO.png,LEFT,MLO,3,71,Benign,3328,...,156.0,299.0,['calcification'],patient_98/study_133/img_98_133_1_LO_mask_id_1...,1181.0,1193.0,3008.0,3022.0,1185 1186 1187 1188 1189 1190 1191 1192 1192 ...,3026 3027 3026 3025 3024 3023 3022 3021 3020 ...
750,98,133,1,patient_98/study_133/img_98_133_1_RCC.png,RIGHT,CC,3,71,Normal,3328,...,,,,,,,,,,


In [14]:
j_csv.loc[j_csv['image_id'] == '1.2.826.0.1.3680043.9.3218.1.1.35791640.9441.1536882521446.102.0']['status'].item()

'Benign'

In [15]:
type('jarv')==str

True

In [16]:
len('BCDR-D01_dataset/')

17

In [18]:
center_to_csv['bcdr']['scan_path']

0      patient_105/study_143/img_105_143_1_LCC.png
1       patient_105/study_143/img_105_143_1_LO.png
2      patient_105/study_143/img_105_143_1_RCC.png
3       patient_105/study_143/img_105_143_1_RO.png
4      patient_108/study_146/img_108_146_1_LCC.png
                          ...                     
747       patient_95/study_129/img_95_129_1_RO.png
748      patient_98/study_133/img_98_133_1_LCC.png
749       patient_98/study_133/img_98_133_1_LO.png
750      patient_98/study_133/img_98_133_1_RCC.png
751       patient_98/study_133/img_98_133_1_RO.png
Name: scan_path, Length: 1018, dtype: object

In [19]:
center_to_csv['bcdr'].loc[center_to_csv['bcdr']['scan_path'] == 'patient_99/study_134/img_99_134_1_RCC.png']['classification'].iloc[0]

'Benign'

In [22]:
def label_for_ids(dataset_pd, list_of_ids, label_header='BIRADS', center='bcdr'):
    benign, malign = 0, 0
    for img_id in list_of_ids:
        if center == 'bcdr':
            the_status = dataset_pd.loc[dataset_pd['scan_path'][17:] == img_id[17:]]['classification'].iloc[0]
        else:
            the_status = dataset_pd.loc[dataset_pd['image_id'] == img_id][label_header].iloc[0]
        if the_status == 'Benign' or (label_header=='BIRADS' and the_status in [2,3]):
            benign+=1
        elif the_status == 'Malignant' or the_status == 'Malign' or (label_header=='BIRADS' and the_status>3):
            malign+=1
    return {"Benign":benign, "Malign":malign}

In [23]:
centers = ['jarv', 'stge', 'bcdr', 'cmmd', 'inbreast']

validation_sizes, validation_labels = {}, {}

for center in centers:
    val_l = centralized_image_ids_dict[center]['val'][1]
    validation_sizes[center]=len(val_l)
    if center == 'inbreast':
        label_header = 'BIRADS'
    else:
        label_header = 'status'
        
    validation_labels[center]=label_for_ids(center_to_csv[center], val_l, label_header=label_header, center=center)

ValueError: cannot reindex from a duplicate axis

In [None]:
center

In [None]:
validation_sizes

In [None]:
validation_labels

In [None]:
train_sizes, train_labels = {}, {}
for center in centers:
    train_l = centralized_image_ids_dict[center]['train'][1]
    train_sizes[center]=len(train_l)

In [None]:
train_sizes