In [1]:
seed = 42  # for reproducibility; in this cell it is only passed to torch.manual_seed

# Imports
import os
import sys
import enum
import copy
import random
import tempfile
import warnings
import multiprocessing
import numpy as np
import pandas as pd
import nibabel as nib
import SimpleITK as sitk
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython import display

# import visdom
from pathlib import Path
from tqdm import tqdm
from scipy import stats

import torch
import torch.nn.functional as F
from torchvision.utils import make_grid, save_image
# Seed PyTorch's global RNG.
# NOTE(review): `random` and `numpy` are imported but not seeded here — confirm
# whether any later cell relies on them being deterministic.
torch.manual_seed(seed)
import torchio as tio
# Put the parent of the current working directory on the module search path.
# NOTE(review): presumably so project-local modules can be imported — confirm.
sys.path.append(str(Path().absolute().parent))

If you use TorchIO for your research, please cite the following paper:
Pérez-García et al., TorchIO: a Python library for efficient loading,
preprocessing, augmentation and patch-based sampling of medical images
in deep learning. Credits instructions: https://torchio.readthedocs.io/#credits



In [2]:
# Root folder of the mass images, and the 'Train' sub-folder directly beneath it.
DDSM_mass_path = Path('/home/linardos/Datasets/DDSM/Images/CBIS-DDSM/Mass')
train_path = DDSM_mass_path / 'Train'

In [3]:
# Load the mass case-description table once.
# `csv_file` is kept as the untouched original; `new_csv_file` is an independent
# (deep) copy whose path columns are rewritten further down, so edits to it never
# leak back into `csv_file`.
csv_file = pd.read_csv(Path('/home/linardos/Datasets/DDSM/mass_case_description_train_set.csv'))
new_csv_file = csv_file.copy()  # same content as a second read_csv, without re-parsing the file

In [4]:
# Display the original table as loaded, before any path column is rewritten.
csv_file

Unnamed: 0,patient_id,breast_density,left or right breast,image view,abnormality id,abnormality type,mass shape,mass margins,assessment,pathology,subtlety,image file path,cropped image file path,ROI mask file path
0,P_00001,3,LEFT,CC,1,mass,IRREGULAR-ARCHITECTURAL_DISTORTION,SPICULATED,4,MALIGNANT,4,Mass-Training_P_00001_LEFT_CC/1.3.6.1.4.1.9590...,Mass-Training_P_00001_LEFT_CC_1/1.3.6.1.4.1.95...,Mass-Training_P_00001_LEFT_CC_1/1.3.6.1.4.1.95...
1,P_00001,3,LEFT,MLO,1,mass,IRREGULAR-ARCHITECTURAL_DISTORTION,SPICULATED,4,MALIGNANT,4,Mass-Training_P_00001_LEFT_MLO/1.3.6.1.4.1.959...,Mass-Training_P_00001_LEFT_MLO_1/1.3.6.1.4.1.9...,Mass-Training_P_00001_LEFT_MLO_1/1.3.6.1.4.1.9...
2,P_00004,3,LEFT,CC,1,mass,ARCHITECTURAL_DISTORTION,ILL_DEFINED,4,BENIGN,3,Mass-Training_P_00004_LEFT_CC/1.3.6.1.4.1.9590...,Mass-Training_P_00004_LEFT_CC_1/1.3.6.1.4.1.95...,Mass-Training_P_00004_LEFT_CC_1/1.3.6.1.4.1.95...
3,P_00004,3,LEFT,MLO,1,mass,ARCHITECTURAL_DISTORTION,ILL_DEFINED,4,BENIGN,3,Mass-Training_P_00004_LEFT_MLO/1.3.6.1.4.1.959...,Mass-Training_P_00004_LEFT_MLO_1/1.3.6.1.4.1.9...,Mass-Training_P_00004_LEFT_MLO_1/1.3.6.1.4.1.9...
4,P_00004,3,RIGHT,MLO,1,mass,OVAL,CIRCUMSCRIBED,4,BENIGN,5,Mass-Training_P_00004_RIGHT_MLO/1.3.6.1.4.1.95...,Mass-Training_P_00004_RIGHT_MLO_1/1.3.6.1.4.1....,Mass-Training_P_00004_RIGHT_MLO_1/1.3.6.1.4.1....
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1313,P_02033,2,RIGHT,MLO,1,mass,IRREGULAR,ILL_DEFINED,3,MALIGNANT,4,Mass-Training_P_02033_RIGHT_MLO/1.3.6.1.4.1.95...,Mass-Training_P_02033_RIGHT_MLO_1/1.3.6.1.4.1....,Mass-Training_P_02033_RIGHT_MLO_1/1.3.6.1.4.1....
1314,P_02079,2,RIGHT,CC,1,mass,ROUND,SPICULATED,3,MALIGNANT,5,Mass-Training_P_02079_RIGHT_CC/1.3.6.1.4.1.959...,Mass-Training_P_02079_RIGHT_CC_1/1.3.6.1.4.1.9...,Mass-Training_P_02079_RIGHT_CC_1/1.3.6.1.4.1.9...
1315,P_02079,2,RIGHT,MLO,1,mass,ROUND,SPICULATED,3,MALIGNANT,5,Mass-Training_P_02079_RIGHT_MLO/1.3.6.1.4.1.95...,Mass-Training_P_02079_RIGHT_MLO_1/1.3.6.1.4.1....,Mass-Training_P_02079_RIGHT_MLO_1/1.3.6.1.4.1....
1316,P_02092,2,LEFT,CC,1,mass,IRREGULAR,SPICULATED,3,MALIGNANT,2,Mass-Training_P_02092_LEFT_CC/1.3.6.1.4.1.9590...,Mass-Training_P_02092_LEFT_CC_1/1.3.6.1.4.1.95...,Mass-Training_P_02092_LEFT_CC_1/1.3.6.1.4.1.95...


In [5]:
# Look at one raw 'image file path' entry (row label 0) to see its folder layout.
csv_file['image file path'][0]

'Mass-Training_P_00001_LEFT_CC/1.3.6.1.4.1.9590.100.1.2.422112722213189649807611434612228974994/1.3.6.1.4.1.9590.100.1.2.342386194811267636608694132590482924515/000000.dcm'

In [6]:
# Second sample (row label 5) to compare against the first one.
csv_file['image file path'][5]

'Mass-Training_P_00009_RIGHT_CC/1.3.6.1.4.1.9590.100.1.2.348094436212980762312744999743818171955/1.3.6.1.4.1.9590.100.1.2.392091931911637760938815694332198115839/000000.dcm'

In [7]:
# Rewrite 'image file path' from the raw DICOM location to
# 'Train/<patient>/<SIDE>_<VIEW>/scan.nii.gz'.
# Only the first path component (e.g. 'Mass-Training_P_00001_LEFT_CC') is kept;
# everything after the first '/' is discarded.
# Every replacement is a literal substring, so regex=False is passed explicitly:
# the default of `regex` differs between pandas major versions.
new_csv_file['image file path'] = (
    'Train/'
    + csv_file['image file path']
        .str.replace('Mass-Training_', '', regex=False)
        .str.split('/', n=1).str[0]  # first component only; no need to expand into a frame
        .astype(str)
    + '/scan.nii.gz'
)
new_csv_file['image file path'] = (
    new_csv_file['image file path']
    # Split '<patient>_<SIDE>' into '<patient>/<SIDE>'.
    .str.replace('_LEFT', '/LEFT', regex=False)
    .str.replace('_RIGHT', '/RIGHT', regex=False)
)

In [8]:
# Rewrite 'cropped image file path' from the raw DICOM location to
# 'Train/<patient>/<SIDE>_<VIEW>[_<id>]/roi_mass.nii.gz'.
# Only the first path component is kept; everything after the first '/' is discarded.
# Every replacement is a literal substring, so regex=False is passed explicitly:
# the default of `regex` differs between pandas major versions.
new_csv_file['cropped image file path'] = (
    'Train/'
    + csv_file['cropped image file path']
        .str.replace('Mass-Training_', '', regex=False)
        .str.split('/', n=1).str[0]  # first component only; no need to expand into a frame
        .astype(str)
    + '/roi_mass.nii.gz'
)
new_csv_file['cropped image file path'] = (
    new_csv_file['cropped image file path']
    # Drop a trailing '_1' from the directory name; other suffixes are left in place here.
    .str.replace('_1/r', '/r', regex=False)
    # Split '<patient>_<SIDE>' into '<patient>/<SIDE>'.
    .str.replace('_LEFT', '/LEFT', regex=False)
    .str.replace('_RIGHT', '/RIGHT', regex=False)
)

In [9]:
# Rewrite 'ROI mask file path' from the raw DICOM location to
# 'Train/<patient>/<SIDE>_<VIEW>[_<id>]/mask_mass.nii.gz'.
# Only the first path component is kept; everything after the first '/' is discarded.
# Every replacement is a literal substring, so regex=False is passed explicitly:
# the default of `regex` differs between pandas major versions.
new_csv_file['ROI mask file path'] = (
    'Train/'
    + csv_file['ROI mask file path']
        .str.replace('Mass-Training_', '', regex=False)
        .str.split('/', n=1).str[0]  # first component only; no need to expand into a frame
        .astype(str)
    + '/mask_mass.nii.gz'
)
new_csv_file['ROI mask file path'] = (
    new_csv_file['ROI mask file path']
    # Drop a trailing '_1' from the directory name; other suffixes are left in place here.
    .str.replace('_1/m', '/m', regex=False)
    # Split '<patient>_<SIDE>' into '<patient>/<SIDE>'.
    .str.replace('_LEFT', '/LEFT', regex=False)
    .str.replace('_RIGHT', '/RIGHT', regex=False)
)

In [10]:
# Move the abnormality id from the directory name onto the file name, so that every
# entry ends in 'roi_mass_<id>.nii.gz' / 'mask_mass_<id>.nii.gz'.
#
# All patterns are literal text (they contain '.'), hence regex=False everywhere.
# The 'cropped image file path' column only ever holds 'roi_mass' names and the
# 'ROI mask file path' column only 'mask_mass' names, so each column is matched
# against its own file name only.
#
# NOTE: this cell expects the original space-separated column names; it renames them
# at the end, so re-running it on its own raises a KeyError.

# Entries whose directory is a bare '<SIDE>_CC' / '<SIDE>_MLO' get the id 1.
for view in ('CC', 'MLO'):
    new_csv_file['cropped image file path'] = new_csv_file['cropped image file path'].str.replace(
        f'{view}/roi_mass.nii', f'{view}/roi_mass_1.nii', regex=False)
    new_csv_file['ROI mask file path'] = new_csv_file['ROI mask file path'].str.replace(
        f'{view}/mask_mass.nii', f'{view}/mask_mass_1.nii', regex=False)

# Remaining entries still carry '_<id>' on the directory; ids 0..19 are handled.
for i in range(20):
    new_csv_file['cropped image file path'] = new_csv_file['cropped image file path'].str.replace(
        f'_{i}/roi_mass.nii', f'/roi_mass_{i}.nii', regex=False)
    new_csv_file['ROI mask file path'] = new_csv_file['ROI mask file path'].str.replace(
        f'_{i}/mask_mass.nii', f'/mask_mass_{i}.nii', regex=False)

# Column names: spaces -> underscores (e.g. 'image file path' -> 'image_file_path').
new_csv_file.columns = new_csv_file.columns.str.replace(' ', '_', regex=False)

In [11]:
# Display the rewritten table to check the new path columns and column names.
new_csv_file

Unnamed: 0,patient_id,breast_density,left_or_right_breast,image_view,abnormality_id,abnormality_type,mass_shape,mass_margins,assessment,pathology,subtlety,image_file_path,cropped_image_file_path,ROI_mask_file_path
0,P_00001,3,LEFT,CC,1,mass,IRREGULAR-ARCHITECTURAL_DISTORTION,SPICULATED,4,MALIGNANT,4,Train/P_00001/LEFT_CC/scan.nii.gz,Train/P_00001/LEFT_CC/roi_mass_1.nii.gz,Train/P_00001/LEFT_CC/mask_mass_1.nii.gz
1,P_00001,3,LEFT,MLO,1,mass,IRREGULAR-ARCHITECTURAL_DISTORTION,SPICULATED,4,MALIGNANT,4,Train/P_00001/LEFT_MLO/scan.nii.gz,Train/P_00001/LEFT_MLO/roi_mass_1.nii.gz,Train/P_00001/LEFT_MLO/mask_mass_1.nii.gz
2,P_00004,3,LEFT,CC,1,mass,ARCHITECTURAL_DISTORTION,ILL_DEFINED,4,BENIGN,3,Train/P_00004/LEFT_CC/scan.nii.gz,Train/P_00004/LEFT_CC/roi_mass_1.nii.gz,Train/P_00004/LEFT_CC/mask_mass_1.nii.gz
3,P_00004,3,LEFT,MLO,1,mass,ARCHITECTURAL_DISTORTION,ILL_DEFINED,4,BENIGN,3,Train/P_00004/LEFT_MLO/scan.nii.gz,Train/P_00004/LEFT_MLO/roi_mass_1.nii.gz,Train/P_00004/LEFT_MLO/mask_mass_1.nii.gz
4,P_00004,3,RIGHT,MLO,1,mass,OVAL,CIRCUMSCRIBED,4,BENIGN,5,Train/P_00004/RIGHT_MLO/scan.nii.gz,Train/P_00004/RIGHT_MLO/roi_mass_1.nii.gz,Train/P_00004/RIGHT_MLO/mask_mass_1.nii.gz
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1313,P_02033,2,RIGHT,MLO,1,mass,IRREGULAR,ILL_DEFINED,3,MALIGNANT,4,Train/P_02033/RIGHT_MLO/scan.nii.gz,Train/P_02033/RIGHT_MLO/roi_mass_1.nii.gz,Train/P_02033/RIGHT_MLO/mask_mass_1.nii.gz
1314,P_02079,2,RIGHT,CC,1,mass,ROUND,SPICULATED,3,MALIGNANT,5,Train/P_02079/RIGHT_CC/scan.nii.gz,Train/P_02079/RIGHT_CC/roi_mass_1.nii.gz,Train/P_02079/RIGHT_CC/mask_mass_1.nii.gz
1315,P_02079,2,RIGHT,MLO,1,mass,ROUND,SPICULATED,3,MALIGNANT,5,Train/P_02079/RIGHT_MLO/scan.nii.gz,Train/P_02079/RIGHT_MLO/roi_mass_1.nii.gz,Train/P_02079/RIGHT_MLO/mask_mass_1.nii.gz
1316,P_02092,2,LEFT,CC,1,mass,IRREGULAR,SPICULATED,3,MALIGNANT,2,Train/P_02092/LEFT_CC/scan.nii.gz,Train/P_02092/LEFT_CC/roi_mass_1.nii.gz,Train/P_02092/LEFT_CC/mask_mass_1.nii.gz


In [12]:
# Persist the rewritten table next to the original CSV.
# NOTE(review): the index is written as well (pandas default) and the table already
# carries an 'Unnamed: 0' column, so the saved file ends up with two index-like
# columns — confirm whether downstream readers rely on that layout.
processed_csv_path = '/home/linardos/Datasets/DDSM/processed_mass_case_description_train_set.csv'
new_csv_file.to_csv(processed_csv_path)