In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Organize location and labels for relevant images

In [2]:
import pydicom
import torch
from tqdm import tqdm
import time
from matplotlib import pyplot as plt

In [3]:
# Read the per-image target values shipped with the competition data
train_df = pd.read_csv(
    filepath_or_buffer='../input/unifesp-x-ray-body-part-classifier/train.csv'
)
train_df.head(n=10)

Unnamed: 0,SOPInstanceUID,Target
0,1.2.826.0.1.3680043.8.498.10025629581362719970...,0
1,1.2.826.0.1.3680043.8.498.10036150326276641158...,15
2,1.2.826.0.1.3680043.8.498.10038426859954986240...,12
3,1.2.826.0.1.3680043.8.498.10050991192143676483...,14
4,1.2.826.0.1.3680043.8.498.10053309524595490852...,3
5,1.2.826.0.1.3680043.8.498.10053755320637729867...,3
6,1.2.826.0.1.3680043.8.498.10062189329714053601...,14
7,1.2.826.0.1.3680043.8.498.10065930002825553435...,13 20
8,1.2.826.0.1.3680043.8.498.10072001800484199846...,3
9,1.2.826.0.1.3680043.8.498.10077219503857952412...,3


In [4]:
def dcmtag2table(folder, list_of_tags):
    """
    Create a Pandas DataFrame with the <list_of_tags> DICOM tags
    from the DICOM files in <folder>

    Parameters:
        folder (str): path of the folder that is walked recursively looking for DICOM files
        list_of_tags (list of strings): DICOM tag keywords, without whitespace;
            the caller's list is left unmodified

    Returns:
        df (DataFrame): one row per file that could be read. The first column is
            "Filename" (path of the file) followed by one column per entry of
            <list_of_tags>. A tag that is missing from a file is stored as the
            string "Not found". When no file could be read the frame is empty
            but still carries every column.
    """
    tags = list(list_of_tags)  # private copy, so the caller's list is never touched
    columns = ["Filename"] + tags

    # flush=True keeps these messages ahead of the tqdm progress bar without
    # having to pause the kernel.
    print("Listing files...", flush=True)
    start = time.time()
    filelist = [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(folder, topdown=False)
        for name in files
    ]
    print("Time: " + str(time.time() - start), flush=True)

    print("Reading files...", flush=True)
    table = []
    for _f in tqdm(filelist):
        try:
            # stop_before_pixels=True: only the metadata is read, not the raw image data
            ds = pydicom.dcmread(_f, stop_before_pixels=True)
            items = [_f]
            for _tag in tags:
                if _tag in ds:
                    items.append(ds.data_element(_tag).value)
                else:
                    items.append("Not found")
        except Exception as exc:
            # Best effort: a file that cannot be read is skipped, but the reason
            # is reported. `Exception` (rather than a bare except) lets
            # KeyboardInterrupt stop a long scan.
            print("Skipping non-DICOM: " + _f
                  + " (" + type(exc).__name__ + ": " + str(exc) + ")")
            continue
        table.append(items)

    # Build the frame column by column; with an empty table every column is an
    # empty list, so the result is an empty frame instead of an IndexError.
    dictone = {
        column: [row[position] for row in table]
        for position, column in enumerate(columns)
    }
    df = pd.DataFrame(dictone)
    print("Finished.", flush=True)
    return df
    

In [5]:
# Header fields collected from every file in the training folder
tags = ['PhotometricInterpretation', 'BitsAllocated', 'SOPInstanceUID']
dicom_tags_train = dcmtag2table(
    folder='../input/unifesp-x-ray-body-part-classifier/train',
    list_of_tags=tags,
)

Listing files...
Time: 16.662697076797485
Reading files...


100%|██████████| 1738/1738 [00:33<00:00, 52.32it/s]


Finished.


In [6]:
# Peek at the first rows of the tag table
dicom_tags_train.head(n=5)
# Optional visual check of the first image (left disabled):
#plt.imshow(pydicom.dcmread(dicom_tags_train.Filename[0]).pixel_array,cmap = 'gray')
#plt.show()

Unnamed: 0,Filename,PhotometricInterpretation,BitsAllocated,SOPInstanceUID
0,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.71157989004260882669...
1,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.32467620439025796224...
2,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.74856220852423198555...
3,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.90865692473901867788...
4,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.44687741644515558201...


In [7]:
# Attach the Target column to the tag table; rows are matched on SOPInstanceUID
train = pd.merge(dicom_tags_train, train_df, on='SOPInstanceUID')
train.head(n=5)

Unnamed: 0,Filename,PhotometricInterpretation,BitsAllocated,SOPInstanceUID,Target
0,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.71157989004260882669...,3
1,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.32467620439025796224...,4
2,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.74856220852423198555...,4
3,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.90865692473901867788...,7
4,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.44687741644515558201...,21


In [8]:
# Print the header of one MONOCHROME2 file (index 5 of the matching filenames)
# to look at its Rescale Intercept and Slope
pydicom.dcmread(
    dicom_tags_train.loc[
        dicom_tags_train["PhotometricInterpretation"] == "MONOCHROME2", "Filename"
    ].tolist()[5],
    stop_before_pixels=True,
)
#pydicom.dcmread(dicom_tags_train.Filename[0], stop_before_pixels=True)

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 244
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: Computed Radiography Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 1.2.826.0.1.3680043.8.498.71782455619707465800640476833895211640
(0002, 0010) Transfer Syntax UID                 UI: JPEG 2000 Image Compression (Lossless Only)
(0002, 0012) Implementation Class UID            UI: 1.2.826.0.1.3680043.2.1143.107.104.103.115.3.0.10
(0002, 0013) Implementation Version Name         SH: 'GDCM 3.0.10'
(0002, 0016) Source Application Entity Title     AE: 'gdcmconv'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0012) Instance Creation Date              DA: ''
(0008, 0013) Instance Creation Time              TM: ''
(0008, 0016) SOP Class UID                       UI: Computed R

In [9]:
# Integer label id -> body part name; the ids are the positions in this tuple (0..21).
bodyparts = dict(enumerate((
    'Abdomen',
    'Ankle',
    'Cervical Spine',
    'Chest',
    'Clavicles',
    'Elbow',
    'Feet',
    'Finger',
    'Forearm',
    'Hand',
    'Hip',
    'Knee',
    'Lower Leg',
    'Lumbar Spine',
    'Others',
    'Pelvis',
    'Shoulder',
    'Sinus',
    'Skull',
    'Thigh',
    'Thoracic Spine',
    'Wrist',
)))

# Inverse lookup: body part name -> integer label id
reverse_train_labels = {part_name: label_id for label_id, part_name in bodyparts.items()}

def fill_targets(row):
    """
    One-hot encode the space-separated label ids held in one row's Target.

    Parameters:
        row (Series): a row whose "Target" entry is a string of integer label
            ids separated by single spaces (e.g. "13 20"); empty tokens caused
            by extra spaces are ignored

    Returns:
        row (Series): the same row, with "Target" replaced by a NumPy array of
            the split tokens and, for every label id, the entry named
            bodyparts[id] set to 1

    Raises:
        ValueError: if a non-empty token is not an integer
        KeyError: if a label id is not a key of `bodyparts`
    """
    # Parse once and loop over the local tokens. Reading the value back from the
    # row after the assignment is not reliable: pandas may unwrap a one-element
    # array into a plain string, and a label such as "15" would then be walked
    # character by character ("1", "5"), setting the wrong columns.
    tokens = row.Target.split(" ")
    row["Target"] = np.array(tokens)
    for num in tokens:
        if num != '':
            row.loc[bodyparts[int(num)]] = 1
    return row

# Give train_df one indicator column per body part, initialised to 0
for part_name in bodyparts.values():
    train_df[part_name] = 0

# fill_targets is applied to each row and sets the matching indicators to 1
train_labels = train_df.apply(fill_targets, axis="columns")
train_labels.head()

Unnamed: 0,SOPInstanceUID,Target,Abdomen,Ankle,Cervical Spine,Chest,Clavicles,Elbow,Feet,Finger,...,Lower Leg,Lumbar Spine,Others,Pelvis,Shoulder,Sinus,Skull,Thigh,Thoracic Spine,Wrist
0,1.2.826.0.1.3680043.8.498.10025629581362719970...,"[0, ]",1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1.2.826.0.1.3680043.8.498.10036150326276641158...,"[15, ]",0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,1.2.826.0.1.3680043.8.498.10038426859954986240...,"[12, ]",0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,1.2.826.0.1.3680043.8.498.10050991192143676483...,"[14, ]",0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,1.2.826.0.1.3680043.8.498.10053309524595490852...,"[3, ]",0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Join the one-hot label table onto the tag table; rows are matched on SOPInstanceUID
train = pd.merge(dicom_tags_train, train_labels, on='SOPInstanceUID')
train.head(n=5)

Unnamed: 0,Filename,PhotometricInterpretation,BitsAllocated,SOPInstanceUID,Target,Abdomen,Ankle,Cervical Spine,Chest,Clavicles,...,Lower Leg,Lumbar Spine,Others,Pelvis,Shoulder,Sinus,Skull,Thigh,Thoracic Spine,Wrist
0,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.71157989004260882669...,"[3, ]",0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.32467620439025796224...,"[4, ]",0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.74856220852423198555...,"[4, ]",0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.90865692473901867788...,"[7, ]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.44687741644515558201...,"[21, ]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


# Explore Images

# Create Data Loader

In [11]:
class DataLoader(torch.utils.data.Dataset):
    """Skeleton of a PyTorch dataset (torch.utils.data.Dataset subclass); not implemented yet."""

    def __init__(self, in_dir):
        """TODO: incorporate dcmtag2table and the steps required to organize the data.

        `in_dir` is accepted but not used yet.
        """

    def __getitem__(self, index):
        """TODO: generate one sample of data.

        Meant to return <input> and <target>, i.e. the image and the vector of
        classification; for now nothing is computed and None is returned.
        """
        

# Define the model
Try a few different network architectures to find out which one works best.

In [12]:
class Model_v1(torch.nn.Module):
    """Skeleton of the first network variant; no layers are defined yet."""

    def __init__(self):
        """Initialise the torch.nn.Module base class. TODO: define the different layers."""
        super().__init__()

    def forward(self, x):
        """TODO: forward pass. `x` is ignored for now and None is returned."""

# Define the optimizer

# Train the Network

# Test the Network