In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Load in the relevant images for analysis

In [2]:
import pydicom
import torch
from tqdm import tqdm
import time
from matplotlib import pyplot as plt

In [3]:
# Read the training labels: one row per SOPInstanceUID with its Target value.
# NOTE(review): the preview suggests Target can hold several space-separated
# labels in one cell (e.g. "13 20") - confirm before treating it as an integer.
train_df = pd.read_csv(
    filepath_or_buffer='../input/unifesp-x-ray-body-part-classifier/train.csv'
)
train_df.head(n=10)

Unnamed: 0,SOPInstanceUID,Target
0,1.2.826.0.1.3680043.8.498.10025629581362719970...,0
1,1.2.826.0.1.3680043.8.498.10036150326276641158...,15
2,1.2.826.0.1.3680043.8.498.10038426859954986240...,12
3,1.2.826.0.1.3680043.8.498.10050991192143676483...,14
4,1.2.826.0.1.3680043.8.498.10053309524595490852...,3
5,1.2.826.0.1.3680043.8.498.10053755320637729867...,3
6,1.2.826.0.1.3680043.8.498.10062189329714053601...,14
7,1.2.826.0.1.3680043.8.498.10065930002825553435...,13 20
8,1.2.826.0.1.3680043.8.498.10072001800484199846...,3
9,1.2.826.0.1.3680043.8.498.10077219503857952412...,3


In [4]:
def dcmtag2table(folder, list_of_tags):
    """
    Create a Pandas DataFrame with the <list_of_tags> DICOM tags
    from the DICOM files in <folder>

    Every file found under <folder> is opened with pydicom using
    stop_before_pixels=True, so only the metadata is read. A file that
    cannot be processed is reported on stdout and skipped; the scan goes on.

    Parameters:
        folder (str): path for the folder to be recursively walked through looking for DICOM files
        list_of_tags (list of strings): a list of DICOM tags with no whitespaces
            (the caller's sequence is never modified)

    Returns:
        df (DataFrame): table of DICOM tags from the files in <folder>.
            The first column is "Filename" (path of the file), followed by one
            column per entry of <list_of_tags>. A tag that is absent from a
            file is stored as the string "Not found". When no file could be
            read, the frame still has these columns and simply has zero rows.
    """

    # Build the output header from a copy so the caller's list stays untouched.
    columns = ["Filename"] + list(list_of_tags)

    print("Listing files...", flush=True)
    start = time.time()
    filelist = []
    for root, _dirs, files in os.walk(folder, topdown=False):
        for name in files:
            filelist.append(os.path.join(root, name))  # every file, recursively

    print("Time: " + str(time.time() - start))
    # Flush explicitly so this line is emitted before the progress bar starts
    # (replaces the fixed two-second sleeps that were used for the same purpose).
    print("Reading files...", flush=True)

    table = []
    for _f in tqdm(filelist):
        try:
            # stop_before_pixels=True prevents loading the raw pixel data
            ds = pydicom.dcmread(_f, stop_before_pixels=True)
            items = [_f]

            for _tag in list_of_tags:
                if _tag in ds:
                    # value of the parameter requested in <list_of_tags>
                    items.append(ds.data_element(_tag).value)
                else:
                    items.append("Not found")

            table.append(items)
        except Exception:
            # Best effort: skip anything that cannot be parsed, but let
            # KeyboardInterrupt / SystemExit propagate so the scan can be stopped.
            print("Skipping non-DICOM: " + _f)

    # Transpose the rows into one list per column. Indexing per column (rather
    # than zip(*table)) keeps this valid when <table> is empty.
    dictone = {
        _col: [row[i] for row in table]
        for i, _col in enumerate(columns)
    }

    df = pd.DataFrame(dictone)
    print("Finished.", flush=True)
    return df
    

In [5]:
# DICOM header fields to extract from every file in the training folder
tags = [
    'PhotometricInterpretation',
    'BitsAllocated',
    'SOPInstanceUID',
]
dicom_tags_train = dcmtag2table(
    folder='../input/unifesp-x-ray-body-part-classifier/train',
    list_of_tags=tags,
)

Listing files...
Time: 18.155726194381714
Reading files...


100%|██████████| 1738/1738 [00:31<00:00, 54.94it/s]


Finished.


In [6]:
# Preview the first five rows of the extracted header table
dicom_tags_train.head()
# Optional visual check (kept disabled): render the first image in grayscale
# plt.imshow(pydicom.dcmread(dicom_tags_train.Filename[0]).pixel_array, cmap='gray')
# plt.show()

Unnamed: 0,Filename,PhotometricInterpretation,BitsAllocated,SOPInstanceUID
0,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.71157989004260882669...
1,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.32467620439025796224...
2,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.74856220852423198555...
3,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.90865692473901867788...
4,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.44687741644515558201...


In [7]:
# Attach the Target labels to the header table by joining the two frames on
# the image identifier. pandas' default join is an inner join, so only
# SOPInstanceUIDs present in both tables are kept.
train = pd.merge(dicom_tags_train, train_df, on='SOPInstanceUID')
train.head()

Unnamed: 0,Filename,PhotometricInterpretation,BitsAllocated,SOPInstanceUID,Target
0,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.71157989004260882669...,3
1,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.32467620439025796224...,4
2,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.74856220852423198555...,4
3,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.90865692473901867788...,7
4,../input/unifesp-x-ray-body-part-classifier/tr...,MONOCHROME1,16,1.2.826.0.1.3680043.8.498.44687741644515558201...,21


In [8]:
# List the distinct photometric interpretations present in the training set.
# NOTE(review): both MONOCHROME1 and MONOCHROME2 show up in the output; the
# two conventions presumably need to be brought to a common intensity
# polarity before training - confirm against the DICOM standard.
train['PhotometricInterpretation'].unique()

array(['MONOCHROME1', 'MONOCHROME2'], dtype=object)

# Define the model
Try a few different network architectures to determine which one performs best.

# Define the optimizer

# Train the Network

# Test the Network