# Get Ratings from CSV

In [6]:
import csv

def get_ratings_list(log_path):
    
    ratings_list = []
    
    with open(log_path) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')

        row_count = 0

        for row in csv_reader:

            # ignore column name row
            if (row_count != 0):

                ratings_list.append([row[1], int(row[2])])

            row_count = row_count + 1
        
    return ratings_list


# Inter-Rater Variability Analysis

## Sorting images and labels

In [7]:
def get_first(elem):
    return elem[0]


log_path_ali = "./data/logs/ratings_ali.csv"
log_path_peter = "./data/logs/ratings_peter.csv"

ratings_ali = get_ratings_list(log_path_ali)
ratings_peter = get_ratings_list(log_path_peter)

ratings_ali = sorted(ratings_ali, key=get_first)
ratings_peter = sorted(ratings_peter, key=get_first)

print(ratings_ali[:10])
print(ratings_peter[:10])


#check if length of two 2d arrays are the same

[['IM-0001-0001-0001.dcm', 1], ['IM-0001-0002-0001.dcm', 0], ['IM-0001-0003-0001.dcm', 2], ['IM-0001-0004-0001.dcm', 1], ['IM-0001-0005-0001.dcm', 1], ['IM-0001-0006-0001.dcm', 1], ['IM-0001-0007-0001.dcm', 1], ['IM-0001-0008-0001.dcm', 1], ['IM-0001-0009-0001.dcm', 0], ['IM-0001-0010-0001.dcm', 1]]
[['IM-0001-0001-0001.dcm', 2], ['IM-0001-0002-0001.dcm', 0], ['IM-0001-0003-0001.dcm', 1], ['IM-0001-0004-0001.dcm', 1], ['IM-0001-0005-0001.dcm', 1], ['IM-0001-0006-0001.dcm', 1], ['IM-0001-0007-0001.dcm', 1], ['IM-0001-0008-0001.dcm', 1], ['IM-0001-0009-0001.dcm', 1], ['IM-0001-0010-0001.dcm', 1]]


# Generate Folder Structure from CSV

In [11]:
import shutil
import mritopng
import os


#####------ ADJUST SETTINGS BELOW ------#####

log_path = "./data/logs/"
data_pool_path = "./data/data_pool_dicom/"
output_dir = "./data/generated_sets/"

rater_log_paths = ["ratings_ali.csv", "ratings_peter.csv"]
output_paths = ["ali_labels/", "peter_labels/"]


#####------ ADJUST SETTINGS ABOVE ------#####


for rater_log, output_path in zip(rater_log_paths, output_paths):
    
    print("using rater log: " + rater_log)
    print("outputting to: " + output_dir + output_path)
    
    if os.path.exists(output_dir + output_path):
        shutil.rmtree(output_dir + output_path)
    os.mkdir(output_dir + output_path)

    os.mkdir(output_dir + output_path + "0/")
    os.mkdir(output_dir + output_path + "1/")
    os.mkdir(output_dir + output_path + "2/")

    ratings = get_ratings_list(log_path + rater_log)
    
    total_len = len(ratings)
    count = 0
    
    for elem in ratings:

        if (count % 10 == 0):
            print("progress: " + str(count) + "/" + str(total_len))


        if (elem[1] != 100):
            mritopng.convert_file(data_pool_path + elem[0],  
                                  output_dir + output_path + str(elem[1]) + "/" + elem[0] + ".png")

        count = count + 1

# above seems to take care of contrast as well
        
    

using rater log: ratings_ali.csv
outputting to: ./data/generated_sets/ali_labels/
progress: 0/2111
progress: 10/2111
progress: 20/2111
progress: 30/2111
progress: 40/2111
progress: 50/2111
progress: 60/2111
progress: 70/2111
progress: 80/2111
progress: 90/2111
progress: 100/2111
progress: 110/2111
progress: 120/2111
progress: 130/2111
progress: 140/2111
progress: 150/2111
progress: 160/2111
progress: 170/2111
progress: 180/2111
progress: 190/2111
progress: 200/2111
progress: 210/2111
progress: 220/2111
progress: 230/2111
progress: 240/2111
progress: 250/2111
progress: 260/2111
progress: 270/2111
progress: 280/2111
progress: 290/2111
progress: 300/2111
progress: 310/2111
progress: 320/2111
progress: 330/2111
progress: 340/2111
progress: 350/2111
progress: 360/2111
progress: 370/2111
progress: 380/2111
progress: 390/2111
progress: 400/2111
progress: 410/2111
progress: 420/2111
progress: 430/2111
progress: 440/2111
progress: 450/2111
progress: 460/2111
progress: 470/2111
progress: 480/211

InvalidDicomError: File is missing DICOM File Meta Information header or the 'DICM' prefix is missing from the header. Use force=True to force reading.

# Split into Train, Eval, Test

In [None]:
import load_data_utility
import shutil
import os
import pathlib

#####------ ADJUST SETTINGS BELOW ------#####

data_root = "./data/original_data/data_relabeled_undersampled_png/"
output_folter = "./data/generated_splits/undersampled/"
split_ratios = [0.7, 0.1, 0.2]

#####------ ADJUST SETTINGS ABOVE ------#####

image_paths = load_data_utility.load_image_paths(data_root)

print(image_paths[:5])

train_paths, eval_paths, test_paths = load_data_utility.split(image_paths, split=split_ratios, seed=777)

def get_label(path):
    
    path_location = pathlib.Path(path)
    
    str1 = str(path_location.parents[0])
    str2 = str(path_location.parents[1])
    
    
    retval = str1.replace(str2, '')
    retval = retval.replace("/", '')
    
    return retval


print(train_paths[0])
print(get_label(train_paths[0]))

if os.path.exists(output_folter):
    shutil.rmtree(output_folter)
    
os.mkdir(output_folter)
os.mkdir(output_folter + "train/")
os.mkdir(output_folter + "eval/")
os.mkdir(output_folter + "test/")
os.mkdir(output_folter + "train/0/")
os.mkdir(output_folter + "train/1/")
os.mkdir(output_folter + "train/2/")
os.mkdir(output_folter + "eval/0/")
os.mkdir(output_folter + "eval/1/")
os.mkdir(output_folter + "eval/2/")
os.mkdir(output_folter + "test/0/")
os.mkdir(output_folter + "test/1/")
os.mkdir(output_folter + "test/2/")


for pngfile in train_paths:
    shutil.copy(pngfile, output_folter + "train/" + get_label(pngfile) + "/" )

for pngfile in eval_paths:
    shutil.copy(pngfile, output_folter + "eval/" + get_label(pngfile) + "/" )
    
for pngfile in test_paths:
    shutil.copy(pngfile, output_folter + "test/" + get_label(pngfile) + "/" )


# Dataset Balancing

## Balancing Utility Functions

In [None]:
import pathlib
import random

# load an array of image paths
def load_sorted_paths(path):
    
    data_root = pathlib.Path(path)
    
    paths = []
    
    for child in data_root.iterdir():
        if (child.is_dir() and child.name != ".DS_Store"):
            print(child)
            
            subdir_paths = list(child.glob('**/*'))
            subdir_paths = [str(path) for path in subdir_paths if path.name != ".DS_Store"]
        
            paths.append(subdir_paths)
    
    return paths


# load an array of image paths
def load_dir_paths(path):
    
    data_root = pathlib.Path(path)
    
    paths = []
    
    for child in data_root.iterdir():
        if (child.name != ".DS_Store"):
         
            paths.append(str(child))
    
    return paths

## Inserting Noisy Images

In [None]:
import synthetic_motion_utility as synth
import imageio

input_data_dir = "./data/generated_splits/train_set_balanced/train/1/"
output_data_dir = "./data/generated_splits/train_set_balanced/train/0/"

all_image_paths = load_dir_paths(input_data_dir)

num_synthetic = 400
counter = 0

random.seed(a=777)
random.shuffle(all_image_paths)

while counter < num_synthetic:
    
    im = imageio.imread(all_image_paths[counter])
    
    im = synth.add_motion_artifact(im, seed=counter)
    
    imageio.imwrite(output_data_dir + "synthetic_" + str(counter) + ".png", im)
    
    counter = counter + 1

## Oversampling 2-Label Images

In [None]:
import os, glob, shutil

data_root_2 = "./data/generated_splits/train_set_balanced/train/2/"
image_paths_2 = load_dir_paths(data_root_2)

print(image_paths_2[:10])

for filename in glob.glob(data_root_2 + "duplicate*"):
    os.remove(filename) 

counter = 0
for pngfile in image_paths_2:
    shutil.copy(pngfile, data_root_2 + "duplicate_" +  str(counter) + ".png" )
    counter = counter + 1


## Undersampling Images

In [None]:
import os
import random

random.seed(a=777)

data_root = "./data/original_data/data_relabeled_undersampled_png/"

data_root = data_root + "1/"

paths_in_dir = load_dir_paths(data_root)

random.shuffle(paths_in_dir)

print(paths_in_dir[:10])

random.seed(a=777)

for path in paths_in_dir:
    
    if (random.choice([True, False])):
        print("removing: " + path)
        os.remove(path)