# Get Ratings from CSV

In [6]:
import csv

def get_ratings_list(log_path):
    """Read a rater's CSV log and return its ``[image_name, rating]`` pairs.

    The first row is treated as the column-name header and skipped.  For
    every following row, column 1 is kept as-is and column 2 is converted to
    ``int``.  Completely empty rows (e.g. a trailing blank line) are ignored
    instead of raising ``IndexError``.

    Args:
        log_path: Path of the comma-separated ratings file.

    Returns:
        A list of two-element lists ``[row[1], int(row[2])]`` in file order;
        an empty list if the file has no data rows.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-empty row has fewer than three columns.
        ValueError: If column 2 of a row is not an integer.
    """
    # newline="" leaves newline handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(log_path, newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')

        # Skip the column-name row; the default keeps an empty file from
        # raising StopIteration.
        next(csv_reader, None)

        # csv.reader yields [] for blank lines, hence the `if row` filter.
        return [[row[1], int(row[2])] for row in csv_reader if row]


# Inter-Rater Variability Analysis

## Sorting images and labels

In [7]:
def get_first(elem):
    """Return the item stored at index 0 of ``elem``."""
    head = elem[0]
    return head


# CSV logs of the two raters.
log_path_ali = "./data/logs/ratings_ali.csv"
log_path_peter = "./data/logs/ratings_peter.csv"

# Load each log and order its entries by their first field (see get_first),
# presumably so the two lists can be compared entry by entry.
ratings_ali = sorted(get_ratings_list(log_path_ali), key=get_first)
ratings_peter = sorted(get_ratings_list(log_path_peter), key=get_first)

# Show the first ten entries of each list as a quick visual check.
for rater_ratings in (ratings_ali, ratings_peter):
    print(rater_ratings[:10])


# TODO: check that the two rating lists have the same length

[['IM-0001-0001-0001.dcm', 1], ['IM-0001-0002-0001.dcm', 0], ['IM-0001-0003-0001.dcm', 2], ['IM-0001-0004-0001.dcm', 1], ['IM-0001-0005-0001.dcm', 1], ['IM-0001-0006-0001.dcm', 1], ['IM-0001-0007-0001.dcm', 1], ['IM-0001-0008-0001.dcm', 1], ['IM-0001-0009-0001.dcm', 0], ['IM-0001-0010-0001.dcm', 1]]
[['IM-0001-0001-0001.dcm', 2], ['IM-0001-0002-0001.dcm', 0], ['IM-0001-0003-0001.dcm', 1], ['IM-0001-0004-0001.dcm', 1], ['IM-0001-0005-0001.dcm', 1], ['IM-0001-0006-0001.dcm', 1], ['IM-0001-0007-0001.dcm', 1], ['IM-0001-0008-0001.dcm', 1], ['IM-0001-0009-0001.dcm', 1], ['IM-0001-0010-0001.dcm', 1]]


# Generate Folder Structure from CSV

In [None]:
import shutil
import mritopng
import os


#####------ ADJUST SETTINGS BELOW ------#####

log_path = "./data/logs/"
data_pool_path = "./data/data_pool_dicom/"
output_dir = "./data/generated_sets/"

# The two lists are zipped below: rater_log_paths[i] is exported to
# output_paths[i].
rater_log_paths = ["ratings_ali.csv", "ratings_peter.csv"]
output_paths = ["ali_labels/", "peter_labels/"]


#####------ ADJUST SETTINGS ABOVE ------#####


for rater_log, output_path in zip(rater_log_paths, output_paths):

    rater_output_dir = output_dir + output_path

    print("using rater log: " + rater_log)
    print("outputting to: " + rater_output_dir)

    # Start from an empty folder so files of an earlier run do not linger.
    if os.path.exists(rater_output_dir):
        shutil.rmtree(rater_output_dir)

    # One sub-folder per label.  makedirs (unlike mkdir) also creates
    # output_dir itself when it does not exist yet.
    for label in ("0", "1", "2"):
        os.makedirs(rater_output_dir + label + "/")

    ratings = get_ratings_list(log_path + rater_log)

    total_len = len(ratings)

    for count, elem in enumerate(ratings):

        # Report progress every ten entries.
        if (count % 10 == 0):
            print("progress: " + str(count) + "/" + str(total_len))

        # NOTE(review): 100 looks like a sentinel rating for images that
        # must not be exported -- confirm against the rating tool.
        if (elem[1] != 100):

            print(data_pool_path + elem[0])

            try:
                # The PNG goes into the sub-folder named after the rating.
                mritopng.convert_file(data_pool_path + elem[0],
                                      rater_output_dir + str(elem[1]) + "/" + elem[0] + ".png")

            except Exception as err:
                # Best effort: report the failure and carry on with the
                # next image instead of aborting the whole export.
                print(err)

# Note: the mritopng conversion above seems to take care of contrast as well
        
    

using rater log: ratings_ali.csv
outputting to: ./data/generated_sets/ali_labels/
progress: 0/2111
./data/data_pool_dicom/IM-0031-0001-0001.dcm
./data/data_pool_dicom/IM-0018-0029-0001.dcm
./data/data_pool_dicom/IM-0014-0018-0001.dcm
./data/data_pool_dicom/IM-0065-0014-0001.dcm
./data/data_pool_dicom/IM-0069-0025-0001.dcm
./data/data_pool_dicom/IM-0021-0004-0001.dcm
./data/data_pool_dicom/IM-0025-0016-0001.dcm
./data/data_pool_dicom/IM-0048-0005-0001.dcm
./data/data_pool_dicom/IM-0032-0023-0001.dcm
./data/data_pool_dicom/IM-0012-0008-0001.dcm
progress: 10/2111
./data/data_pool_dicom/IM-0046-0027-0001.dcm
./data/data_pool_dicom/IM-0026-0014-0001.dcm
./data/data_pool_dicom/IM-0045-0014-0001.dcm
./data/data_pool_dicom/IM-0039-0029-0001.dcm
./data/data_pool_dicom/IM-0048-0006-0001.dcm
./data/data_pool_dicom/IM-0028-0017-0001.dcm
./data/data_pool_dicom/IM-0068-0006-0001.dcm
./data/data_pool_dicom/IM-0044-0002-0001.dcm
./data/data_pool_dicom/IM-0014-0001-0001.dcm
./data/data_pool_dicom/IM-00

./data/data_pool_dicom/IM-0045-0004-0001.dcm
./data/data_pool_dicom/IM-0047-0019-0001.dcm
./data/data_pool_dicom/IM-0029-0022-0001.dcm
./data/data_pool_dicom/IM-0010-0024-0001.dcm
./data/data_pool_dicom/IM-0048-0019-0001.dcm
progress: 180/2111
./data/data_pool_dicom/IM-0044-0015-0001.dcm
./data/data_pool_dicom/IM-0036-0012-0001.dcm
./data/data_pool_dicom/IM-0003-0025-0001.dcm
./data/data_pool_dicom/IM-0002-0026-0001.dcm
./data/data_pool_dicom/IM-0014-0002-0001.dcm
./data/data_pool_dicom/IM-0049-0014-0001.dcm
./data/data_pool_dicom/IM-0069-0003-0001.dcm
./data/data_pool_dicom/IM-0025-0002-0001.dcm
./data/data_pool_dicom/IM-0029-0004-0001.dcm
./data/data_pool_dicom/IM-0017-0006-0001.dcm
progress: 190/2111
./data/data_pool_dicom/IM-0024-0022-0001.dcm
./data/data_pool_dicom/IM-0060-0008-0001.dcm
./data/data_pool_dicom/IM-0030-0002-0001.dcm
./data/data_pool_dicom/IM-0040-0022-0001.dcm
./data/data_pool_dicom/IM-0051-0018-0001.dcm
./data/data_pool_dicom/IM-0029-0005-0001.dcm
./data/data_pool_

./data/data_pool_dicom/IM-0065-0016-0001.dcm
progress: 360/2111
./data/data_pool_dicom/IM-0058-0008-0001.dcm
./data/data_pool_dicom/IM-0052-0042-0001.dcm
./data/data_pool_dicom/IM-0057-0026-0001.dcm
./data/data_pool_dicom/IM-0051-0023-0001.dcm
./data/data_pool_dicom/IM-0037-0005-0001.dcm
./data/data_pool_dicom/IM-0053-0001-0001.dcm
./data/data_pool_dicom/IM-0019-0011-0001.dcm
./data/data_pool_dicom/IM-0026-0012-0001.dcm
./data/data_pool_dicom/IM-0021-0026-0001.dcm
./data/data_pool_dicom/IM-0019-0023-0001.dcm
progress: 370/2111
./data/data_pool_dicom/IM-0071-0001-0001.dcm
./data/data_pool_dicom/IM-0035-0016-0001.dcm
./data/data_pool_dicom/IM-0033-0025-0001.dcm
./data/data_pool_dicom/IM-0005-0025-0001.dcm
./data/data_pool_dicom/IM-0022-0012-0001.dcm
./data/data_pool_dicom/IM-0011-0014-0001.dcm
./data/data_pool_dicom/IM-0039-0036-0001.dcm
./data/data_pool_dicom/IM-0041-0017-0001.dcm
./data/data_pool_dicom/IM-0061-0009-0001.dcm
./data/data_pool_dicom/IM-0068-0020-0001.dcm
progress: 380/211

./data/data_pool_dicom/IM-0020-0023-0001.dcm
./data/data_pool_dicom/IM-0048-0021-0001.dcm
./data/data_pool_dicom/IM-0013-0009-0001.dcm
progress: 540/2111
./data/data_pool_dicom/IM-0014-0004-0001.dcm
./data/data_pool_dicom/IM-0061-0022-0001.dcm
./data/data_pool_dicom/IM-0069-0002-0001.dcm
./data/data_pool_dicom/IM-0025-0017-0001.dcm
./data/data_pool_dicom/IM-0028-0027-0001.dcm
./data/data_pool_dicom/IM-0013-0007-0001.dcm
./data/data_pool_dicom/IM-0066-0003-0001.dcm
./data/data_pool_dicom/IM-0016-0026-0001.dcm
./data/data_pool_dicom/IM-0055-0028-0001.dcm
./data/data_pool_dicom/IM-0048-0025-0001.dcm
progress: 550/2111
./data/data_pool_dicom/IM-0021-0025-0001.dcm
./data/data_pool_dicom/IM-0035-0014-0001.dcm
./data/data_pool_dicom/IM-0020-0028-0001.dcm
./data/data_pool_dicom/IM-0018-0009-0001.dcm
./data/data_pool_dicom/IM-0027-0029-0001.dcm
./data/data_pool_dicom/IM-0011-0003-0001.dcm
./data/data_pool_dicom/IM-0043-0018-0001.dcm
./data/data_pool_dicom/IM-0065-0009-0001.dcm
./data/data_pool_

./data/data_pool_dicom/IM-0021-0013-0001.dcm
./data/data_pool_dicom/IM-0005-0026-0001.dcm
./data/data_pool_dicom/IM-0033-0001-0001.dcm
progress: 720/2111
./data/data_pool_dicom/IM-0048-0002-0001.dcm
./data/data_pool_dicom/IM-0035-0005-0001.dcm
./data/data_pool_dicom/IM-0062-0004-0001.dcm
./data/data_pool_dicom/IM-0003-0007-0001.dcm
./data/data_pool_dicom/IM-0023-0007-0001.dcm
./data/data_pool_dicom/IM-0058-0006-0001.dcm
./data/data_pool_dicom/IM-0027-0001-0001.dcm
./data/data_pool_dicom/IM-0036-0016-0001.dcm
./data/data_pool_dicom/IM-0019-0017-0001.dcm
./data/data_pool_dicom/IM-0053-0020-0001.dcm
progress: 730/2111
./data/data_pool_dicom/IM-0039-0020-0001.dcm
./data/data_pool_dicom/IM-0047-0012-0001.dcm
./data/data_pool_dicom/IM-0057-0025-0001.dcm
./data/data_pool_dicom/IM-0059-0015-0001.dcm
./data/data_pool_dicom/IM-0003-0006-0001.dcm
./data/data_pool_dicom/IM-0065-0025-0001.dcm
./data/data_pool_dicom/IM-0053-0011-0001.dcm
./data/data_pool_dicom/IM-0066-0015-0001.dcm
./data/data_pool_

./data/data_pool_dicom/IM-0007-0029-0001.dcm
./data/data_pool_dicom/IM-0012-0027-0001.dcm
./data/data_pool_dicom/IM-0021-0014-0001.dcm
./data/data_pool_dicom/IM-0042-0006-0001.dcm
./data/data_pool_dicom/IM-0011-0009-0001.dcm
./data/data_pool_dicom/IM-0070-0032-0001.dcm
./data/data_pool_dicom/IM-0006-0036-0001.dcm
progress: 910/2111
./data/data_pool_dicom/IM-0066-0001-0001.dcm
./data/data_pool_dicom/IM-0006-0030-0001.dcm
./data/data_pool_dicom/IM-0052-0008-0001.dcm
./data/data_pool_dicom/IM-0016-0025-0001.dcm
./data/data_pool_dicom/IM-0022-0003-0001.dcm
./data/data_pool_dicom/IM-0025-0003-0001.dcm
./data/data_pool_dicom/IM-0071-0010-0001.dcm
./data/data_pool_dicom/IM-0006-0009-0001.dcm
./data/data_pool_dicom/IM-0046-0028-0001.dcm
./data/data_pool_dicom/IM-0003-0036-0001.dcm
progress: 920/2111
./data/data_pool_dicom/IM-0071-0024-0001.dcm
./data/data_pool_dicom/IM-0045-0016-0001.dcm
./data/data_pool_dicom/IM-0027-0015-0001.dcm
./data/data_pool_dicom/IM-0060-0014-0001.dcm
./data/data_pool_

./data/data_pool_dicom/IM-0023-0002-0001.dcm
./data/data_pool_dicom/IM-0036-0018-0001.dcm
./data/data_pool_dicom/IM-0060-0016-0001.dcm
./data/data_pool_dicom/IM-0009-0002-0001.dcm
./data/data_pool_dicom/IM-0012-0012-0001.dcm
progress: 1090/2111
./data/data_pool_dicom/IM-0071-0023-0001.dcm
./data/data_pool_dicom/IM-0071-0007-0001.dcm
./data/data_pool_dicom/IM-0021-0023-0001.dcm
./data/data_pool_dicom/IM-0019-0013-0001.dcm
./data/data_pool_dicom/IM-0027-0022-0001.dcm
./data/data_pool_dicom/IM-0042-0015-0001.dcm
./data/data_pool_dicom/IM-0047-0006-0001.dcm
./data/data_pool_dicom/IM-0055-0024-0001.dcm
./data/data_pool_dicom/IM-0016-0031-0001.dcm
./data/data_pool_dicom/IM-0040-0020-0001.dcm
progress: 1100/2111
./data/data_pool_dicom/IM-0040-0006-0001.dcm
./data/data_pool_dicom/IM-0058-0005-0001.dcm
./data/data_pool_dicom/IM-0024-0011-0001.dcm
./data/data_pool_dicom/IM-0047-0016-0001.dcm
./data/data_pool_dicom/IM-0062-0021-0001.dcm
./data/data_pool_dicom/IM-0017-0020-0001.dcm
./data/data_poo

progress: 1260/2111
./data/data_pool_dicom/IM-0067-0031-0001.dcm
./data/data_pool_dicom/IM-0038-0014-0001.dcm
./data/data_pool_dicom/IM-0039-0027-0001.dcm
./data/data_pool_dicom/IM-0004-0002-0001.dcm
./data/data_pool_dicom/IM-0056-0003-0001.dcm
./data/data_pool_dicom/IM-0004-0004-0001.dcm
./data/data_pool_dicom/IM-0002-0027-0001.dcm
./data/data_pool_dicom/IM-0062-0010-0001.dcm
./data/data_pool_dicom/IM-0055-0019-0001.dcm
./data/data_pool_dicom/IM-0039-0028-0001.dcm
progress: 1270/2111
./data/data_pool_dicom/IM-0045-0025-0001.dcm
./data/data_pool_dicom/IM-0064-0008-0001.dcm
./data/data_pool_dicom/IM-0054-0004-0001.dcm
./data/data_pool_dicom/IM-0023-0011-0001.dcm
./data/data_pool_dicom/IM-0004-0030-0001.dcm
./data/data_pool_dicom/IM-0003-0023-0001.dcm
./data/data_pool_dicom/IM-0042-0007-0001.dcm
./data/data_pool_dicom/IM-0020-0009-0001.dcm
./data/data_pool_dicom/IM-0039-0004-0001.dcm
./data/data_pool_dicom/IM-0027-0009-0001.dcm
progress: 1280/2111
./data/data_pool_dicom/IM-0035-0023-0001

./data/data_pool_dicom/IM-0021-0010-0001.dcm
./data/data_pool_dicom/IM-0006-0008-0001.dcm
./data/data_pool_dicom/IM-0043-0004-0001.dcm
./data/data_pool_dicom/IM-0040-0004-0001.dcm
./data/data_pool_dicom/IM-0002-0032-0001.dcm
./data/data_pool_dicom/IM-0020-0008-0001.dcm
progress: 1450/2111
./data/data_pool_dicom/IM-0015-0017-0001.dcm
./data/data_pool_dicom/IM-0004-0017-0001.dcm
./data/data_pool_dicom/IM-0006-0017-0001.dcm
./data/data_pool_dicom/IM-0012-0016-0001.dcm
./data/data_pool_dicom/IM-0068-0027-0001.dcm
./data/data_pool_dicom/IM-0065-0002-0001.dcm
./data/data_pool_dicom/IM-0021-0021-0001.dcm
./data/data_pool_dicom/IM-0010-0030-0001.dcm
./data/data_pool_dicom/IM-0028-0001-0001.dcm
./data/data_pool_dicom/IM-0069-0015-0001.dcm
progress: 1460/2111
./data/data_pool_dicom/IM-0042-0023-0001.dcm
./data/data_pool_dicom/IM-0068-0005-0001.dcm
./data/data_pool_dicom/IM-0070-0016-0001.dcm
./data/data_pool_dicom/IM-0021-0020-0001.dcm
./data/data_pool_dicom/IM-0051-0014-0001.dcm
./data/data_poo

./data/data_pool_dicom/IM-0067-0030-0001.dcm
./data/data_pool_dicom/IM-0015-0028-0001.dcm
./data/data_pool_dicom/IM-0015-0016-0001.dcm
./data/data_pool_dicom/IM-0039-0023-0001.dcm
./data/data_pool_dicom/IM-0052-0030-0001.dcm
./data/data_pool_dicom/IM-0002-0012-0001.dcm
./data/data_pool_dicom/IM-0011-0019-0001.dcm
progress: 1630/2111
./data/data_pool_dicom/IM-0008-0013-0001.dcm
./data/data_pool_dicom/IM-0018-0012-0001.dcm
./data/data_pool_dicom/IM-0058-0010-0001.dcm
./data/data_pool_dicom/IM-0046-0016-0001.dcm
./data/data_pool_dicom/IM-0005-0007-0001.dcm
./data/data_pool_dicom/IM-0049-0012-0001.dcm
./data/data_pool_dicom/IM-0005-0017-0001.dcm
./data/data_pool_dicom/IM-0041-0023-0001.dcm
./data/data_pool_dicom/IM-0060-0006-0001.dcm
./data/data_pool_dicom/IM-0033-0014-0001.dcm
progress: 1640/2111
./data/data_pool_dicom/IM-0061-0028-0001.dcm
./data/data_pool_dicom/IM-0039-0002-0001.dcm
./data/data_pool_dicom/IM-0060-0003-0001.dcm
./data/data_pool_dicom/IM-0044-0018-0001.dcm
./data/data_poo

./data/data_pool_dicom/IM-0050-0008-0001.dcm
./data/data_pool_dicom/IM-0007-0025-0001.dcm
progress: 1800/2111
./data/data_pool_dicom/IM-0016-0035-0001.dcm
./data/data_pool_dicom/IM-0002-0030-0001.dcm
./data/data_pool_dicom/IM-0038-0024-0001.dcm
./data/data_pool_dicom/IM-0032-0015-0001.dcm
./data/data_pool_dicom/IM-0005-0003-0001.dcm
./data/data_pool_dicom/IM-0053-0027-0001.dcm
./data/data_pool_dicom/IM-0050-0019-0001.dcm
./data/data_pool_dicom/IM-0045-0023-0001.dcm
./data/data_pool_dicom/IM-0019-0025-0001.dcm
./data/data_pool_dicom/IM-0010-0014-0001.dcm
progress: 1810/2111
./data/data_pool_dicom/IM-0066-0005-0001.dcm
./data/data_pool_dicom/IM-0064-0002-0001.dcm
./data/data_pool_dicom/IM-0037-0008-0001.dcm
./data/data_pool_dicom/IM-0025-0007-0001.dcm
./data/data_pool_dicom/IM-0026-0027-0001.dcm
./data/data_pool_dicom/IM-0013-0006-0001.dcm
./data/data_pool_dicom/IM-0004-0007-0001.dcm
./data/data_pool_dicom/IM-0058-0026-0001.dcm
./data/data_pool_dicom/IM-0056-0021-0001.dcm
./data/data_poo

./data/data_pool_dicom/IM-0044-0019-0001.dcm
./data/data_pool_dicom/IM-0065-0020-0001.dcm
./data/data_pool_dicom/IM-0027-0019-0001.dcm
./data/data_pool_dicom/IM-0025-0011-0001.dcm
./data/data_pool_dicom/IM-0004-0032-0001.dcm
./data/data_pool_dicom/IM-0061-0029-0001.dcm
./data/data_pool_dicom/IM-0036-0007-0001.dcm
./data/data_pool_dicom/IM-0061-0006-0001.dcm
progress: 1990/2111
./data/data_pool_dicom/IM-0028-0009-0001.dcm
./data/data_pool_dicom/IM-0016-0028-0001.dcm
./data/data_pool_dicom/IM-0046-0024-0001.dcm
./data/data_pool_dicom/IM-0049-0003-0001.dcm
./data/data_pool_dicom/IM-0028-0007-0001.dcm
./data/data_pool_dicom/IM-0045-0001-0001.dcm
./data/data_pool_dicom/IM-0036-0009-0001.dcm
./data/data_pool_dicom/IM-0004-0014-0001.dcm
./data/data_pool_dicom/IM-0054-0005-0001.dcm
./data/data_pool_dicom/IM-0001-0001-0001.dcm
progress: 2000/2111
./data/data_pool_dicom/IM-0031-0013-0001.dcm
./data/data_pool_dicom/IM-0049-0002-0001.dcm
./data/data_pool_dicom/IM-0055-0025-0001.dcm
./data/data_poo

./data/data_pool_dicom/IM-0057-0015-0001.dcm
progress: 50/2111
./data/data_pool_dicom/IM-0046-0004-0001.dcm
./data/data_pool_dicom/IM-0003-0025-0001.dcm
./data/data_pool_dicom/IM-0003-0034-0001.dcm
./data/data_pool_dicom/IM-0061-0015-0001.dcm
./data/data_pool_dicom/IM-0033-0009-0001.dcm
./data/data_pool_dicom/IM-0054-0028-0001.dcm
./data/data_pool_dicom/IM-0010-0011-0001.dcm
./data/data_pool_dicom/IM-0060-0016-0001.dcm
./data/data_pool_dicom/IM-0064-0001-0001.dcm
./data/data_pool_dicom/IM-0049-0007-0001.dcm
progress: 60/2111
./data/data_pool_dicom/IM-0004-0009-0001.dcm
./data/data_pool_dicom/IM-0003-0023-0001.dcm
./data/data_pool_dicom/IM-0004-0023-0001.dcm
./data/data_pool_dicom/IM-0026-0026-0001.dcm
./data/data_pool_dicom/IM-0057-0029-0001.dcm
./data/data_pool_dicom/IM-0026-0019-0001.dcm
./data/data_pool_dicom/IM-0054-0010-0001.dcm
./data/data_pool_dicom/IM-0028-0025-0001.dcm
./data/data_pool_dicom/IM-0048-0016-0001.dcm
./data/data_pool_dicom/IM-0002-0016-0001.dcm
progress: 70/2111
.

./data/data_pool_dicom/IM-0042-0006-0001.dcm
progress: 220/2111
./data/data_pool_dicom/IM-0047-0001-0001.dcm
./data/data_pool_dicom/IM-0036-0006-0001.dcm
./data/data_pool_dicom/IM-0010-0020-0001.dcm
./data/data_pool_dicom/IM-0068-0022-0001.dcm
./data/data_pool_dicom/IM-0026-0018-0001.dcm
./data/data_pool_dicom/IM-0069-0020-0001.dcm
./data/data_pool_dicom/IM-0066-0006-0001.dcm
./data/data_pool_dicom/IM-0055-0020-0001.dcm
./data/data_pool_dicom/IM-0057-0021-0001.dcm
./data/data_pool_dicom/IM-0033-0017-0001.dcm
progress: 230/2111
./data/data_pool_dicom/IM-0029-0028-0001.dcm
./data/data_pool_dicom/IM-0070-0026-0001.dcm
./data/data_pool_dicom/IM-0003-0013-0001.dcm
./data/data_pool_dicom/IM-0049-0006-0001.dcm
./data/data_pool_dicom/IM-0007-0020-0001.dcm
./data/data_pool_dicom/IM-0066-0023-0001.dcm
./data/data_pool_dicom/IM-0005-0004-0001.dcm
./data/data_pool_dicom/IM-0002-0028-0001.dcm
./data/data_pool_dicom/IM-0016-0012-0001.dcm
./data/data_pool_dicom/IM-0052-0017-0001.dcm
progress: 240/211

./data/data_pool_dicom/IM-0006-0025-0001.dcm
progress: 400/2111
./data/data_pool_dicom/IM-0025-0010-0001.dcm
./data/data_pool_dicom/IM-0035-0009-0001.dcm
./data/data_pool_dicom/IM-0043-0020-0001.dcm
./data/data_pool_dicom/IM-0044-0007-0001.dcm
./data/data_pool_dicom/IM-0063-0006-0001.dcm
./data/data_pool_dicom/IM-0070-0028-0001.dcm
./data/data_pool_dicom/IM-0047-0030-0001.dcm
./data/data_pool_dicom/IM-0051-0008-0001.dcm
./data/data_pool_dicom/IM-0013-0016-0001.dcm
./data/data_pool_dicom/IM-0048-0026-0001.dcm
progress: 410/2111
./data/data_pool_dicom/IM-0071-0017-0001.dcm
./data/data_pool_dicom/IM-0059-0021-0001.dcm
./data/data_pool_dicom/IM-0070-0017-0001.dcm
./data/data_pool_dicom/IM-0016-0018-0001.dcm
./data/data_pool_dicom/IM-0057-0028-0001.dcm
./data/data_pool_dicom/IM-0024-0024-0001.dcm
./data/data_pool_dicom/IM-0016-0031-0001.dcm
./data/data_pool_dicom/IM-0049-0011-0001.dcm
./data/data_pool_dicom/IM-0031-0023-0001.dcm
./data/data_pool_dicom/IM-0056-0023-0001.dcm
progress: 420/211

./data/data_pool_dicom/IM-0039-0022-0001.dcm
./data/data_pool_dicom/IM-0040-0019-0001.dcm
./data/data_pool_dicom/IM-0063-0020-0001.dcm
./data/data_pool_dicom/IM-0011-0024-0001.dcm
./data/data_pool_dicom/IM-0027-0021-0001.dcm
./data/data_pool_dicom/IM-0065-0028-0001.dcm
progress: 580/2111
./data/data_pool_dicom/IM-0023-0013-0001.dcm
./data/data_pool_dicom/IM-0041-0012-0001.dcm
./data/data_pool_dicom/IM-0040-0028-0001.dcm
./data/data_pool_dicom/IM-0057-0009-0001.dcm
./data/data_pool_dicom/IM-0071-0027-0001.dcm
./data/data_pool_dicom/IM-0003-0010-0001.dcm
./data/data_pool_dicom/IM-0003-0032-0001.dcm
./data/data_pool_dicom/IM-0022-0012-0001.dcm
./data/data_pool_dicom/IM-0048-0017-0001.dcm
./data/data_pool_dicom/IM-0014-0033-0001.dcm
progress: 590/2111
./data/data_pool_dicom/IM-0055-0010-0001.dcm
./data/data_pool_dicom/IM-0045-0022-0001.dcm
./data/data_pool_dicom/IM-0006-0033-0001.dcm
./data/data_pool_dicom/IM-0036-0022-0001.dcm
./data/data_pool_dicom/IM-0018-0025-0001.dcm
./data/data_pool_

./data/data_pool_dicom/IM-0070-0006-0001.dcm
./data/data_pool_dicom/IM-0038-0028-0001.dcm
progress: 760/2111
./data/data_pool_dicom/IM-0040-0011-0001.dcm
./data/data_pool_dicom/IM-0026-0012-0001.dcm
./data/data_pool_dicom/IM-0071-0028-0001.dcm
./data/data_pool_dicom/IM-0061-0028-0001.dcm
./data/data_pool_dicom/IM-0016-0011-0001.dcm
./data/data_pool_dicom/IM-0037-0007-0001.dcm
./data/data_pool_dicom/IM-0047-0003-0001.dcm
./data/data_pool_dicom/IM-0067-0009-0001.dcm
./data/data_pool_dicom/IM-0053-0002-0001.dcm
./data/data_pool_dicom/IM-0007-0023-0001.dcm
progress: 770/2111
./data/data_pool_dicom/IM-0008-0023-0001.dcm
./data/data_pool_dicom/IM-0007-0006-0001.dcm
./data/data_pool_dicom/IM-0065-0024-0001.dcm
./data/data_pool_dicom/IM-0055-0022-0001.dcm
./data/data_pool_dicom/IM-0014-0025-0001.dcm
./data/data_pool_dicom/IM-0063-0009-0001.dcm
./data/data_pool_dicom/IM-0009-0024-0001.dcm
./data/data_pool_dicom/IM-0007-0028-0001.dcm
./data/data_pool_dicom/IM-0004-0029-0001.dcm
./data/data_pool_

./data/data_pool_dicom/IM-0057-0018-0001.dcm
./data/data_pool_dicom/IM-0019-0023-0001.dcm
./data/data_pool_dicom/IM-0031-0030-0001.dcm
./data/data_pool_dicom/IM-0032-0016-0001.dcm
./data/data_pool_dicom/IM-0050-0012-0001.dcm
./data/data_pool_dicom/IM-0007-0015-0001.dcm
progress: 940/2111
./data/data_pool_dicom/IM-0038-0019-0001.dcm
./data/data_pool_dicom/IM-0046-0020-0001.dcm
./data/data_pool_dicom/IM-0027-0006-0001.dcm
./data/data_pool_dicom/IM-0021-0002-0001.dcm
./data/data_pool_dicom/IM-0018-0005-0001.dcm
./data/data_pool_dicom/IM-0001-0013-0001.dcm
./data/data_pool_dicom/IM-0029-0017-0001.dcm
./data/data_pool_dicom/IM-0018-0006-0001.dcm
./data/data_pool_dicom/IM-0021-0013-0001.dcm
./data/data_pool_dicom/IM-0030-0008-0001.dcm
progress: 950/2111
./data/data_pool_dicom/IM-0060-0007-0001.dcm
./data/data_pool_dicom/IM-0004-0020-0001.dcm
./data/data_pool_dicom/IM-0052-0020-0001.dcm
./data/data_pool_dicom/IM-0028-0015-0001.dcm
./data/data_pool_dicom/IM-0071-0003-0001.dcm
./data/data_pool_

./data/data_pool_dicom/IM-0062-0022-0001.dcm
progress: 1120/2111
./data/data_pool_dicom/IM-0046-0014-0001.dcm
./data/data_pool_dicom/IM-0050-0013-0001.dcm
./data/data_pool_dicom/IM-0026-0001-0001.dcm
./data/data_pool_dicom/IM-0049-0018-0001.dcm
./data/data_pool_dicom/IM-0034-0016-0001.dcm
./data/data_pool_dicom/IM-0015-0020-0001.dcm
./data/data_pool_dicom/IM-0053-0017-0001.dcm
./data/data_pool_dicom/IM-0026-0013-0001.dcm
./data/data_pool_dicom/IM-0024-0023-0001.dcm
./data/data_pool_dicom/IM-0061-0025-0001.dcm
progress: 1130/2111
./data/data_pool_dicom/IM-0013-0005-0001.dcm
./data/data_pool_dicom/IM-0007-0005-0001.dcm
./data/data_pool_dicom/IM-0032-0025-0001.dcm
./data/data_pool_dicom/IM-0058-0028-0001.dcm
./data/data_pool_dicom/IM-0048-0003-0001.dcm
./data/data_pool_dicom/IM-0063-0030-0001.dcm
./data/data_pool_dicom/IM-0002-0020-0001.dcm
./data/data_pool_dicom/IM-0020-0020-0001.dcm
./data/data_pool_dicom/IM-0021-0022-0001.dcm
./data/data_pool_dicom/IM-0030-0003-0001.dcm
progress: 1140/

./data/data_pool_dicom/IM-0018-0021-0001.dcm
./data/data_pool_dicom/IM-0040-0026-0001.dcm
./data/data_pool_dicom/IM-0047-0015-0001.dcm
./data/data_pool_dicom/IM-0049-0028-0001.dcm
./data/data_pool_dicom/IM-0017-0004-0001.dcm
progress: 1300/2111
./data/data_pool_dicom/IM-0017-0016-0001.dcm
./data/data_pool_dicom/IM-0002-0026-0001.dcm
./data/data_pool_dicom/IM-0022-0024-0001.dcm
./data/data_pool_dicom/IM-0068-0024-0001.dcm
./data/data_pool_dicom/IM-0030-0011-0001.dcm
./data/data_pool_dicom/IM-0055-0025-0001.dcm
./data/data_pool_dicom/IM-0023-0008-0001.dcm
./data/data_pool_dicom/IM-0007-0012-0001.dcm
./data/data_pool_dicom/IM-0066-0020-0001.dcm
./data/data_pool_dicom/IM-0011-0011-0001.dcm
progress: 1310/2111
./data/data_pool_dicom/IM-0021-0014-0001.dcm
./data/data_pool_dicom/IM-0051-0017-0001.dcm
./data/data_pool_dicom/IM-0016-0010-0001.dcm
./data/data_pool_dicom/IM-0030-0017-0001.dcm
./data/data_pool_dicom/IM-0020-0022-0001.dcm
./data/data_pool_dicom/IM-0012-0011-0001.dcm
./data/data_poo

./data/data_pool_dicom/IM-0008-0025-0001.dcm
./data/data_pool_dicom/IM-0037-0015-0001.dcm
./data/data_pool_dicom/IM-0025-0004-0001.dcm
progress: 1480/2111
./data/data_pool_dicom/IM-0032-0029-0001.dcm
./data/data_pool_dicom/IM-0048-0008-0001.dcm
./data/data_pool_dicom/IM-0041-0003-0001.dcm
./data/data_pool_dicom/IM-0017-0015-0001.dcm
./data/data_pool_dicom/IM-0058-0009-0001.dcm
./data/data_pool_dicom/IM-0013-0024-0001.dcm
./data/data_pool_dicom/IM-0052-0046-0001.dcm
./data/data_pool_dicom/IM-0043-0005-0001.dcm
./data/data_pool_dicom/IM-0024-0003-0001.dcm
./data/data_pool_dicom/IM-0065-0023-0001.dcm
progress: 1490/2111
./data/data_pool_dicom/IM-0008-0004-0001.dcm
./data/data_pool_dicom/IM-0039-0001-0001.dcm
./data/data_pool_dicom/IM-0054-0004-0001.dcm
./data/data_pool_dicom/IM-0030-0013-0001.dcm
./data/data_pool_dicom/IM-0058-0025-0001.dcm
./data/data_pool_dicom/IM-0007-0007-0001.dcm
./data/data_pool_dicom/IM-0067-0015-0001.dcm
./data/data_pool_dicom/IM-0002-0025-0001.dcm
./data/data_poo

./data/data_pool_dicom/IM-0033-0028-0001.dcm
./data/data_pool_dicom/IM-0043-0003-0001.dcm
./data/data_pool_dicom/IM-0038-0025-0001.dcm
./data/data_pool_dicom/IM-0056-0014-0001.dcm
progress: 1660/2111
./data/data_pool_dicom/IM-0045-0002-0001.dcm
./data/data_pool_dicom/IM-0071-0026-0001.dcm
./data/data_pool_dicom/IM-0053-0003-0001.dcm
./data/data_pool_dicom/IM-0041-0019-0001.dcm
./data/data_pool_dicom/IM-0052-0022-0001.dcm
./data/data_pool_dicom/IM-0061-0034-0001.dcm
./data/data_pool_dicom/IM-0012-0019-0001.dcm
./data/data_pool_dicom/IM-0018-0016-0001.dcm
./data/data_pool_dicom/IM-0022-0026-0001.dcm
./data/data_pool_dicom/IM-0044-0020-0001.dcm
progress: 1670/2111
./data/data_pool_dicom/IM-0012-0017-0001.dcm
./data/data_pool_dicom/IM-0054-0013-0001.dcm
./data/data_pool_dicom/IM-0031-0001-0001.dcm
./data/data_pool_dicom/IM-0019-0017-0001.dcm
./data/data_pool_dicom/IM-0070-0015-0001.dcm
./data/data_pool_dicom/IM-0020-0029-0001.dcm
./data/data_pool_dicom/IM-0021-0008-0001.dcm
./data/data_poo

# Split into Train, Eval, Test

In [None]:
import load_data_utility
import shutil
import os
import pathlib

#####------ ADJUST SETTINGS BELOW ------#####

# Folder the image paths are loaded from.
data_root = "./data/original_data/data_relabeled_undersampled_png/"
# Folder the generated split is written to.
output_folter = "./data/generated_splits/undersampled/"
# Ratios handed to the split helper; its result is unpacked as
# train / eval / test below.
split_ratios = [0.7, 0.1, 0.2]

#####------ ADJUST SETTINGS ABOVE ------#####

image_paths = load_data_utility.load_image_paths(data_root)

# Quick look at the first few loaded paths.
print(image_paths[:5])

# Fixed seed: presumably keeps the split identical between runs.
(train_paths, eval_paths, test_paths) = load_data_utility.split(
    image_paths,
    split=split_ratios,
    seed=777,
)

def get_label(path):
    """Return the label of an image, i.e. the name of its containing folder.

    For ``"./data/set/2/img.png"`` this returns ``"2"``.

    The name is read directly from the path's parent.  Stripping the
    grandparent string out of the parent string is not equivalent: a textual
    replace removes every occurrence of the grandparent (``"1/1/img.png"``
    would yield ``""``) and indexing ``parents[1]`` raises ``IndexError`` for
    paths with fewer than two parents.

    Args:
        path: Path of the image file (``str`` or path-like).

    Returns:
        The parent folder's name as ``str``; ``""`` when the path has no
        named parent folder.
    """
    return pathlib.Path(path).parent.name


# Sanity check: show one training path and the label derived from it.
print(train_paths[0])
print(get_label(train_paths[0]))

# Rebuild the output folder from scratch so files of an earlier run are
# dropped.
if os.path.exists(output_folter):
    shutil.rmtree(output_folter)

split_names = ["train", "eval", "test"]

# Create <output>/<split>/<label>/ for every split and label.  makedirs also
# creates missing parents of output_folter, which a plain mkdir does not.
for split_name in split_names:
    for label in ("0", "1", "2"):
        os.makedirs(output_folter + split_name + "/" + label + "/")

# Copy every image into the folder matching its split and its label.
for split_name, split_paths in zip(split_names, [train_paths, eval_paths, test_paths]):
    for pngfile in split_paths:
        shutil.copy(pngfile, output_folter + split_name + "/" + get_label(pngfile) + "/")


# Dataset Balancing

## Balancing Utility Functions

In [None]:
import pathlib
import random

# Collect the paths found below each sub-folder of `path`, one list per sub-folder.
def load_sorted_paths(path):
    """Group the contents of every sub-directory of ``path`` into its own list.

    Each direct child directory of ``path`` (except one named ``.DS_Store``)
    is printed and then searched recursively.  Entries named ``.DS_Store``
    are left out of the result.

    Args:
        path: Directory whose sub-directories are scanned.

    Returns:
        A list holding one list of path strings per visited sub-directory,
        in the order ``iterdir`` yields the directories.
    """
    grouped = []

    for entry in pathlib.Path(path).iterdir():
        # Plain files at the top level and a ".DS_Store" entry are skipped.
        if not entry.is_dir() or entry.name == ".DS_Store":
            continue

        print(entry)

        # '**/*' matches everything below the directory, nested folders
        # included.
        members = []
        for found in entry.glob('**/*'):
            if found.name != ".DS_Store":
                members.append(str(found))

        grouped.append(members)

    return grouped


# List the direct children of a directory as path strings.
def load_dir_paths(path):
    """Return the paths of all direct children of ``path``, sorted.

    ``Path.iterdir`` yields entries in arbitrary, filesystem-dependent
    order.  The result is sorted so that callers which shuffle it with a
    fixed random seed get the same outcome on every run and machine.

    Args:
        path: Directory to list.

    Returns:
        A sorted list of path strings, one per direct child of ``path``
        (files and directories alike), excluding entries named
        ``.DS_Store``.
    """
    return sorted(
        str(child)
        for child in pathlib.Path(path).iterdir()
        if child.name != ".DS_Store"
    )

## Inserting Noisy Images

In [None]:
import synthetic_motion_utility as synth
import imageio

# Source images are read from the train/1/ folder; the generated images are
# written into the train/0/ folder.
input_data_dir = "./data/generated_splits/train_set_balanced/train/1/"
output_data_dir = "./data/generated_splits/train_set_balanced/train/0/"

all_image_paths = load_dir_paths(input_data_dir)

# Number of synthetic images requested.
num_synthetic = 400

# Fixed seed so the shuffle picks the same source images for the same listing.
random.seed(a=777)
random.shuffle(all_image_paths)

# Every source image is used at most once, so no more images can be generated
# than there are sources.  Capping the count avoids an IndexError when the
# folder holds fewer than num_synthetic images.
num_available = min(num_synthetic, len(all_image_paths))
if num_available < num_synthetic:
    print("only " + str(num_available) + " source images available, requested " + str(num_synthetic))

for counter, source_path in enumerate(all_image_paths[:num_available]):

    im = imageio.imread(source_path)

    # The running index doubles as the seed handed to the artifact generator
    # and as the number in the output file name.
    im = synth.add_motion_artifact(im, seed=counter)

    imageio.imwrite(output_data_dir + "synthetic_" + str(counter) + ".png", im)

## Oversampling 2-Label Images

In [None]:
import os, glob, shutil

data_root_2 = "./data/generated_splits/train_set_balanced/train/2/"

# Remove duplicates left over from an earlier run BEFORE listing the folder.
# If the folder is listed first, the stale "duplicate_*" files end up in the
# list and copying them after their deletion fails with FileNotFoundError.
for filename in glob.glob(data_root_2 + "duplicate*"):
    os.remove(filename)

image_paths_2 = load_dir_paths(data_root_2)

print(image_paths_2[:10])

# Copy every remaining entry once, which doubles the number of files.
for counter, pngfile in enumerate(image_paths_2):
    shutil.copy(pngfile, data_root_2 + "duplicate_" + str(counter) + ".png")


## Undersampling Images

In [None]:
import os
import random

# Seed the generator so the shuffle below repeats for a given listing order.
random.seed(a=777)

# Only the "1/" sub-folder of the data set is thinned out.
data_root = "./data/original_data/data_relabeled_undersampled_png/" + "1/"

paths_in_dir = load_dir_paths(data_root)
random.shuffle(paths_in_dir)
print(paths_in_dir[:10])

# Re-seed so the keep/remove decisions below repeat as well.
random.seed(a=777)

# WARNING: destructive -- every file is deleted with probability 1/2.
for path in paths_in_dir:

    if not random.choice([True, False]):
        continue

    print("removing: " + path)
    os.remove(path)