In [110]:
# Globals
CROP_SIZE = 400
SPREADSHEET = '/vol/research/mammo2/will/data/batches/metadata/1/batch_1_IMAGE.xls'
DICOM_FILES = '/vol/research/mammo2/will/data/batches/IMAGE_DATABASE/PUBLIC_SHARE/IMAGES/STANDARD_SET/1/'

In [123]:
# functions
print('Started...')
import glob
import pandas as pd
import pydicom
import os
import matplotlib.pyplot as plt
import png
import time
import numpy as np
import os
from fnmatch import fnmatch

def getFileNames(dicom_files):
    fileList = []
    for path, subdirs, files in os.walk(dicom_files):
        for name in files:
            if fnmatch(name, '*.dcm'):
                fileList.append(os.path.join(path, name))

    print(len(fileList), ' dicom images found')
    return fileList   
    

# File walk method
def getFiles(dicom_files, verbose=1):
    fileList = []
    for path, subdirs, files in os.walk(dicom_files):
        for name in files:
            if fnmatch(name, '*.dcm'):
                fileList.append(os.path.join(path, name))

    print(len(fileList), ' dicom images found')


    # Load dicom files into np array
    dicomImg = np.array([])
    count = 0
    print(len(fileList), ' Files found')
    print('Loading images...')
    for f in fileList:
        dicomImg = np.append(dicomImg, pydicom.dcmread(f))
        count += 1
        print(count, '/', len(fileList))
    print('name:\n', dicomImg[0].PresentationIntentType)
    return dicomImg, fileList




def deletePreProcessed(dicomImg, fileList):
    forPresentationCount = 0
    toDelete = []
    for index, _ in enumerate(dicomImg):
        tmp = _.PresentationIntentType
        if tmp == 'FOR PRESENTATION':
            forPresentationCount += 1
        else:
            toDelete.append(index)
    print('dicomImg len:', len(dicomImg))
    np.delete(dicomImg, toDelete)
    mask = np.ones(len(dicomImg), dtype=bool)
    mask[toDelete] = False
    dicomImg = dicomImg[mask]
    fileList = np.asarray(fileList)
    fileList = fileList[mask]
    print('\nTotal DICOM: ', len(dicomImg))
    print('Total FOR PROCESSING: ', forPresentationCount)
    return dicomImg, fileList

# Get the imageSOPIUD of the contralateral images
# given the imageSOPIUD and spreadsheet sheet
def getContralateral(imageSOPIUD, sheet):
    print('imageSOPIUD: ', imageSOPIUD)
    properties = {'imageSOPIUD':[], 'viewPosition':[], 'imageLaterality':[], 'presentationIntentType':[]}
    
    # Find row that has the img
    indx = [_==imageSOPIUD for _ in sheet['ImageSOPIUID']].index(True) # ImageSOPIUID, ReferencedSOPInstanceUID
    studyIUID = sheet['StudyIUID'][indx]
    lesion = dict.fromkeys(properties)
    lesion['viewPosition'] = sheet['ViewPosition'][indx]
    lesion['imageLaterality'] = sheet['ImageLaterality'][indx]
    lesion['presentationIntentType'] = sheet['PresentationIntentType'][indx]
    print('\nlesion[viewPosition]: ', lesion['viewPosition'])
    print('lesion[imageLaterality]: ', lesion['imageLaterality'])
    print('lesion[presentationIntentType]: ', lesion['presentationIntentType'])

    # Get properties for all images in the same studyIUID
    studyGroup = {'imageSOPIUD': [], 'properties':[]}
    for indx, _ in enumerate(sheet['StudyIUID']):
        if studyIUID == _:
            studyGroup['properties'].append(dict.fromkeys(properties))
            studyGroup['properties'][-1]['viewPosition'] = sheet['ViewPosition'][indx]
            studyGroup['properties'][-1]['imageLaterality'] = sheet['ImageLaterality'][indx]
            studyGroup['properties'][-1]['presentationIntentType'] = sheet['PresentationIntentType'][indx]
            studyGroup['imageSOPIUD'].append(sheet['ImageSOPIUID'][indx])
    # Set properties to match
    propertiesToMatch = dict.fromkeys(properties)
    if lesion['imageLaterality'] == 'R':
        propertiesToMatch['imageLaterality'] = 'L'
    else:
        propertiesToMatch['imageLaterality'] = 'R'            
    propertiesToMatch['viewPosition'] = lesion['viewPosition']
    propertiesToMatch['presentationIntentType'] = lesion['presentationIntentType']
    
    print('\npropertiesToMatch[viewPosition]: ', propertiesToMatch['viewPosition'])
    print('propertiesToMatch[imageLaterality]: ', propertiesToMatch['imageLaterality'])
    print('propertiesToMatch[presentationIntentType]: ', propertiesToMatch['presentationIntentType'])

    # Find matches
    matches = {'imageSOPIUD': [], 'properties':[]}
    for i in range(len(studyGroup['properties'])):
        if studyGroup['properties'][i] == propertiesToMatch:
            matches['properties'].append(studyGroup['properties'][i])
            matches['imageSOPIUD'].append(studyGroup['imageSOPIUD'][i])
            print('match made')
    if len(matches['properties']) > 1 or len(matches['properties']) == 0:
        print('    MY_ERROR: ', len(matches['properties']), ' matches for contralateral')
    else:
        print('        MATCH MATCH MATCH len: ',len(matches['properties']))
        return matches['imageSOPIUD'][0]


# Remove files that are not in the spreadsheet
def getFileListROI(fList, sheet, sheetPresentation):    
    img = {}
    filesListROI = []
    for i in range(len(fList)):
        key = os.path.basename(fList[i])[:-4]
        # Find row in the xml file that holds the img info
        try:
            indx = [_==key for _ in sheet['ImageSOPIUID']].index(True) # ImageSOPIUID, ReferencedSOPInstanceUID
            # check that the image is for presentation
            if getSpreadsheetCell('PresentationIntentType', key, sheetPresentation) == 'FOR PRESENTATION':
                filesListROI.append(fList[i])     
            else:
                print(    'Key rejected - for processing:\n', key)
        except ValueError:
            print('MY_ERROR: key not found:\n', key)
    return filesListROI

# Given a sheet and ImageSOPIUID, returns value at column x

def getSpreadsheetCell(column, ImageSOPIUID, sheet):
    # find index for ImageSOPIUID
    indx = [_==ImageSOPIUID for _ in sheet['ImageSOPIUID']].index(True)
    return sheet[column][indx]
   
# Import xls file, extract ROI coords, get pixel array from DICOM image
def buildDict(dicomImg, fileList):
    #xls = pd.ExcelFile('/vol/vssp/cvpwrkspc01/scratch/wm0015/download/batch_1_IMAGE.xls')
    #xls = pd.ExcelFile('/vol/vssp/cvpwrkspc01/scratch/wm0015/batch_50_IMAGE.xls')
    xls = pd.ExcelFile(SPREADSHEET)
    sheet = xls.parse(0)

    # Create a dict where the key is the image name
    # Each key has the image, and coords
    img = {}
    for i in range(len(fileList)):
        key = os.path.basename(fileList[i])[:-4]
        # Find row in the xml file that holds the img info
        try:
            indx = [_==key for _ in sheet['ImageSOPIUID']].index(True) # ImageSOPIUID, ReferencedSOPInstanceUID
            img.update({key:{}})
            img[key].update({'img': dicomImg[i].pixel_array})
            img[key].update({'x': [sheet['X1'][indx], sheet['X2'][indx]]})
            img[key].update({'y': [sheet['Y1'][indx], sheet['Y2'][indx]]})
        except ValueError:
            print('MY_ERROR: key not found:\n', key)

    print(len(img), 'DICOM images extracted')
    # print(img)
    # Crop the images to given ROI
    toDelete = [] # Keep track of error causing keys and delete after loop
    for key in img:
        try:
            tmp = img[key]['img']
            x = img[key]['x']
            y = img[key]['y']
            x = [int(x[0]), int(x[1])]
            y = [int(y[0]), int(y[1])]
            #x is width, y is height
            #in numpy array, y,x
            img[key].update({'cropROI':tmp[y[0]:y[1], x[0]:x[1]]}) 
        except:
            print('ROI extraction failed...Removing key\nkey:  ',key, '\nx  :', x,'\ny:  ',y)
            toDelete.append(key)

    #img_calc = img['1.2.840.113681.2230565232.954.3504500766.32'] 
    for _ in toDelete:
        img.pop(_)
    return img


# Write images to disk with markers and basic crop    
def writeMarkedImages(img): 
    for key in img:
        plt.figure(figsize=(20,20))
        marker = [(img[key]['x'][1] + img[key]['x'][0])/2, (img[key]['y'][1] + img[key]['y'][0])/2 ]   
        plt.imshow(img[key]['img']/16383, cmap='gray', vmin=0, vmax=0.2)
        plt.plot(marker[0], marker[1], marker='x', color=[1,0,1], markersize=30)
        plt.savefig('/vol/vssp/cvpwrkspc01/scratch/wm0015/markers/tmp/' + key +'_full.png')
        #plt.show()
        plt.close()

        plt.figure(figsize=(20,20))
        plt.imshow(img[key]['cropROI'], cmap='gray') 
        plt.savefig('/vol/vssp/cvpwrkspc01/scratch/wm0015/markers/tmp/' + key +'_crop.png')   
        #plt.show()
        plt.close()

# Crop the images so that the ROI is centred but all crops are the same size
def computeCrops(img):
    for key in img:
        tmp = img[key]['img']
        x = img[key]['x']
        y = img[key]['y']
        c = [round((x[0]+x[1])/2), round((y[0]+y[1])/2)]
        # Pad images before cropping (wrap around)
        pad = 1000
        tmp = np.pad(tmp, pad, mode='wrap')
        img[key].update({'crop': tmp[int(c[1]-CROP_SIZE/2+pad):int(c[1]+CROP_SIZE/2+pad), int(c[0]-CROP_SIZE/2+pad):int(c[0]+CROP_SIZE/2+pad)]})
        # Reshape from (256, 256) to (256, 256, 1)
        img[key]['crop'] = np.reshape(img[key]['crop'],(img[key]['crop'].shape[0], img[key]['crop'].shape[1], 1))
    return img
    
# Find bit depth
def findBitDepth(img):
    print('Find bit depth...')
    maxmax = 0
    for key in img:
        tmp = img[key]['img']
        print(np.amax(tmp))
        if np.amax(tmp) > maxmax:
            maxmax = np.amax(tmp)
    print('The largest value is: ', maxmax)
    
# Find average ROI size
def findAverageROISize(img):
    print('Find average ROI size...')
    totalX = 0
    totalY = 0
    for key in img:
        x = img[key]['x']
        y = img[key]['y']
        totalX += x[1] - x[0]
        totalY += y[1] - y[0]
    print('Average ROI width: ', totalX/len(img), '\nAverage ROI length: ', totalY/len(img))
    
# View crops / save to disk
def writeCropsToDisk(img):
    count = 0
    for key in img:
        count+=1
        f = open('/vol/vssp/cvpwrkspc01/scratch/wm0015/batch1_crop/' + key + '.png', 'wb')
        w = png.Writer(width = CROP_SIZE, height = CROP_SIZE, bitdepth=16, greyscale=True)
        w.write(f, img[key]['crop'])
        f.close()
        print(count, '/', len(img))
        if count == -1:
            break
            
def buildArrayForPickle(img):    #img.update({key:{}})
    allCrops = {}
    for key in img:
        allCrops.update({key:[]})
        allCrops[key] = img[key]['crop']
    return allCrops
        
def savePickle(ob):
    import pickle
    print('Pickling...')
    with open('/vol/vssp/cvpwrkspc01/scratch/wm0015/batch1_normalCrop400.pickle', 'wb') as output:
        pickle.dump(ob, output, pickle.HIGHEST_PROTOCOL)
        
def buildDictNormals(dicomImg, fileList):
    from scipy import ndimage
    # Get pixel values
    # Draw square around breast
    # Select random centre for crop within breast
    img = {}
    for i in range(len(fileList)):
        key = os.path.basename(fileList[i])[:-4]
        img.update({key:{}})
        img[key].update({'img': dicomImg[i].pixel_array})
        # Get centre of mass (breast centre)
        centre = ndimage.measurements.center_of_mass(img[key]['img'])
        centre = np.asarray(centre).astype(int)
        
#         #Check images
#         plt.figure(figsize=(20,20))  
#         #plt.imshow(img[key]['img']/16383, cmap='gray', vmin=0, vmax=0.2)
#         plt.imshow(img[key]['img'], cmap='gray')
#         plt.plot(centre[1], centre[0], marker='x', color=[1,0,1], markersize=30)
#         #plt.savefig('/vol/vssp/cvpwrkspc01/scratch/wm0015/markers/tmp/' + key +'_full.png')
#         plt.show()
#         plt.close()

        #LATER
#         img[key].update({'x': [sheet['X1'][indx], sheet['X2'][indx]]})
#         img[key].update({'y': [sheet['Y1'][indx], sheet['Y2'][indx]]})


print('Done')

Started...
Done


2692


In [20]:
# Load images into ram
dicomImg, fileList = getFiles(DICOM_FILES)    
print('Done')

3248  dicom images found
3248  Files found
Loading images...
1 / 3248
2 / 3248
3 / 3248
4 / 3248
5 / 3248
6 / 3248
7 / 3248
8 / 3248
9 / 3248
10 / 3248
11 / 3248
12 / 3248
13 / 3248
14 / 3248
15 / 3248
16 / 3248
17 / 3248
18 / 3248
19 / 3248
20 / 3248
21 / 3248
22 / 3248
23 / 3248
24 / 3248
25 / 3248
26 / 3248
27 / 3248
28 / 3248
29 / 3248
30 / 3248
31 / 3248
32 / 3248
33 / 3248
34 / 3248
35 / 3248
36 / 3248
37 / 3248
38 / 3248
39 / 3248
40 / 3248
41 / 3248
42 / 3248
43 / 3248
44 / 3248
45 / 3248
46 / 3248
47 / 3248
48 / 3248
49 / 3248
50 / 3248
51 / 3248
52 / 3248
53 / 3248
54 / 3248
55 / 3248
56 / 3248
57 / 3248
58 / 3248
59 / 3248
60 / 3248
61 / 3248
62 / 3248
63 / 3248
64 / 3248
65 / 3248
66 / 3248
67 / 3248
68 / 3248
69 / 3248
70 / 3248
71 / 3248
72 / 3248
73 / 3248
74 / 3248
75 / 3248
76 / 3248
77 / 3248
78 / 3248
79 / 3248
80 / 3248
81 / 3248
82 / 3248
83 / 3248
84 / 3248
85 / 3248
86 / 3248
87 / 3248
88 / 3248
89 / 3248
90 / 3248
91 / 3248
92 / 3248
93 / 3248
94 / 3248
95 / 324

751 / 3248
752 / 3248
753 / 3248
754 / 3248
755 / 3248
756 / 3248
757 / 3248
758 / 3248
759 / 3248
760 / 3248
761 / 3248
762 / 3248
763 / 3248
764 / 3248
765 / 3248
766 / 3248
767 / 3248
768 / 3248
769 / 3248
770 / 3248
771 / 3248
772 / 3248
773 / 3248
774 / 3248
775 / 3248
776 / 3248
777 / 3248
778 / 3248
779 / 3248
780 / 3248
781 / 3248
782 / 3248
783 / 3248
784 / 3248
785 / 3248
786 / 3248
787 / 3248
788 / 3248
789 / 3248
790 / 3248
791 / 3248
792 / 3248
793 / 3248
794 / 3248
795 / 3248
796 / 3248
797 / 3248
798 / 3248
799 / 3248
800 / 3248
801 / 3248
802 / 3248
803 / 3248
804 / 3248
805 / 3248
806 / 3248
807 / 3248
808 / 3248
809 / 3248
810 / 3248
811 / 3248
812 / 3248
813 / 3248
814 / 3248
815 / 3248
816 / 3248
817 / 3248
818 / 3248
819 / 3248
820 / 3248
821 / 3248
822 / 3248
823 / 3248
824 / 3248
825 / 3248
826 / 3248
827 / 3248
828 / 3248
829 / 3248
830 / 3248
831 / 3248
832 / 3248
833 / 3248
834 / 3248
835 / 3248
836 / 3248
837 / 3248
838 / 3248
839 / 3248
840 / 3248
841 / 3248

1455 / 3248
1456 / 3248
1457 / 3248
1458 / 3248
1459 / 3248
1460 / 3248
1461 / 3248
1462 / 3248
1463 / 3248
1464 / 3248
1465 / 3248
1466 / 3248
1467 / 3248
1468 / 3248
1469 / 3248
1470 / 3248
1471 / 3248
1472 / 3248
1473 / 3248
1474 / 3248
1475 / 3248
1476 / 3248
1477 / 3248
1478 / 3248
1479 / 3248
1480 / 3248
1481 / 3248
1482 / 3248
1483 / 3248
1484 / 3248
1485 / 3248
1486 / 3248
1487 / 3248
1488 / 3248
1489 / 3248
1490 / 3248
1491 / 3248
1492 / 3248
1493 / 3248
1494 / 3248
1495 / 3248
1496 / 3248
1497 / 3248
1498 / 3248
1499 / 3248
1500 / 3248
1501 / 3248
1502 / 3248
1503 / 3248
1504 / 3248
1505 / 3248
1506 / 3248
1507 / 3248
1508 / 3248
1509 / 3248
1510 / 3248
1511 / 3248
1512 / 3248
1513 / 3248
1514 / 3248
1515 / 3248
1516 / 3248
1517 / 3248
1518 / 3248
1519 / 3248
1520 / 3248
1521 / 3248
1522 / 3248
1523 / 3248
1524 / 3248
1525 / 3248
1526 / 3248
1527 / 3248
1528 / 3248
1529 / 3248
1530 / 3248
1531 / 3248
1532 / 3248
1533 / 3248
1534 / 3248
1535 / 3248
1536 / 3248
1537 / 3248
1538

2138 / 3248
2139 / 3248
2140 / 3248
2141 / 3248
2142 / 3248
2143 / 3248
2144 / 3248
2145 / 3248
2146 / 3248
2147 / 3248
2148 / 3248
2149 / 3248
2150 / 3248
2151 / 3248
2152 / 3248
2153 / 3248
2154 / 3248
2155 / 3248
2156 / 3248
2157 / 3248
2158 / 3248
2159 / 3248
2160 / 3248
2161 / 3248
2162 / 3248
2163 / 3248
2164 / 3248
2165 / 3248
2166 / 3248
2167 / 3248
2168 / 3248
2169 / 3248
2170 / 3248
2171 / 3248
2172 / 3248
2173 / 3248
2174 / 3248
2175 / 3248
2176 / 3248
2177 / 3248
2178 / 3248
2179 / 3248
2180 / 3248
2181 / 3248
2182 / 3248
2183 / 3248
2184 / 3248
2185 / 3248
2186 / 3248
2187 / 3248
2188 / 3248
2189 / 3248
2190 / 3248
2191 / 3248
2192 / 3248
2193 / 3248
2194 / 3248
2195 / 3248
2196 / 3248
2197 / 3248
2198 / 3248
2199 / 3248
2200 / 3248
2201 / 3248
2202 / 3248
2203 / 3248
2204 / 3248
2205 / 3248
2206 / 3248
2207 / 3248
2208 / 3248
2209 / 3248
2210 / 3248
2211 / 3248
2212 / 3248
2213 / 3248
2214 / 3248
2215 / 3248
2216 / 3248
2217 / 3248
2218 / 3248
2219 / 3248
2220 / 3248
2221

2821 / 3248
2822 / 3248
2823 / 3248
2824 / 3248
2825 / 3248
2826 / 3248
2827 / 3248
2828 / 3248
2829 / 3248
2830 / 3248
2831 / 3248
2832 / 3248
2833 / 3248
2834 / 3248
2835 / 3248
2836 / 3248
2837 / 3248
2838 / 3248
2839 / 3248
2840 / 3248
2841 / 3248
2842 / 3248
2843 / 3248
2844 / 3248
2845 / 3248
2846 / 3248
2847 / 3248
2848 / 3248
2849 / 3248
2850 / 3248
2851 / 3248
2852 / 3248
2853 / 3248
2854 / 3248
2855 / 3248
2856 / 3248
2857 / 3248
2858 / 3248
2859 / 3248
2860 / 3248
2861 / 3248
2862 / 3248
2863 / 3248
2864 / 3248
2865 / 3248
2866 / 3248
2867 / 3248
2868 / 3248
2869 / 3248
2870 / 3248
2871 / 3248
2872 / 3248
2873 / 3248
2874 / 3248
2875 / 3248
2876 / 3248
2877 / 3248
2878 / 3248
2879 / 3248
2880 / 3248
2881 / 3248
2882 / 3248
2883 / 3248
2884 / 3248
2885 / 3248
2886 / 3248
2887 / 3248
2888 / 3248
2889 / 3248
2890 / 3248
2891 / 3248
2892 / 3248
2893 / 3248
2894 / 3248
2895 / 3248
2896 / 3248
2897 / 3248
2898 / 3248
2899 / 3248
2900 / 3248
2901 / 3248
2902 / 3248
2903 / 3248
2904

In [100]:
# _______Conterlateral____________
# Get file names
lesionPath = '/vol/research/mammo2/will/data/batches/roi/batch_1/lesions'
fileListNames = getFileNames(lesionPath)

xls = pd.ExcelFile(SPREADSHEET)
sheet = xls.parse(0)
contralaterals = []
for _ in fileListNames:
    imageSOPIUD = os.path.basename(_)[:-4]
    tmp = getContralateral(imageSOPIUD, sheet)
    if tmp != None:
        contralaterals.append(tmp)
print(len(contralaterals))
print(len(contralaterals[5]))
print(contralaterals[1])



400  dicom images found
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.6235.0

lesion[viewPosition]:  latero-medial
lesion[imageLaterality]:  R
lesion[presentationIntentType]:  FOR PRESENTATION

propertiesToMatch[viewPosition]:  latero-medial
propertiesToMatch[imageLaterality]:  L
propertiesToMatch[presentationIntentType]:  FOR PRESENTATION
    MY_ERROR:  0  matches for contralateral
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.13888212.1077.1517512309538.225.0

lesion[viewPosition]:  medio-lateral oblique
lesion[imageLaterality]:  L
lesion[presentationIntentType]:  FOR PROCESSING

propertiesToMatch[viewPosition]:  medio-lateral oblique
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
match made
        MATCH MATCH MATCH len:  1
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.6225.0

lesion[viewPosition]:  latero-medial
lesion[imageLaterality]:  R
lesion[presentationIntentType]:  FOR PROCE

propertiesToMatch[presentationIntentType]:  FOR PRESENTATION
match made
        MATCH MATCH MATCH len:  1
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.2062.0

lesion[viewPosition]:  cranio-caudal
lesion[imageLaterality]:  L
lesion[presentationIntentType]:  FOR PROCESSING

propertiesToMatch[viewPosition]:  cranio-caudal
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
    MY_ERROR:  0  matches for contralateral
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.2075.0

lesion[viewPosition]:  medio-lateral
lesion[imageLaterality]:  L
lesion[presentationIntentType]:  FOR PRESENTATION

propertiesToMatch[viewPosition]:  medio-lateral
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PRESENTATION
    MY_ERROR:  0  matches for contralateral
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.4844.0

lesion[viewPosition]:  cranio-caudal
lesio

propertiesToMatch[viewPosition]:  cranio-caudal
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
    MY_ERROR:  0  matches for contralateral
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.2005.0

lesion[viewPosition]:  medio-lateral oblique
lesion[imageLaterality]:  R
lesion[presentationIntentType]:  FOR PROCESSING

propertiesToMatch[viewPosition]:  medio-lateral oblique
propertiesToMatch[imageLaterality]:  L
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
match made
        MATCH MATCH MATCH len:  1
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.13888212.1077.1517512309538.820.0

lesion[viewPosition]:  medio-lateral oblique
lesion[imageLaterality]:  R
lesion[presentationIntentType]:  FOR PRESENTATION

propertiesToMatch[viewPosition]:  medio-lateral oblique
propertiesToMatch[imageLaterality]:  L
propertiesToMatch[presentationIntentType]:  FOR PRESENTATION
match made
        MATCH MATCH MATCH len:  1
imag

propertiesToMatch[viewPosition]:  medio-lateral
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
    MY_ERROR:  0  matches for contralateral
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.2505.0

lesion[viewPosition]:  medio-lateral oblique
lesion[imageLaterality]:  L
lesion[presentationIntentType]:  FOR PRESENTATION

propertiesToMatch[viewPosition]:  medio-lateral oblique
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PRESENTATION
match made
        MATCH MATCH MATCH len:  1
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.6970.0

lesion[viewPosition]:  cranio-caudal
lesion[imageLaterality]:  R
lesion[presentationIntentType]:  FOR PROCESSING

propertiesToMatch[viewPosition]:  cranio-caudal
propertiesToMatch[imageLaterality]:  L
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
match made
match made
    MY_ERROR:  2  matches for contralateral

lesion[viewPosition]:  cranio-caudal
lesion[imageLaterality]:  L
lesion[presentationIntentType]:  FOR PROCESSING

propertiesToMatch[viewPosition]:  cranio-caudal
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
match made
        MATCH MATCH MATCH len:  1
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.13888212.1077.1517512309538.812.0

lesion[viewPosition]:  cranio-caudal
lesion[imageLaterality]:  R
lesion[presentationIntentType]:  FOR PRESENTATION

propertiesToMatch[viewPosition]:  cranio-caudal
propertiesToMatch[imageLaterality]:  L
propertiesToMatch[presentationIntentType]:  FOR PRESENTATION
match made
        MATCH MATCH MATCH len:  1
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.5354.0

lesion[viewPosition]:  cranio-caudal
lesion[imageLaterality]:  R
lesion[presentationIntentType]:  FOR PROCESSING

propertiesToMatch[viewPosition]:  cranio-caudal
propertiesToMatch[imageLaterality]:  L
propertiesToMatch[present

propertiesToMatch[viewPosition]:  medio-lateral oblique
propertiesToMatch[imageLaterality]:  L
propertiesToMatch[presentationIntentType]:  FOR PRESENTATION
match made
        MATCH MATCH MATCH len:  1
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.4893.0

lesion[viewPosition]:  cranio-caudal
lesion[imageLaterality]:  L
lesion[presentationIntentType]:  FOR PRESENTATION

propertiesToMatch[viewPosition]:  cranio-caudal
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PRESENTATION
match made
match made
    MY_ERROR:  2  matches for contralateral
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.3496.0

lesion[viewPosition]:  cranio-caudal
lesion[imageLaterality]:  L
lesion[presentationIntentType]:  FOR PROCESSING

propertiesToMatch[viewPosition]:  cranio-caudal
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
match made
        MATCH MATCH MATCH len:  1
imag

propertiesToMatch[viewPosition]:  medio-lateral oblique
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
    MY_ERROR:  0  matches for contralateral
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.5982.0

lesion[viewPosition]:  cranio-caudal
lesion[imageLaterality]:  L
lesion[presentationIntentType]:  FOR PRESENTATION

propertiesToMatch[viewPosition]:  cranio-caudal
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PRESENTATION
    MY_ERROR:  0  matches for contralateral
imageSOPIUD:  1.2.826.0.1.3680043.9.3218.1.1.1388821.1077.1517512309538.5170.0

lesion[viewPosition]:  medio-lateral oblique
lesion[imageLaterality]:  L
lesion[presentationIntentType]:  FOR PROCESSING

propertiesToMatch[viewPosition]:  medio-lateral oblique
propertiesToMatch[imageLaterality]:  R
propertiesToMatch[presentationIntentType]:  FOR PROCESSING
    MY_ERROR:  0  matches for contralateral
imageSOPIUD:  

In [124]:
# ___________Get ROIs______________

# Get list of files that are for presentation and have ROIs
# Copy these files to a new folder
from shutil import copyfile
dstCopy = '/vol/research/mammo2/will/data/batches/roi/batch_1/lesions_forPresentation'
xls = pd.ExcelFile(SPREADSHEET)
sheet = xls.parse(1)
sheetPres = xls.parse(0)

lesionPath = DICOM_FILES
fileListNames = getFileNames(lesionPath)
#Get list of files that are in the spreadsheet with ROIs
print('len(fileListNames): ', len(fileListNames))
fileListROI = getFileListROI(fileListNames, sheet, sheetPres)
#Copy files to new folder
for index, path in enumerate(fileListROI):
    copyfile(path, dstCopy + '/' + os.path.basename(fileList[index]))
    print(index, '\\', len(fileListROI) )


2692  dicom images found
len(fileListNames):  2692
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.2673.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.2685.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.2686.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.2680.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.2668.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.2674.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.1641.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.1643.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.1637.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.1639.0
MY_ERROR: key not found:
 1.2.826.0.1.3

 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4327.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4320.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4321.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4332.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4339.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4357.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4347.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4364.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4349.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4342.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.4362.0
MY_ERROR: key not found:

 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5775.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5768.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5769.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5763.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5796.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5787.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5785.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5782.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5792.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5790.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5783.0
MY_ERROR: key not found:

MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.68298180.9661.1516806470059.104.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.68298180.9661.1516806470059.132.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5362.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5370.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5360.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5364.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5366.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5368.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5347.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5336.0
MY_ERROR: key not found:
 1.2.826.0.1.3680043.9.3218.1.1.6829818.9661.1516806470059.5342.0

0 \ 385
1 \ 385
2 \ 385
3 \ 385
4 \ 385
5 \ 385
6 \ 385
7 \ 385
8 \ 385
9 \ 385
10 \ 385
11 \ 385
12 \ 385
13 \ 385
14 \ 385
15 \ 385
16 \ 385
17 \ 385
18 \ 385
19 \ 385
20 \ 385
21 \ 385
22 \ 385
23 \ 385
24 \ 385
25 \ 385
26 \ 385
27 \ 385
28 \ 385
29 \ 385
30 \ 385
31 \ 385
32 \ 385
33 \ 385
34 \ 385
35 \ 385
36 \ 385
37 \ 385
38 \ 385
39 \ 385
40 \ 385
41 \ 385
42 \ 385
43 \ 385
44 \ 385
45 \ 385
46 \ 385
47 \ 385
48 \ 385
49 \ 385
50 \ 385
51 \ 385
52 \ 385
53 \ 385
54 \ 385
55 \ 385
56 \ 385
57 \ 385
58 \ 385
59 \ 385
60 \ 385
61 \ 385
62 \ 385
63 \ 385
64 \ 385
65 \ 385
66 \ 385
67 \ 385
68 \ 385
69 \ 385
70 \ 385
71 \ 385
72 \ 385
73 \ 385
74 \ 385
75 \ 385
76 \ 385
77 \ 385
78 \ 385
79 \ 385
80 \ 385
81 \ 385
82 \ 385
83 \ 385
84 \ 385
85 \ 385
86 \ 385
87 \ 385
88 \ 385
89 \ 385
90 \ 385
91 \ 385
92 \ 385
93 \ 385
94 \ 385
95 \ 385
96 \ 385
97 \ 385
98 \ 385
99 \ 385
100 \ 385
101 \ 385
102 \ 385
103 \ 385
104 \ 385
105 \ 385
106 \ 385
107 \ 385
108 \ 385
109 \ 385
110 \ 385


In [128]:
# Get PresentationIntentType for every image
xls = pd.ExcelFile(SPREADSHEET)
sheetPres = xls.parse(0)

for _ in fileListROI:
    print(getSpreadsheetCell('PresentationIntentType', os.path.basename(_)[:-4], sheetPres))
print('len(fileListROI): ', len(fileListROI))


FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATION
FOR PRESENTATI

In [18]:
dicomImg, fileList = deletePreProcessed(dicomImg, fileList)
dicomImg = deletePreProcessed(dicomImg, fileList)
img = buildDict(dicomImg, fileList)
img = computeCrops(img)
#buildDictNormals(dicomImg, fileList)
# findAverageROISize(img)
# findBitDepth(img)
# writeMarkedImages(img)
# writeCropsToDisk(img)
# savePickle(buildArrayForPickle(img)) 

AttributeError: 'numpy.ndarray' object has no attribute 'PresentationIntentType'

In [None]:
# Copy contralateral image


In [41]:
# Read the pickle file
import pickle
objects = []
with (open("/vol/vssp/cvpwrkspc01/scratch/wm0015/batch1_malignantCrop256.pickle", "rb")) as openfile:
    while True:
        try:
            objects.append(pickle.load(openfile))
        except EOFError:
            break
# for key in image:
#     print(key)
#print(objects[0]['crop'].shape)
images = objects[0]

print('Find bit depth...')
maxmax = 0
for key in images:
    tmp = images[key]
    print(np.amax(tmp))
    if np.amax(tmp) > maxmax:
        maxmax = np.amax(tmp)
print('The largest value is: ', maxmax)




Find bit depth...
858
724
735
566
760
13717
657
618
511
760
2129
725
662
674
483
11723
772
566
1267
897
684
751
983
679
654
1556
861
565
729
673
679
788
616
7089
555
502
822
705
661
758
632
749
650
490
677
11926
11483
2129
704
668
665
1004
770
603
1051
682
1236
584
665
1050
654
830
1073
781
773
661
734
735
1319
724
797
729
665
998
744
781
9982
552
930
777
697
679
14222
773
763
952
631
773
658
791
532
747
1071
720
807
717
657
688
741
16383
729
780
617
772
536
522
533
618
674
982
607
798
798
694
694
742
731
1135
995
700
858
836
575
707
894
680
1075
722
1269
1115
668
732
753
563
865
1941
1400
661
768
772
1133
1154
697
509
615
16383
698
705
8948
616
867
864
2951
665
701
713
788
4739
875
684
884
7484
522
1137
1102
541
1113
740
617
1413
597
613
860
942
545
637
511
656
722
587
674
726
5446
685
816
1954
741
769
654
608
828
760
794
15812
2071
680
833
624
1211
704
572
604
606
582
769
599
718
535
577
594
905
502
569
504
749
655
584
785
16383
555
790
659
671
817
648
10970
763
752
12259
909
6188
75