In [None]:
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten,Dropout,Conv2D,MaxPooling2D, BatchNormalization, LSTM
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing import image
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras_preprocessing.image.dataframe_iterator import DataFrameIterator
import matplotlib.pyplot as plt
import pydicom
import scipy.ndimage
from sklearn.cluster import KMeans
from skimage import morphology
from skimage import measure
from skimage.transform import resize
from glob import glob
from skimage.io import imread
from zipfile import ZipFile as zf

# Load the Keras model stored in the "lungs-detector" Kaggle input dataset and
# then load the separately stored weight file into it. `lungs` is a module-level
# global: sample_stack's inner `masker` calls it on each sampled slice and
# counts the bright pixels of its output.
lungs = load_model('/kaggle/input/lungs-detector/model.h5')
lungs.load_weights('/kaggle/input/lungs-detector/model-weights.h5')

# total studies 7278
df = pd.read_csv("/kaggle/input/rsna-str-pulmonary-embolism-detection/train.csv")

# Collect the rows of the first 10 studies: `lst` holds one sub-list of row
# Series per study. A study is a run of consecutive rows sharing the same
# StudyInstanceUID (assumes each study's rows are contiguous in the CSV).
lst, sublst = [], []
watcher = ''
for idx, x in df.iterrows():
    study = x['StudyInstanceUID']
    if sublst and study != watcher:
        # The study changed: close the finished group before starting a new one.
        lst.append(sublst)
        sublst = []
        if len(lst) >= 10:
            break
    watcher = study
    # Always keep the current row, including the first row of a new study
    # (previously that row was silently dropped).
    sublst.append(x)
else:
    # The CSV ended before 10 studies were collected: keep the trailing group.
    if sublst:
        lst.append(sublst)

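# Ignore this: disabled snippet (kept as a bare string) that zipped the DICOM files of the selected studies into test.zip.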
"""   
zz = zf('test.zip','w')
for patient in lst:
    for rec in patient:
        location = '/kaggle/input/rsna-str-pulmonary-embolism-detection/train/'+rec['StudyInstanceUID'] + '/' + rec['SeriesInstanceUID'] + '/' + rec['SOPInstanceUID'] + '.dcm'
        zz.write(location)
zz.close()
"""
        
        

def trans(img):
    """Rescale an image to 8-bit grey levels.

    Negative values are clipped to 0 and the remaining values are scaled so
    that the maximum of the input maps to 255, then truncated to uint8.

    Args:
        img: array-like of numeric pixel values.

    Returns:
        A ``numpy.uint8`` array with the same shape as ``img``. An empty input,
        or an input whose maximum is not positive (all zeros / all negative),
        yields an all-zero array instead of dividing by zero.
    """
    img_2d = np.asarray(img).astype(float)
    if img_2d.size == 0:
        return np.zeros(img_2d.shape, dtype=np.uint8)

    peak = img_2d.max()
    # `not peak > 0` also covers a NaN maximum; nothing meaningful to scale.
    if not peak > 0:
        return np.zeros(img_2d.shape, dtype=np.uint8)

    img_2d_scaled = (np.maximum(img_2d, 0) / peak) * 255.0
    return np.uint8(img_2d_scaled)

def load_scan(path):
    """Read every DICOM file in a series directory, ordered by InstanceNumber.

    Args:
        path: directory containing the DICOM files of one series.

    Returns:
        A list with one entry per slice, in InstanceNumber order. Each entry is
        ``[pixels, uid, before_uid, after_uid]`` where ``pixels`` is the slice
        rescaled to uint8 by ``trans`` and the three UIDs are the values of tag
        (0008,0018) (SOP Instance UID) for the slice itself, the previous slice
        and the next slice. The first slice uses its own UID as ``before_uid``
        and the last slice uses its own UID as ``after_uid``; a single-slice
        series therefore references itself on both sides.
    """
    # dcmread is the same reader `window` uses; read_file is its deprecated alias.
    slices = [pydicom.dcmread(os.path.join(path, s)) for s in os.listdir(path)]
    slices.sort(key=lambda x: int(x.InstanceNumber))

    uids = [str(s.get(0x00080018).value) for s in slices]
    last = len(slices) - 1

    copy = []
    for i, s in enumerate(slices):
        # Clamp the neighbour indices so the ends (and a one-slice series)
        # fall back to the slice itself instead of indexing out of range.
        before_uid = uids[max(i - 1, 0)]
        after_uid = uids[min(i + 1, last)]
        copy.append([np.array(trans(s.pixel_array)), uids[i], before_uid, after_uid])

    return copy

def check(x):
    """Return ``x`` as an int, using its first element when it is multi-valued.

    DICOM attributes may hold a single value or several (e.g. a
    ``pydicom.multival.MultiValue``). A single value is converted with
    ``int(x)``; for a sequence, which ``int()`` rejects with ``TypeError``,
    the first element is converted instead.

    Args:
        x: a number, numeric string, or an indexable sequence of those.

    Returns:
        ``int(x)``, or ``int(x[0])`` when ``x`` is a sequence. Fractional
        values are truncated by ``int``.
    """
    try:
        return int(x)
    except TypeError:
        # Not directly convertible: treat it as multi-valued and take item 0.
        return int(x[0])
    
def window(path, WL=100, WW=700):
    """Load one DICOM file and render it as an 8-bit windowed image.

    The pixel data is rescaled with the file's (0028,1053) slope and
    (0028,1052) intercept (both read through ``check``, i.e. as ints),
    clipped to the window ``[WL - WW // 2, WL + WW // 2]`` and stretched so
    that the smallest clipped value maps to 0 and the largest to 255.

    Args:
        path: path of the DICOM file.
        WL: window level (centre of the intensity window).
        WW: window width.

    Returns:
        A uint8 array equal to the pixel array's shape plus a trailing axis of
        length 1. If every pixel has the same value after clipping, the result
        is all zeros rather than the outcome of a division by zero.
    """
    f = pydicom.dcmread(path)
    intercept = check(f[('0028','1052')].value)
    slope = check(f[('0028','1053')].value)

    # Work in float so a negative intercept cannot wrap around or overflow
    # when the stored pixel dtype is an unsigned integer.
    img = f.pixel_array.astype(np.float64)
    img = (img * slope + intercept)  # translation adjustments given in the dicom file

    upper, lower = WL + WW // 2, WL - WW // 2
    X = np.clip(img, lower, upper)
    X = X - np.min(X)

    peak = np.max(X)
    if peak > 0:
        X = X / peak
    # When peak == 0 the image is uniform and X is already all zeros.

    X = (X * 255.0).astype('uint8')
    X = np.expand_dims(X, axis=2)
    return X

def fuser(ds1,ds2,ds3):
    """Build a 3-channel image from three DICOM files.

    Each path is rendered with ``window`` (default level/width), which yields
    an array with a trailing axis of length 1; the three results are joined
    along that axis in the order ds1, ds2, ds3.
    """
    channels = [window(dicom_path) for dicom_path in (ds1, ds2, ds3)]
    return np.concatenate(channels, axis=2)

def sample_stack(stack, rows=6, cols=6, start_with=0, show_every=0):
    """Pick six representative slices from a scan using the `lungs` model.

    Up to ``rows * cols`` slices are sampled from ``stack`` at a regular step.
    Each sampled slice is resized to 64x64, passed through the module-level
    ``lungs`` model, and scored by the number of output pixels that are at
    least 200 after ``trans`` rescaling ("white" pixels). Slices whose score
    reaches a fraction of the best score are kept, and six of them are taken
    at the 20%, 30%, 40%, 50%, 60% and 70% positions of the kept list.

    Args:
        stack: list of ``[pixels, uid, before_uid, after_uid]`` entries, as
            produced by ``load_scan``.
        rows, cols: their product is the maximum number of sampled slices.
        start_with: index of the first sampled slice.
        show_every: step between sampled slices; 0 means "derive it from the
            stack length" (at least 1, so short stacks are sampled slice by
            slice instead of re-reading the same slice).

    Returns:
        A list of six ``[uid, before_uid, after_uid]`` entries (entries may
        repeat when few slices pass the filter), or an empty list when no
        slice could be sampled.
    """
    white_sum = 600            # best-score boundary between the two fractions
    filter_percentage = 0.7    # fraction of the best score kept when it is < white_sum
    filter_percentage2 = 0.6   # fraction of the best score kept otherwise
    picks = (0.2, 0.3, 0.4, 0.5, 0.6, 0.7)  # relative positions of the returned slices

    def masker(img, w=64, h=64, threshold=200):
        # Run the model on one resized slice and count its bright output pixels.
        img = resize(img, (w, h))
        img2 = img.reshape(1, w, h, 1)
        pred = lungs(img2, training=False).numpy()
        pred = pred.reshape(w, h, 1)
        pred = trans(pred)
        white = np.count_nonzero(np.all(pred >= [threshold], axis=2))
        return pred, white

    if show_every == 0:
        # A step of 0 would sample the same slice rows*cols times.
        show_every = max(1, len(stack) // (rows * cols))

    res = []
    max_val = 0
    for i in range(rows * cols):
        ind = start_with + i * show_every
        if ind > len(stack) - 1:
            break
        img = stack[ind][0]     # pixel data
        name = stack[ind][1]    # SOP instance UID
        before = stack[ind][2]  # previous SOP instance UID
        after = stack[ind][3]   # next SOP instance UID
        _, white = masker(img)
        max_val = max(max_val, white)
        res.append((white, name, before, after))

    if max_val < white_sum:
        filter_val = max_val * filter_percentage
    else:
        filter_val = max_val * filter_percentage2

    # Keep the file name plus its neighbours for every slice that passes.
    final = [[name, before, after]
             for white, name, before, after in res
             if white >= filter_val]

    if not final:
        # Nothing was sampled (e.g. empty stack): nothing to choose from.
        return []

    return [final[int(len(final) * fraction)] for fraction in picks]
        
        
    
# Build the reduced training set: for each collected study, pick slices with
# sample_stack, write one CSV row per picked slice and save a PNG whose three
# channels are the picked slice, its previous slice and its next slice.
new_columns = ['StudyInstanceUID','SeriesInstanceUID','SOPInstanceUID','pe_present_on_image','negative_exam_for_pe','qa_motion','qa_contrast','flow_artifact','rv_lv_ratio_gte_1','rv_lv_ratio_lt_1','leftsided_pe','chronic_pe','true_filling_defect_not_pe','rightsided_pe','acute_and_chronic_pe','central_pe','indeterminate','before','after','image'
]
# Labels that are multiplied by the per-image flag, so they are 0 on images
# where pe_present_on_image is 0.
masked_columns = ('leftsided_pe', 'chronic_pe', 'rightsided_pe', 'acute_and_chronic_pe', 'central_pe')
# Columns filled separately below rather than copied from the source row.
derived_columns = ('before', 'after', 'image')

new_rows = []
for el in lst:
    if not el:
        # A study without collected rows has nothing to look up.
        continue

    data = load_scan('/kaggle/input/rsna-str-pulmonary-embolism-detection/train/' + el[0]['StudyInstanceUID']  + '/' + el[0]['SeriesInstanceUID'])
    chosen_files = sample_stack(data)

    for e in el:
        for c in chosen_files:
            if e['SOPInstanceUID'] != c[0]:
                continue

            temp = {col: e[col] for col in new_columns if col not in derived_columns}
            for col in masked_columns:
                temp[col] = e[col] * e['pe_present_on_image']
            temp['before'] = c[1]
            temp['after'] = c[2]
            temp['image'] = 'csv_creator_1/{}-{}-{}.png'.format(e['StudyInstanceUID'], e['SeriesInstanceUID'], e['SOPInstanceUID'])
            # Store the assembled record (not the raw CSV row), so the
            # before/after/image columns and the masked labels reach the file.
            new_rows.append(temp)

            ds1 = '/kaggle/input/rsna-str-pulmonary-embolism-detection/train/{}/{}/{}.dcm'.format(e['StudyInstanceUID'], e['SeriesInstanceUID'], c[0])
            ds2 = '/kaggle/input/rsna-str-pulmonary-embolism-detection/train/{}/{}/{}.dcm'.format(e['StudyInstanceUID'], e['SeriesInstanceUID'], c[1])
            ds3 = '/kaggle/input/rsna-str-pulmonary-embolism-detection/train/{}/{}/{}.dcm'.format(e['StudyInstanceUID'], e['SeriesInstanceUID'], c[2])
            img = fuser(ds1, ds2, ds3)
            plt.imsave('{}-{}-{}.png'.format(e['StudyInstanceUID'], e['SeriesInstanceUID'], e['SOPInstanceUID']), img)
            # chosen_files may list the same slice more than once; one row is enough.
            break

# Build the frame once instead of concatenating inside the loop.
new_file = pd.DataFrame(new_rows, columns=new_columns)
new_file.to_csv('train.csv', index=False)

            