#### Code to convert the DICOM x-rays into resized PNG images

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src

## Imports

In [None]:
import os
import cv2
import glob
import gdcm
import pydicom
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from tqdm.notebook import tqdm

In [None]:
from params import *

In [None]:
def read_xray(path):
    """
    Reads a DICOM x-ray and converts its pixels to an 8-bit image.

    Pixel values are inverted when the photometric interpretation is
    MONOCHROME1, then min-max rescaled to the [0, 255] range.

    Args:
        path (str): Path to the DICOM file.

    Returns:
        np.ndarray: Rescaled pixel array, with dtype uint8.
        pydicom dataset: Dataset the pixels were read from (it also holds the
            pixel data), to access header fields such as SeriesNumber.
    """
    # One full read provides both the header fields and the pixels.
    dicom = pydicom.dcmread(path)

    # Work in float64 so that the subtractions below cannot wrap around when
    # the stored pixels use a signed or narrow integer type.
    data = dicom.pixel_array.astype(np.float64)

    if dicom.PhotometricInterpretation == "MONOCHROME1":  # Inverted xray
        data = np.max(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: the rescaling would compute 0 / 0.
        return np.zeros(data.shape, dtype=np.uint8), dicom

    data = (data / peak * 255).astype(np.uint8)

    return data, dicom

### Params

In [None]:
# Image size tag; it is part of the output folder name and of the csv name.
SIZE = 512
# Name of the dataset split sub-folder to process.
DATABASE = "train"
# Folder the converted images are written to.
SAVE_DIR = DATA_PATH + f"{DATABASE}_{SIZE}/"

# exist_ok avoids the check-then-create race and makes the cell re-runnable;
# makedirs also creates missing parent folders.
os.makedirs(SAVE_DIR, exist_ok=True)

In [None]:
# Image-level table: provides the 'id' and 'StudyInstanceUID' columns used to
# locate each DICOM file.
image_level_csv = DATA_PATH + "train_image_level.csv"
df = pd.read_csv(image_level_csv)

In [None]:
def _find_dicom(study_path, image):
    """Returns (series, path) of the file `image`.dcm in a study, else None."""
    for series in os.listdir(study_path):
        candidate = os.path.join(study_path, series, image + ".dcm")
        if os.path.exists(candidate):
            return series, candidate
    return None


# Column order of the saved csv; also gives an empty table its header.
INFO_COLUMNS = [
    "study",
    "series",
    "img",
    "save_name",
    "shape",
    "photometric_interpretation",
    "series_number",
    "instance_number",
]

rows = []
pairs = zip(df['StudyInstanceUID'], df['id'])
for i, (study, image_id) in enumerate(tqdm(pairs, total=len(df))):
    # Keep the part of the id before the first underscore: it is the file name.
    image = image_id.split('_')[0]

    located = _find_dicom(os.path.join(DATA_PATH, DATABASE, study), image)
    if located is None:
        print(f'Image {i} not found')
        continue
    series, img_path = located

    img, meta = read_xray(img_path)
    shape = img.shape  # Resolution before resizing, kept in the csv

    # Display one image out of 100 as a visual sanity check.
    if i % 100 == 0:
        plt.figure(figsize=(9, 9))
        plt.imshow(img, cmap="gray")
        plt.show()

    # cv2.resize expects the target size as a (width, height) tuple.
    img = cv2.resize(img, (SIZE, SIZE), interpolation=cv2.INTER_LINEAR)

    save_name = f"{study}_{image}.png"
    # cv2.imwrite reports failure through its return value; skip the row so
    # that the csv only lists files that exist on disk.
    if not cv2.imwrite(SAVE_DIR + save_name, img):
        print(f'Image {i} could not be saved')
        continue

    rows.append({
        "study": study,
        "series": series,
        "img": image,
        "save_name": save_name,
        "shape": shape,
        "photometric_interpretation": meta.PhotometricInterpretation,
        # These tags may be absent from a file; store None in that case.
        "series_number": getattr(meta, "SeriesNumber", None),
        "instance_number": getattr(meta, "InstanceNumber", None),
    })

# Build the table once instead of concatenating one DataFrame per image.
infos = pd.DataFrame(rows, columns=INFO_COLUMNS)
infos.to_csv(DATA_PATH + f'df_{DATABASE}_{SIZE}.csv', index=False)

In [None]:
# Preview the first rows of the metadata table.
infos.head(n=5)