In [None]:
# Install/upgrade pydicom together with the pylibjpeg and GDCM packages
# (presumably needed to decode compressed DICOM pixel data — TODO confirm).
# NOTE(review): versions are not pinned, so a fresh runtime may pull different releases.
!pip install -U pylibjpeg pylibjpeg-openjpeg pylibjpeg-libjpeg pydicom python-gdcm

In [None]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np # for linear algebra
import pydicom

# For showing image on colab
import matplotlib.pylab as plt
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from tqdm.notebook import tqdm
import glob

import gdcm
import seaborn as sns
from joblib import Parallel, delayed

import os # for doing directory operations 
import pydicom as dicom # for reading dicom files
from PIL import ImageTk, Image
import cv2 # for image pre-processing
from google.colab.patches import cv2_imshow

from skimage.transform import resize 

# For warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Mount Google Drive at /content/gdrive; every data path used below lives
# under /content/gdrive/MyDrive.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Collect every DICOM stored as Mammography/<patient>/<image>.dcm.
# glob returns matches in arbitrary order, so the list is sorted to keep it
# (and any subset taken from its head) reproducible across runs.
train_images = sorted(glob.glob("/content/gdrive/MyDrive/Mammography/*/*.dcm"))
print(len(train_images))

In [None]:
# Preview one mammogram to sanity-check decoding, normalisation and
# MONOCHROME1 inversion before running the bulk conversion.
f = "/content/gdrive/MyDrive/Mammography/822670188/RCC.dcm"
patient = f.split('/')[-2]          # parent directory name
image = f.split('/')[-1][:-4]       # file name without the ".dcm" suffix

# Named `ds` (not `dicom`) so the `import pydicom as dicom` module alias
# is not overwritten by a dataset object.
ds = pydicom.dcmread(f)

# Work in float64 so the subtraction below cannot overflow the native
# integer dtype of the pixel data.
img = ds.pixel_array.astype(np.float64)

# Min-max scale to [0, 1]; a constant image maps to all zeros instead of
# dividing by zero.
lo, hi = img.min(), img.max()
img = (img - lo) / (hi - lo) if hi > lo else np.zeros_like(img)

# MONOCHROME1 stores the minimum value as white, so invert it to display
# with the usual "higher is brighter" convention.
if ds.PhotometricInterpretation == "MONOCHROME1":
    img = 1 - img

fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(img, cmap="gray")
ax.set_title(f"{patient} {image}")
plt.show()

In [None]:
# --- Conversion settings ---------------------------------------------------
# Destination directory for the converted images (passed to process() as
# `save_folder`); it is created below if it does not exist yet.
SAVE_FOLDER = "/content/gdrive/MyDrive/new_mamo/"
# Output images are resized to SIZE x SIZE pixels.
SIZE = 512
# File extension of the written images (no leading dot).
EXTENSION = "png"
# When True, only the first 10 files of `train_images` are converted.
DEBUG = False

# OpenCV interpolation flag used by process() when resizing.
INTERPOLATION = cv2.INTER_CUBIC
# Alternative flags — uncomment exactly one to override the choice above:
#INTERPOLATION = cv2.INTER_NEAREST
#INTERPOLATION = cv2.INTER_LINEAR
#INTERPOLATION = cv2.INTER_AREA
#INTERPOLATION = cv2.INTER_LANCZOS4

# Make sure the output directory exists before any worker writes to it.
os.makedirs(SAVE_FOLDER, exist_ok=True)

In [None]:
def process(f, size=512, save_folder="", extension="png", interpolation=None):
    """Convert one DICOM file into a square 8-bit grayscale image on disk.

    The pixel data is min-max scaled to [0, 1], inverted when the file's
    PhotometricInterpretation is "MONOCHROME1", resized to ``size`` x ``size``
    and written as ``<save_folder>/<patient>_<image>.<extension>``, where
    ``patient`` is the name of the file's parent directory and ``image`` is
    the file name without its extension.

    Args:
        f: Path to the ``.dcm`` file.
        size: Side length, in pixels, of the square output image.
        save_folder: Destination directory. ``""`` writes relative to the
            current working directory. A trailing slash is optional.
        extension: Output file extension without the leading dot.
        interpolation: OpenCV interpolation flag for the resize. ``None``
            (default) falls back to the module-level ``INTERPOLATION``.

    Returns:
        ``True`` if the image was written, ``False`` if anything failed. The
        failure is also reported on stderr; collect the return values when
        running in worker processes, whose stderr may not reach the notebook.
    """
    import sys  # local import: only needed for the failure report below

    try:
        if interpolation is None:
            interpolation = INTERPOLATION

        patient = os.path.basename(os.path.dirname(f))
        image = os.path.splitext(os.path.basename(f))[0]

        ds = pydicom.dcmread(f)

        # Float64 avoids integer overflow/wrap-around in the subtraction
        # below (e.g. signed 16-bit pixel data with a wide value range).
        img = ds.pixel_array.astype(np.float64)

        # Min-max scale to [0, 1]; a constant image becomes all zeros
        # instead of triggering a division by zero.
        lo, hi = img.min(), img.max()
        if hi > lo:
            img = (img - lo) / (hi - lo)
        else:
            img = np.zeros_like(img)

        # MONOCHROME1 stores the minimum value as white: invert it so all
        # outputs share the same polarity.
        if ds.PhotometricInterpretation == "MONOCHROME1":
            img = 1 - img

        img = cv2.resize(img, (size, size), interpolation=interpolation)

        # Cubic/Lanczos kernels can overshoot slightly outside [0, 1];
        # without clipping, the uint8 cast below would wrap around and
        # produce speckle artefacts.
        img = np.clip(img, 0.0, 1.0)

        out_path = os.path.join(save_folder, f"{patient}_{image}.{extension}")

        # cv2.imwrite signals failure through its return value, not an
        # exception, so check it explicitly.
        if not cv2.imwrite(out_path, (img * 255).astype(np.uint8)):
            raise OSError(f"cv2.imwrite could not write {out_path}")
        return True
    except Exception as exc:
        # Best-effort batch conversion: report the broken file and carry on,
        # but never swallow KeyboardInterrupt/SystemExit.
        print(f"[process] failed on {f}: {exc!r}", file=sys.stderr)
        return False

In [None]:
# Convert the whole file list (or only its first 10 entries when DEBUG is on)
# with n_jobs=4, showing dispatch progress through tqdm.
paths_to_convert = train_images[:10] if DEBUG else train_images
conversion_jobs = (
    delayed(process)(uid, size=SIZE, save_folder=SAVE_FOLDER, extension=EXTENSION)
    for uid in tqdm(paths_to_convert)
)
_ = Parallel(n_jobs=4)(conversion_jobs)
# https://www.kaggle.com/code/mikecho/rsna-breast-cancer-dicom-png-lanczos4