In [10]:
import numpy as np
import os
import pandas as pd
import pydicom
from pydicom.dataset import FileDataset
from pathlib import Path
from typing import Generator
from sklearn.model_selection import KFold, StratifiedKFold

import nibabel as nib
import numpy as np
from nibabel import Nifti1Image
import matplotlib.pyplot as plt
from matplotlib import animation

from IPython.display import HTML

In [2]:
# Not used in the cells shown here; presumably the target volume shape the
# scans are resized to — TODO confirm against the consuming code.
image_sizes = [128, 128, 128]

# Root folder of the raw competition data. Set the KAGGLE_DATA_DIR environment
# variable to point the notebook at another location; when it is unset the
# original local path is used, so existing runs behave exactly as before.
data_dir = os.environ.get("KAGGLE_DATA_DIR", "/Volumes/Data/kaggle/raw")

In [22]:
# --- Load the label tables ---------------------------------------------------
df_train: pd.DataFrame = pd.read_csv(os.path.join(data_dir, 'train.csv'))

df_bounding: pd.DataFrame = pd.read_csv(
  os.path.join(data_dir, 'train_bounding_boxes.csv')
)

# --- Index the segmentation masks --------------------------------------------
# Only `.nii` entries are treated as masks. Anything else in the folder (for
# example OS metadata files) is ignored instead of being sliced into a bogus
# study id.
segmentation_dir: str = os.path.join(data_dir, 'segmentations')
mask_files: list[str] = [
  file_name
  for file_name in os.listdir(segmentation_dir)
  if file_name.endswith('.nii')
]
df_mask: pd.DataFrame = pd.DataFrame({
    # Full path to each mask file.
    'mask_file': [
      os.path.join(segmentation_dir, file_name) for file_name in mask_files
    ],
    # The file name without its extension is the key joined against
    # `StudyInstanceUID` below.
    'StudyInstanceUID': [
      os.path.splitext(file_name)[0] for file_name in mask_files
    ],
})

# --- Combine labels, masks and image folders ---------------------------------
# Left join: every row of train.csv is kept; `mask_file` is NaN for studies
# without a segmentation.
df = df_train.merge(df_mask, on='StudyInstanceUID', how='left')
df['image_folder'] = df['StudyInstanceUID'].apply(
  lambda x: os.path.join(data_dir, 'train_images', x)
)

# Studies that do have a segmentation mask, re-indexed 0..n-1.
df_seg = df[df["mask_file"].notna()].reset_index(drop=True)

# --- Assign each segmented study to one of 5 validation folds ----------------
kf: KFold = KFold(5)
df_seg['fold'] = -1  # -1 marks "not yet assigned"
for fold, (train_idx, validation_idx) in enumerate(kf.split(df_seg)):
  # `validation_idx` holds positions, while `.loc` selects by label; the two
  # coincide only because the index was reset with `reset_index(drop=True)`.
  df_seg.loc[validation_idx, 'fold'] = fold

In [7]:
# Read the mask volume of the first row of `df_seg` as a floating-point array.
first_mask_path = df_seg.loc[0, "mask_file"]
mask_volume = nib.load(filename=first_mask_path).get_fdata()

# Reverse the second and third axes, then reorder the axes to (2, 1, 0) so that
# iterating over `niiSlices` steps along what was the last axis of the file.
flipped_volume = mask_volume[:, ::-1, ::-1]
niiSlices = flipped_volume.transpose(2, 1, 0)

In [11]:
fig, ax = plt.subplots(figsize=(6, 6))

# ims is a list of lists; each inner list holds the artists to draw in one
# animation frame.
ims = []
for frame in niiSlices:
    im = ax.imshow(frame)
    # Caption the frame with the distinct values present in this slice. The
    # text is placed in axes coordinates, centred just above the image.
    ttl = ax.text(
        0.5,
        1.01,
        np.unique(frame),
        horizontalalignment='center',
        verticalalignment='bottom',
        transform=ax.transAxes,
    )

    ims.append([im, ttl])

ani = animation.ArtistAnimation(fig, ims, interval=10, repeat_delay=1000)

# Close this figure explicitly (rather than "whatever is current") before
# rendering; presumably so only the video is displayed, not a static plot too.
plt.close(fig)
HTML(ani.to_html5_video())



In [12]:
def load_dicoms(path: str) -> list[FileDataset]:
  """Read the DICOM files in a folder and return them ordered by InstanceNumber.

  Args:
    path: Directory whose (non-hidden) entries are DICOM files.

  Returns:
    The parsed datasets, sorted ascending by their ``InstanceNumber`` attribute.
  """
  dicom_files: list[FileDataset] = [
    # `dcmread` replaces `read_file`, which is deprecated in pydicom 2.x and
    # removed in pydicom 3.
    pydicom.dcmread(os.path.join(path, dicom_file_name))
    for dicom_file_name in os.listdir(path)
    # Skip hidden entries such as ".DS_Store" or "._*" resource forks: they are
    # not DICOM files and would make the reader raise.
    if not dicom_file_name.startswith(".")
  ]

  # Directory listing order is arbitrary, so order explicitly by InstanceNumber.
  return sorted(
    dicom_files,
    key=lambda dicom_file: dicom_file.InstanceNumber
  )

In [39]:
# Study to inspect, identified by its StudyInstanceUID.
study: str = "1.2.826.0.1.3680043.5783"

# Rows of `df` belonging to that study, re-indexed from 0 so the first match
# can be addressed with label 0.
is_selected_study = df["StudyInstanceUID"] == study
study_row: pd.DataFrame = df.loc[is_selected_study].reset_index(drop=True)

# Load every slice from the study's image folder, ordered by load_dicoms.
study_image_folder = study_row.loc[0, "image_folder"]
data = load_dicoms(path=study_image_folder)
data

[Dataset.file_meta -------------------------------
 (0002, 0001) File Meta Information Version       OB: b'\x00\x01'
 (0002, 0002) Media Storage SOP Class UID         UI: CT Image Storage
 (0002, 0003) Media Storage SOP Instance UID      UI: 1.2.826.0.1.3680043.5783.1.1
 (0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
 (0002, 0012) Implementation Class UID            UI: 1.2.40.0.13.1.1.1
 (0002, 0013) Implementation Version Name         SH: 'PYDICOM 2.3.0'
 -------------------------------------------------
 (0008, 0018) SOP Instance UID                    UI: 1.2.826.0.1.3680043.5783.1.1
 (0008, 0023) Content Date                        DA: '20220727'
 (0008, 0033) Content Time                        TM: '183317.657543'
 (0010, 0010) Patient's Name                      PN: '5783'
 (0010, 0020) Patient ID                          LO: '5783'
 (0018, 0050) Slice Thickness                     DS: '0.625'
 (0020, 000d) Study Instance UID                  UI: 

In [41]:
fig, ax = plt.subplots(figsize=(6, 6))

# Bounding boxes of the study being displayed. This does not depend on the
# frame, so it is filtered once here instead of once per slice.
df_current_study = df_bounding[df_bounding["StudyInstanceUID"] == study]

# ims is a list of lists; each inner list holds the artists to draw in one
# animation frame.
ims = []
for i, frame in enumerate(data):
    im = ax.imshow(frame.pixel_array, cmap=plt.cm.bone)
    frame_artists = [im]

    # NOTE(review): the slice number is taken to be the 1-based position of the
    # frame in `data`; confirm this matches how `slice_number` is defined in
    # `df_bounding` (e.g. versus the DICOM InstanceNumber).
    slice_number: int = i + 1
    df_current_study_slice = df_current_study[
        df_current_study["slice_number"] == slice_number
    ]

    # Outline every box annotated on this slice as a closed red rectangle:
    # (x, y) is one corner and width/height extend from it. Slices without an
    # annotation simply contribute the image alone.
    for x_min, y_min, w, h in df_current_study_slice[
        ["x", "y", "width", "height"]
    ].to_numpy():
        box_outline, = ax.plot(
            [x_min, x_min + w, x_min + w, x_min, x_min],
            [y_min, y_min, y_min + h, y_min + h, y_min],
            color="red",
            linewidth=1,
        )
        frame_artists.append(box_outline)

    ims.append(frame_artists)

ani = animation.ArtistAnimation(fig, ims, interval=10, repeat_delay=1000)

# Close this figure explicitly before rendering; presumably so only the video
# is displayed, not a static plot as well.
plt.close(fig)
HTML(ani.to_html5_video())

In [35]:
# Rebuild the animation from the `fig` and `ims` left by an earlier cell, this
# time pausing 2000 ms before each repeat, and render it as an HTML5 video.
ani = animation.ArtistAnimation(
    fig,
    ims,
    interval=10,
    repeat_delay=2000,
)

HTML(ani.to_html5_video())

In [38]:
# Non-null value counts per column for each study in the bounding-box table,
# ordered from the study with the fewest `x` entries to the one with the most.
(
    df_bounding
    .groupby("StudyInstanceUID")
    .count()
    .sort_values(by="x")
)

Unnamed: 0_level_0,x,y,width,height,slice_number
StudyInstanceUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.2.826.0.1.3680043.780,2,2,2,2,2
1.2.826.0.1.3680043.27016,2,2,2,2,2
1.2.826.0.1.3680043.10579,3,3,3,3,3
1.2.826.0.1.3680043.17208,4,4,4,4,4
1.2.826.0.1.3680043.12785,5,5,5,5,5
...,...,...,...,...,...
1.2.826.0.1.3680043.19778,100,100,100,100,100
1.2.826.0.1.3680043.21321,132,132,132,132,132
1.2.826.0.1.3680043.31077,143,143,143,143,143
1.2.826.0.1.3680043.25772,166,166,166,166,166
