In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src

In [None]:
import os
import cv2
import json
import glob
import torch
import pydicom
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch.nn.functional as F

from collections import Counter
from tqdm.notebook import tqdm


# Widen the console width pandas assumes when rendering frames.
pd.set_option('display.width', 500)
# Let a cell show up to 100 characters before pandas truncates it (handy for long image paths).
pd.set_option('max_colwidth', 100)

In [None]:
from params import *

from data.preparation import *

from data.dataset import *
from data.transforms import *

### External data

- https://www.kaggle.com/datasets/brendanartley/lumbar-coordinate-pretraining-dataset

In [None]:
# Load the external keypoint annotations and collapse them to one row per image.
group_keys = ["source", "filename", "img_path"]

df = pd.read_csv(DATA_PATH + "coords/coords_pretrain.csv")

# Rebuild each image's location on disk from its source dataset and file name.
img_dir = DATA_PATH + "coords/data/processed_" + df["source"] + "_jpgs/"
df = df.assign(img_path=img_dir + df["filename"])

# Sorting before grouping fixes the order of the values inside the aggregated lists.
df = (
    df.sort_values(["source", "filename", "level"])
    .groupby(group_keys)
    .agg(list)
    .reset_index()
)

In [None]:
# Sanity check: show one external image with its annotated levels drawn on top.
idx = 0
sample = df.loc[idx]
img = cv2.imread(sample["img_path"], 0)  # flag 0 -> single-channel grayscale

plt.figure(figsize=(8, 8))
plt.imshow(img, cmap="gray")
for x, y, lvl in zip(sample["x"], sample["y"], sample["level"]):
    # The leading "x" of the label doubles as the marker; the level name follows it.
    plt.text(x, y, f"x   {lvl}", c="r", horizontalalignment="left", size=12)
plt.axis(False)
plt.show()

### Competition data

In [None]:
# Switches for the export loop below:
#   SAVE - write each normalized slice to SAVE_FOLDER as a PNG.
#   PLOT - show the slice with its keypoints; the loop then stops after the first plotted series.
SAVE = False
PLOT = False

# Destination of the exported PNGs; created up front so that writing does not fail on a missing folder.
SAVE_FOLDER = "../input/coords/comp_data/"
os.makedirs(SAVE_FOLDER, exist_ok=True)

In [None]:
# Build the series table, drop every row containing a missing value, and renumber the
# index from 0 so that the label lookups `df[col][idx]` in the loop below line up with range(len(df)).
df = prepare_data().dropna(axis=0).reset_index(drop=True)

In [None]:
# For every non-axial series: take the middle entry of the stored array, normalize it to
# uint8, build a per-level keypoint table (absolute and relative coordinates) and collect
# the tables in `dfs`. Optionally writes the image (SAVE) or plots it (PLOT).
dfs = []
for idx in tqdm(range(len(df))):
    # Axial series are skipped here; they are handled in the "Axial Coords" section.
    if df['orient'][idx] == "Axial":
        continue
    # if not df['series_id'][idx] == 4089185953:
    #     continue

    img = np.load(df['img_path'][idx])

    # Keep only the middle entry along the first axis
    # (presumably the central slice of the stack — TODO confirm axis order).
    img = img[len(img) // 2]
    # img = np.concatenate([
    #     img[len(img) // 4][..., None],
    #     img[len(img) // 2][..., None],
    #     img[3 * len(img) // 4][..., None],
    # ], -1)

    # The lower bound is the 0th percentile (i.e. the minimum), so this only caps the
    # brightest 2% of values before the min-max rescale to [0, 255].
    # NOTE(review): a constant image would make the denominator below zero.
    img = np.clip(img, np.percentile(img.flatten(), 0), np.percentile(img.flatten(), 98))  # DO NOT FORGET
    img = (img - img.min()) / (img.max() - img.min()) 
    img = (img * 255).astype(np.uint8)

    # Rows are the series' coords (columns z, x, y) indexed by level; reset_index turns
    # the level into a regular column named "index".
    df_coords = pd.DataFrame(
        df['coords'][idx], df['level'][idx], columns=['z', 'x', 'y']
    ).reset_index()
    # Several annotations can share a level: average them into one point per level.
    df_coords = df_coords.groupby('index').mean().reset_index()
    df_coords = df_coords.rename(columns={"index": "level"}).sort_values('level', ignore_index=True)

    # Relative coordinates: x is divided by the image width, y by the image height.
    df_coords['relative_x'] = df_coords['x'] / img.shape[1]
    df_coords['relative_y'] = df_coords['y'] / img.shape[0]

    # Copy the series-level metadata onto every keypoint row.
    cols = ["study_id", "series_id", "orient", "weighting"]
    for col in cols:
        df_coords[col] = df[col][idx]

    df_coords['img_path'] = SAVE_FOLDER + f'{df["study_id"][idx]}_{df["series_id"][idx]}.png'
    dfs.append(df_coords)

    # if len(df_coords) == 5:
    #     continue
    # print(df['series_id'][idx])

    if SAVE:
        # All rows share the same path, so the first one is enough.
        cv2.imwrite(df_coords["img_path"][0], img)

    if PLOT:
        plt.figure(figsize=(8, 8))
        plt.imshow(img, cmap="gray")
        for x, y, l in zip(df_coords['x'], df_coords['y'], df_coords['level']):
            # print(x, y, l)
            plt.text(x, y, f"x   {l}", c="r", horizontalalignment="left", size=12)
        # plt.axis(False)
        plt.show()
    
        # With the guard below commented out, plotting mode stops after the first
        # plotted series, leaving `dfs` with a single table.
        # if idx > 5:
        break

In [None]:
# Stack the per-series keypoint tables and keep only the columns that go to disk.
# NOTE(review): the CSV is written regardless of SAVE, including when the loop above
# stopped early because PLOT was enabled — confirm this is intended.
comp_columns = [
    "study_id",
    "series_id",
    "img_path",
    "level",
    "x",
    "y",
    "relative_x",
    "relative_y",
]

df_ = pd.concat(dfs)[comp_columns]
df_.to_csv("../input/coords/coords_comp.csv", index=False)
df_.head(1)

In [None]:
# len(df_.series_id.unique()), len(os.listdir(SAVE_FOLDER))

In [None]:
# NOTE(review): on a top-to-bottom run `df_coords` is still the last per-series table
# built by the export loop above (columns z/x/y/level/...), which is never given a
# `target` column, so this lookup is expected to fail. It presumably belongs after
# `df_coords = prepare_coords_data()` — TODO confirm and move or remove.
df_coords.target[0].shape

In [None]:
from data.dataset import CoordsDataset
from data.preparation import prepare_coords_data

# Load the prepared keypoint table and wrap it in a dataset resized to 384x384.
# strength=0 presumably disables augmentation — TODO confirm in get_transfos.
df_coords = prepare_coords_data()

coords_transfos = get_transfos(resize=(384, 384), strength=0, use_keypoints=True)
dataset = CoordsDataset(df_coords, transforms=coords_transfos)

In [None]:
# Peek at the first row of the prepared keypoint table.
df_coords.head(1)

In [None]:
# Choose the sample explicitly. Previously `idx` was whatever value an earlier loop
# over a different frame left behind, which need not be a valid index for `dataset`
# and made the result depend on execution order.
idx = 0  # or: np.random.choice(len(dataset)) for a random sample
x, y, _ = dataset[idx]

In [None]:
# Display the second item returned by dataset[idx] (the keypoint target).
y

In [None]:
# Shape of the `target_rel` column viewed as a NumPy array (one entry per row of the table).
df_coords["target_rel"].values.shape

In [None]:
# Largest value stored in dataset.targets_rel
# (presumably relative coordinates, so expected to stay <= 1 — TODO confirm).
dataset.targets_rel.max()

In [None]:
# Smoke test on the target shape. The loop exits after its first iteration, so only
# dataset[0] is actually checked (and `idx`, `x`, `y` end up holding that sample).
expected_size = torch.Size([5, 2])
for idx in tqdm(range(len(dataset))):
    x, y, _ = dataset[idx]
    assert y.size() == expected_size
    break

In [None]:

# Plot the first channel of the current sample with its keypoints on top.
# Keypoints whose coordinates sum to 0 are dropped (presumably missing levels — TODO confirm).
kept = y.sum(-1) > 0

img = x[0]
img = (img - img.min()) / (img.max() - img.min())  # rescale to [0, 1] for display

# Relative -> pixel coordinates: column 0 (x) is scaled by the width and column 1 (y)
# by the height, matching the plotting cell at the end of the notebook. The scaled
# values go into new tensors: `y` itself is neither filtered nor multiplied in place,
# so re-running this cell no longer rescales the points a second time.
height, width = img.shape[0], img.shape[1]
points = y[kept]
xs = points[:, 0] * width
ys = points[:, 1] * height

plt.figure(figsize=(10, 10))
plt.imshow(img, cmap="gray")
plt.scatter(xs, ys)
plt.show()

### Axial Coords

In [None]:
from data.sagital_to_axial import read_series_metadata

In [None]:
# Reload the series table and read the raw label coordinates.
# NOTE(review): the extraction loop below iterates over `df`, not `df_`, and `df_` is
# overwritten once that loop has finished — confirm which frame is meant to drive it.
df_ = prepare_data()

label_csv = DATA_PATH + "train_label_coordinates.csv"
df_coords = pd.read_csv(label_csv)

In [None]:
# Switches for the axial export loop below:
#   SAVE - write one PNG per kept (series, level) into SAVE_FOLDER.
#   PLOT - draw the kept levels of each series side by side.
SAVE = True
PLOT = False

# Destination of the axial PNGs; created up front so that writing does not fail on a missing folder.
SAVE_FOLDER = "../input/coords/axial/"
os.makedirs(SAVE_FOLDER, exist_ok=True)

In [None]:
# For every axial series and every level with exactly two annotations: read the DICOM
# frame between the two annotated instances, normalize it to uint8, compute relative
# coordinates, discard implausible pairs, and collect the keypoint table in `dfs`.
# Optionally writes the image (SAVE) and/or plots the kept levels (PLOT).
dfs = []
for idx in tqdm(range(len(df))):
    # Only axial series are handled in this section.
    if df['orient'][idx] != "Axial":
        continue

    # if idx not in [977, 1940, 5298]:
    #     continue

    # NOTE(review): `df_axial` is never read below. The call is kept unchanged in case
    # read_series_metadata is relied on for validation or side effects — confirm, and
    # drop it if not (it runs once per axial series).
    df_axial = read_series_metadata(
        df["study_id"][idx],
        df["series_id"][idx],
        "axial",
        data_path=DATA_PATH + "train_images/",
    )

    # Label rows that belong to the current series.
    coords_series = df_coords[df_coords['series_id'] == df["series_id"][idx]]

    folder = DATA_PATH + f'train_images/{df["study_id"][idx]}/{df["series_id"][idx]}/'

    if PLOT:
        plt.figure(figsize=(25, 5))

    for i, lvl in enumerate(LEVELS):
        coords = coords_series[coords_series['level'] == lvl].reset_index(drop=True)
        # Exactly two annotations are required for a level to be exported.
        if len(coords) != 2:
            continue
        # Keep only the first word of the condition as the side, and expose the
        # instance number as the z coordinate.
        coords['condition'] = coords['condition'].apply(lambda x: x.split()[0])
        coords = coords.rename(columns={"condition": "side", "instance_number": "z"})

        try:
            # Preferred frame: the rounded mean of the two annotated instance numbers.
            frame = int(np.round(coords['z'].mean()))
            img = pydicom.dcmread(folder + f'{frame}.dcm').pixel_array
        except Exception:
            # Best-effort fallback to the first annotated instance, e.g. when the
            # averaged frame cannot be read. `Exception` is caught instead of using a
            # bare `except:` so that KeyboardInterrupt / SystemExit still stop the loop.
            frame = coords['z'][0]
            img = pydicom.dcmread(folder + f'{frame}.dcm').pixel_array

        # No percentile clipping here (unlike the non-axial export): plain min-max to [0, 255].
        # img = np.clip(img, np.percentile(img.flatten(), 0), np.percentile(img.flatten(), 98))  # DO NOT FORGET
        img = (img - img.min()) / (img.max() - img.min())
        img = (img * 255).astype(np.uint8)

        # Relative coordinates: x is divided by the image width, y by the image height.
        coords['relative_x'] = coords['x'] / img.shape[1]
        coords['relative_y'] = coords['y'] / img.shape[0]

        # Discard pairs whose two points are too far apart: more than 10% of the height
        # vertically or 20% of the width horizontally.
        dy = np.abs(coords['relative_y'][0] - coords['relative_y'][1])
        dx = np.abs(coords['relative_x'][0] - coords['relative_x'][1])
        if dy > 0.1 or dx > 0.2:  # 977, 1940, 5298
            # print(idx, dy, dx)
            continue  # Skip

        # LEVELS_ is indexed in parallel with LEVELS
        # (presumably a filename-friendly spelling of the same levels — TODO confirm).
        coords['img_path'] = SAVE_FOLDER + f'{df["study_id"][idx]}_{df["series_id"][idx]}_{LEVELS_[i]}.png'
        dfs.append(coords)

        if SAVE:
            # Both rows share the same path, so the first one is enough.
            cv2.imwrite(coords["img_path"][0], img)

        if PLOT:
            plt.subplot(1, 5, i + 1)
            plt.imshow(img, cmap="gray")
            plt.scatter(coords["x"], coords["y"], c="orange")
            plt.title(lvl)

    if PLOT:
        plt.show()

        # if idx > 10:
        #     break

In [None]:
# Merge the per-level tables (renumbering rows) and keep only the exported columns.
axial_columns = [
    "study_id",
    "series_id",
    "img_path",
    "side",
    "x",
    "y",
    "relative_x",
    "relative_y",
]

df_ = pd.concat(dfs, ignore_index=True)[axial_columns]
df_.to_csv("../input/coords/coords_ax.csv", index=False)

df_.head(10)

In [None]:
# Count how often the "Left" point has an x no larger than the "Right" point.
# The comparison is positional, so it assumes both sides occur equally often and in
# matching row order.
left_x = df_.loc[df_["side"] == "Left", "x"].values
right_x = df_.loc[df_["side"] == "Right", "x"].values
d = left_x <= right_x

Counter(d)

### Dataset

In [None]:
# NOTE: this rebinds `df` — from here on it holds the output of prepare_coords_data(),
# no longer the frame returned by prepare_data() that the export loops iterate over.
df = prepare_coords_data()

df.head(1)

In [None]:
# 384x384 resize with augmentation switched off.
# NOTE(review): an earlier cell calls get_transfos(strength=0, use_keypoints=True)
# instead of augment=False — confirm that both keyword sets are accepted and equivalent.
transforms = get_transfos(augment=False, resize=(384, 384))

In [None]:
# Rebuild the dataset from the freshly prepared table; this replaces the earlier `dataset`.
dataset = CoordsDataset(df, transforms=transforms)

In [None]:
# Fetch a fixed sample (image, target, and a third value that is ignored here).
idx = 3
img, y, _ = dataset[idx]

In [None]:
# The loop exits after its first iteration, so `idx`, `img` and `y` end up holding
# sample 0, replacing the sample selected in the previous cell.
for idx in tqdm(range(len(dataset))):
    img, y, _ = dataset[idx]
    break

In [None]:
# Show the sample channels-last, with its keypoints converted from relative to pixel units
# (x scaled by the width, y by the height).
img_hwc = img.cpu().numpy().transpose(1, 2, 0)
height, width = img.size(1), img.size(2)

plt.imshow(img_hwc)
plt.scatter(y[:, 0] * width, y[:, 1] * height, marker="x")
plt.axis(False)
plt.show()

Done!