In [None]:
import os
import numpy as np
import nibabel as nib
import pandas as pd
from PIL import Image
from tqdm import tqdm
import cv2
import warnings
warnings.filterwarnings('ignore')

# Directory from which parse_one loads the volumes as "<name>.nii.gz".
data_dir = "../data/preprocessed_-350_1150"
# Excel workbook holding the per-row annotations consumed by parse_one
# (dicom_id, start/end slice columns and the exclusion flags listed below).
excel_path = "../data/mark.xlsx"
# Annotation columns that parse_one sums for each row; when the total is
# positive the annotated slice range of that row is not read.
# Rough translation of the column names (confirm against the workbook):
# "confirmed exclusion", "very obvious forward tilt", "obvious left/right tilt".
bad_keys = ["确定排除", "极明显前倾", "明显左右倾"]
# Edge length in pixels: every slice is resized to size x size, and the value
# is also embedded in the name of the parquet file written at the end.
size = 512

def _split_slices(filename, good_begin, good_end):
    """Load one NIfTI volume and split its resized slices into two lists.

    Args:
        filename: Base name (without the ``.nii.gz`` suffix) of a file inside
            ``data_dir``.
        good_begin: First slice index (inclusive) that counts as "good".
        good_end: Last slice index (inclusive) that counts as "good". Passing
            a value smaller than ``good_begin`` makes every slice "bad".

    Returns:
        A ``(good_slice, bad_slice)`` tuple of lists. Each element is one
        slice taken along the third axis of the volume and resized to
        ``size`` x ``size`` with bilinear interpolation.
    """
    nii_img = nib.load(os.path.join(data_dir, filename + ".nii.gz"))
    data = nii_img.get_fdata()
    good_slice = []
    bad_slice = []
    for i in range(data.shape[2]):
        # Resize the slice with cv2.resize.
        slice_data = cv2.resize(data[:, :, i], (size, size), interpolation=cv2.INTER_LINEAR)
        if good_begin <= i <= good_end:
            good_slice.append(slice_data)
        else:
            bad_slice.append(slice_data)
    return good_slice, bad_slice


def parse_one(item):
    """Turn one annotation row into lists of "good" and "bad" slices.

    Args:
        item: One row of the annotation table. It must provide ``dicom_id``,
            the columns named in ``bad_keys`` and, for rows that are not
            excluded, the start/end slice columns.

    Returns:
        A ``(good_slices, bad_slices)`` tuple of lists of ``size`` x ``size``
        arrays. When the scan is stored as ``<dicom_id>_1`` and
        ``<dicom_id>_2`` instead of a single ``<dicom_id>`` file, the slices
        of both parts are concatenated in that order.

    Raises:
        ValueError: If ``dicom_id`` or a required slice bound cannot be
            converted to ``int`` (for example because the cell is empty).
        Exception: Any error raised while loading or resizing a volume is
            propagated to the caller.
    """
    if sum(item[bad_keys]) > 0:
        # Excluded scan: use an empty inclusive range so that no slice is
        # labelled "good". (0, 0) would wrongly mark slice 0 as good.
        good_begin, good_end = 0, -1
    else:
        # The annotated indices are doubled before use; presumably the
        # annotation was made on a volume with half as many slices -- TODO confirm.
        good_begin = int(item["起始切片（眼眶部下界）"]) * 2
        good_end = int(item["终止切片（眼眶部上界）"]) * 2

    base_name = str(int(item["dicom_id"]))
    # Decide explicitly between the single-file and the two-part layout
    # instead of using a bare ``except`` that would also hide real failures
    # (corrupt files, KeyboardInterrupt, ...).
    if os.path.exists(os.path.join(data_dir, base_name + ".nii.gz")):
        filenames = [base_name]
    else:
        filenames = [base_name + "_1", base_name + "_2"]

    good_all = []
    bad_all = []
    # NOTE(review): for two-part scans the same index range is applied to
    # each part independently -- confirm that this matches the annotation.
    for filename in filenames:
        good_part, bad_part = _split_slices(filename, good_begin, good_end)
        good_all += good_part
        bad_all += bad_part
    return good_all, bad_all


In [None]:
# Sanity check: load the volume of one annotated row and display its shape.
df = pd.read_excel(excel_path)
sample_row = 6  # arbitrary row of the annotation table used for the check
# Build the path from data_dir (as parse_one does) so this cell keeps working
# when the configured data directory changes.
nii_img = nib.load(os.path.join(data_dir, f"{int(df.iloc[sample_row]['dicom_id'])}.nii.gz"))
data = nii_img.get_fdata()
data.shape

In [None]:
# Run parse_one over every annotation row and pool the resulting slices.
df = pd.read_excel(excel_path)

good_slices = []
bad_slices = []
# (row index, error) pairs for rows that could not be processed, kept so that
# skipped rows are visible instead of being silently dropped.
failed_rows = []

for idx in tqdm(range(len(df))):
    try:
        item = df.iloc[idx]
        gs, bs = parse_one(item)
    except Exception as exc:
        # Best effort: one unreadable row must not abort the whole run.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        failed_rows.append((idx, repr(exc)))
        continue
    good_slices.extend(gs)
    bad_slices.extend(bs)

if failed_rows:
    print(f"Skipped {len(failed_rows)} of {len(df)} rows; first failures:")
    for failed_idx, reason in failed_rows[:10]:
        print(f"  row {failed_idx}: {reason}")

In [None]:
# Build one record per slice: the pixel values as nested Python lists plus
# the label. "Good" slices come first, followed by the "bad" ones.
data_list = [
    {'slice': good_arr.tolist(), 'is_eye': True}
    for good_arr in tqdm(good_slices)
]
data_list += [
    {'slice': bad_arr.tolist(), 'is_eye': False}
    for bad_arr in tqdm(bad_slices)
]

df = pd.DataFrame(data_list)

# Write the table without its index; the file name encodes the slice size
# and the exclusion columns that were applied.
os.makedirs('../data/parquet', exist_ok=True)
parquet_path = f'../data/parquet/{size}-{"_".join(bad_keys)}.parquet'
df.to_parquet(parquet_path, index=False)