In [74]:
from preprocessing import *
from pathlib import Path
from tqdm import tqdm

In [75]:
# Directory scanned for the per-eye `*.pickle` files, whose stems are parsed as
# `<group>_<id>_<eye>` further down.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
preprocessed_dataset_dir = Path("/home/ja8818/data/preprocessing/preprocessed_octs/")

In [76]:
# Subject IDs assigned to each split, per group. The split is made by subject
# (not by point) so that all points of one subject end up on the same side.
amd_train = [0, 1, 3, 5, 6]
amd_test = [2, 4]
control_train = [0, 1, 3, 4, 5, 6, 9, 10, 11, 12]
control_test = [2, 7, 8]

# Guard against subject-level leakage: an ID listed in both splits would
# silently be treated as training data only by the membership checks below.
assert not set(amd_train) & set(amd_test), "AMD subject listed in both train and test"
assert not set(control_train) & set(control_test), "Control subject listed in both train and test"

# Point dataset generation

In [77]:
# Layer boundaries in the order they are paired; each consecutive pair yields
# one thickness feature.
layers = ['INFL', 'ONFL', 'IPL', 'OPL', 'ICL', 'RPE']

def get_points_from_subjects_eye(preprocessed_octs):
    """Stack the per-point layer thicknesses and labels of several scans.

    For every scan, the thickness between each pair of consecutive entries in
    ``layers`` is queried and the results are arranged as one row per point
    and one column per layer pair.

    Parameters
    ----------
    preprocessed_octs : iterable
        Scan objects exposing ``thickness(inner_layer, outer_layer)`` and
        ``label()``. Assumes both return 1-D array-likes with one entry per
        point — TODO confirm against the ``preprocessing`` module.

    Returns
    -------
    points : np.ndarray, shape (n_points, len(layers) - 1)
        Thickness features of all scans, concatenated in iteration order.
    labels : np.ndarray, shape (n_points,)
        Labels aligned row-for-row with ``points``.

    Raises
    ------
    ValueError
        If a scan's thickness matrix and label vector do not line up.
    """
    n_features = len(layers) - 1

    # Seed with empty arrays so an empty input still yields correctly shaped
    # float outputs; per-scan blocks are joined once at the end instead of
    # re-copying the growing result on every iteration.
    point_chunks = [np.zeros((0, n_features))]
    label_chunks = [np.zeros((0,))]

    for scan in preprocessed_octs:
        scan_labels = np.asarray(scan.label())
        thicknesses = [
            scan.thickness(inner_layer, outer_layer)
            for inner_layer, outer_layer in zip(layers[:-1], layers[1:])
        ]
        # (n_features, n_points) -> (n_points, n_features)
        scan_points = np.asarray(thicknesses).T

        shapes_ok = (
            scan_points.ndim == 2
            and scan_points.shape[1] == n_features
            and scan_labels.ndim == 1
            and scan_points.shape[0] == scan_labels.shape[0]
        )
        if not shapes_ok:
            raise ValueError(f"Size mismatch: {scan}")

        point_chunks.append(scan_points)
        label_chunks.append(scan_labels)

    return np.concatenate(point_chunks, axis=0), np.concatenate(label_chunks)

In [78]:
# Subject IDs allowed in each split, per group. Any file whose group prefix is
# not "amd" is treated as a control recording (same rule as before).
subject_splits = {
    "amd": {"train": amd_train, "test": amd_test},
    "control": {"train": control_train, "test": control_test},
}

# Per-file row blocks are collected in lists and concatenated once at the end,
# instead of re-copying the growing arrays for every file. Each list is seeded
# with an empty array so shape and dtype stay defined even if a split gets no files.
point_chunks = {
    (group_key, split): [np.zeros((0, 5))]
    for group_key in subject_splits
    for split in ("train", "test")
}
label_chunks = {
    (group_key, split): [np.zeros((0,))]
    for group_key in subject_splits
    for split in ("train", "test")
}

# Sorted so that the row order of the resulting arrays does not depend on the
# order in which the filesystem happens to list the files.
for filename in tqdm(sorted(preprocessed_dataset_dir.glob("*.pickle"))):
    # File stems are expected to look like `<group>_<id>_<eye>`.
    group, subject_id, eye = filename.stem.split('_')
    subject_id = int(subject_id)
    group_key = "amd" if group == "amd" else "control"

    # Fail loudly on subjects that belong to neither split rather than
    # silently treating every non-training subject as test data.
    if subject_id in subject_splits[group_key]["train"]:
        split = "train"
    elif subject_id in subject_splits[group_key]["test"]:
        split = "test"
    else:
        raise ValueError(
            f"{filename.name}: subject {subject_id} is not assigned to the "
            f"{group_key} train or test split"
        )

    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at trusted, locally produced files.
    with filename.open("rb") as file:
        preprocessed_octs = pickle.load(file)
    points, labels = get_points_from_subjects_eye(preprocessed_octs)

    point_chunks[(group_key, split)].append(points)
    label_chunks[(group_key, split)].append(labels)

amd_train_points = np.concatenate(point_chunks[("amd", "train")], axis=0)
amd_test_points = np.concatenate(point_chunks[("amd", "test")], axis=0)
control_train_points = np.concatenate(point_chunks[("control", "train")], axis=0)
control_test_points = np.concatenate(point_chunks[("control", "test")], axis=0)

amd_train_labels = np.concatenate(label_chunks[("amd", "train")], axis=0)
amd_test_labels = np.concatenate(label_chunks[("amd", "test")], axis=0)
control_train_labels = np.concatenate(label_chunks[("control", "train")], axis=0)
control_test_labels = np.concatenate(label_chunks[("control", "test")], axis=0)

100%|██████████| 29/29 [00:09<00:00,  3.08it/s]


In [79]:
# Number of points (one label per point) in each group/split combination.
n_amd_train = amd_train_labels.shape[0]
n_amd_test = amd_test_labels.shape[0]
n_control_train = control_train_labels.shape[0]
n_control_test = control_test_labels.shape[0]

In [80]:
# Split sizes summed over both groups.
n_train, n_test = (
    n_amd_train + n_control_train,
    n_amd_test + n_control_test,
)

In [87]:
# Dataset size overview: absolute point counts, then the share of points that
# ended up in the test split (overall and per group).
summary_lines = (
    f'Total points:   {n_train + n_test}',
    f'AMD points:     {n_amd_train + n_amd_test}',
    f'Control points: {n_control_train + n_control_test}',
    f'Total test:     {100 * n_test / (n_train + n_test):.1f}%',
    f'AMD test:       {100 * n_amd_test / (n_amd_train + n_amd_test):.1f}%',
    f'Control test:   {100 * n_control_test / (n_control_train + n_control_test):.1f}%',
)
print('\n'.join(summary_lines))

Total points:   354650
AMD points:     278474
Control points: 76176
Total test:     24.9%
AMD test:       22.8%
Control test:   32.7%


In [88]:
# Sanity check: display the (n_points, n_features) shape of the AMD training matrix.
amd_train_points.shape

(214912, 5)

# Save results

In [91]:
# Root output directory; the arrays are written into its `train/` and `test/` subfolders.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
save_path = Path("/home/ja8818/data/points_dataset/")

In [93]:
# np.save does not create missing parent directories, so make sure the
# training folder exists before writing into it.
train_dir = save_path / "train"
train_dir.mkdir(parents=True, exist_ok=True)

for out_name, out_array in (
    ("amd_points.npy", amd_train_points),
    ("amd_labels.npy", amd_train_labels),
    ("control_points.npy", control_train_points),
    ("control_labels.npy", control_train_labels),
):
    np.save(train_dir / out_name, out_array)

In [94]:
# np.save does not create missing parent directories, so make sure the
# test folder exists before writing into it.
test_dir = save_path / "test"
test_dir.mkdir(parents=True, exist_ok=True)

for out_name, out_array in (
    ("amd_points.npy", amd_test_points),
    ("amd_labels.npy", amd_test_labels),
    ("control_points.npy", control_test_points),
    ("control_labels.npy", control_test_labels),
):
    np.save(test_dir / out_name, out_array)