## Notebook for preprocessing the downloaded data into the format needed for training

You can download the preprocessed data [here](https://drive.google.com/drive/folders/1AE_mohNpxRg3JXoBX2oiN-8xaMcGYYuX?usp=sharing)

In [1]:
import os
import torch
import numpy as np
from pathlib import Path

In [2]:
# --- Locations -------------------------------------------------------------
# Two input folders (read file-by-file with np.load further down) and the
# folder the resulting tensors are written to.
UNIFORM_DIR = Path('./prepared_airplanes/uniform/')
SURFACE_DIR = Path('./prepared_airplanes/surface/')
RESULTS_DIR = Path('./processed_data/')

# --- Split / sampling ratios -----------------------------------------------
# Share of the files that is set aside for the test set
TEST_FRACTION = 0.2

# Share of each shape's samples drawn from the surface files
# (the remainder comes from the uniform files)
SURFACE_FRACTION = 0.9

# --- Number of points drawn per shape, for each subset ---------------------
TRAIN_SAMPLES = 100_000
VALID_SAMPLES = 20_000
TEST_SAMPLES = 80_000

In [3]:
# Seed of the sampling generator, so that re-running this cell draws the same
# points again.
SEED = 0
rng = np.random.default_rng(SEED)


def _split_count(total):
    """Split `total` samples into (uniform, surface) counts.

    The surface count is `total * SURFACE_FRACTION` rounded to the nearest
    integer and the uniform count is the remainder, so the two always add up
    to exactly `total` (truncating both parts separately can lose a sample
    to floating-point error).
    """
    n_surface = round(total * SURFACE_FRACTION)
    return total - n_surface, n_surface


def _draw_subsets(file, subset_sizes):
    """Load one shape and draw disjoint random subsets of its rows.

    Args:
        file: file name present in both UNIFORM_DIR and SURFACE_DIR.
        subset_sizes: number of rows wanted in each subset.

    Returns:
        One array per entry of `subset_sizes`; each holds the uniform rows
        followed by the surface rows of that subset.

    Raises:
        ValueError: (from the sampler) if a file has fewer rows than requested.
    """
    uniform = np.load(UNIFORM_DIR / file)
    surface = np.load(SURFACE_DIR / file)
    counts = [_split_count(size) for size in subset_sizes]

    # Draw all indices of a source in one go without replacement and slice
    # them afterwards: this is what keeps the subsets disjoint.
    idx_uniform = rng.choice(
        uniform.shape[0], size=sum(n for n, _ in counts), replace=False
    )
    idx_surface = rng.choice(
        surface.shape[0], size=sum(n for _, n in counts), replace=False
    )

    subsets = []
    u_start = s_start = 0
    for n_uniform, n_surface in counts:
        subsets.append(np.concatenate((
            uniform[idx_uniform[u_start:u_start + n_uniform]],
            surface[idx_surface[s_start:s_start + n_surface]],
        )))
        u_start += n_uniform
        s_start += n_surface
    return subsets


def _concat(parts):
    """Concatenate tensors once; an empty list gives an empty tensor."""
    return torch.cat(parts) if parts else torch.Tensor([])


def _build_split(split_files, subset_sizes):
    """Sample every file of a split and assemble (idx, X, y) per subset.

    For each subset:
        idx - int32 position of the source file within `split_files`,
              repeated once per sampled row;
        X   - columns 0..2 of the sampled rows;
        y   - column 3 of the sampled rows.
    X and y keep the dtype of the loaded arrays.
    """
    idx_parts = [[] for _ in subset_sizes]
    X_parts = [[] for _ in subset_sizes]
    y_parts = [[] for _ in subset_sizes]

    for i, file in enumerate(split_files):
        subsets = _draw_subsets(file, subset_sizes)
        for k, (size, rows) in enumerate(zip(subset_sizes, subsets)):
            idx_parts[k].append(torch.full((size,), i, dtype=torch.int32))
            X_parts[k].append(torch.from_numpy(rows[:, :3]))
            y_parts[k].append(torch.from_numpy(rows[:, 3]))

    # Pieces are gathered in lists and concatenated once at the end; growing
    # a tensor with torch.cat on every iteration re-copies all earlier data.
    return [
        (_concat(idx), _concat(X), _concat(y))
        for idx, X, y in zip(idx_parts, X_parts, y_parts)
    ]


# os.listdir returns entries in arbitrary order; sorting makes the file-level
# train/test split identical from run to run.
files = sorted(os.listdir(UNIFORM_DIR))
num_files = len(files)
split_threshold = int(num_files * TEST_FRACTION)
train_val_files = files[split_threshold:]
test_files = files[:split_threshold]

# Training and validation points come from the same shapes but never share a
# sampled row. Shape indices count from 0 within train_val_files.
(valid_idx, valid_X, valid_y), (train_idx, train_X, train_y) = _build_split(
    train_val_files, (VALID_SAMPLES, TRAIN_SAMPLES)
)

# Test points come from the held-out shapes. Shape indices count from 0
# within test_files.
test_idx, test_X, test_y = _build_split(test_files, (TEST_SAMPLES,))[0]

In [4]:
# Sanity check: the three training tensors must have the same number of rows
tuple(t.size() for t in (train_idx, train_X, train_y))

(torch.Size([12000000]), torch.Size([12000000, 3]), torch.Size([12000000]))

In [5]:
# Sanity check: the three validation tensors must have the same number of rows
tuple(t.size() for t in (valid_idx, valid_X, valid_y))

(torch.Size([2400000]), torch.Size([2400000, 3]), torch.Size([2400000]))

In [6]:
# Sanity check: the three test tensors must have the same number of rows
tuple(t.size() for t in (test_idx, test_X, test_y))

(torch.Size([2400000]), torch.Size([2400000, 3]), torch.Size([2400000]))

In [7]:
# torch.save does not create missing folders, so make sure the output
# directory exists before writing anything.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Each tensor is stored as '<name>.pt' inside RESULTS_DIR.
tensors_to_save = {
    'train_idx': train_idx,
    'train_X': train_X,
    'train_y': train_y,
    'valid_idx': valid_idx,
    'valid_X': valid_X,
    'valid_y': valid_y,
    'test_idx': test_idx,
    'test_X': test_X,
    'test_y': test_y,
}

for name, tensor in tensors_to_save.items():
    torch.save(tensor, RESULTS_DIR / f'{name}.pt')