In [1]:
import pandas as pd
import numpy as np

import re
import os
import glob

from sklearn.model_selection import train_test_split

import torch
from torchvision import transforms

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def numeric_file_key(path):
    """Return the integer formed by the digits in the file name of ``path``.

    Only the base name is inspected, so digits that happen to appear in a
    parent directory cannot disturb the ordering.  Raises ``ValueError`` when
    the file name contains no digit at all.
    """
    # Raw string: '\D' in a normal literal is an invalid escape sequence.
    return int(re.sub(r'\D', '', os.path.basename(path)))


def load_theta_tensor(path):
    """Read one comma-separated, header-less file into a float (2, 40, 40) tensor.

    The values are flattened in column-major ("F") order before being
    reshaped, so the file must hold exactly 2 * 40 * 40 numbers.
    """
    values = pd.read_csv(path, header=None, sep=',').to_numpy()
    return torch.from_numpy(values.flatten(order="F").reshape(2, 40, 40)).float()


def load_ground_truth_tensor(path):
    """Read one space-separated, header-less ground-truth file into a long tensor."""
    return torch.from_numpy(pd.read_csv(path, header=None, sep=' ').to_numpy()).long()


# Inputs and targets are both ordered by the number embedded in their file
# names; that shared ordering is what pairs train_data[k] with target_train[k].
train_data_files = sorted(glob.glob(os.path.join("../data/train_thetas/", "*.csv")),
                          key=numeric_file_key)
train_data = [load_theta_tensor(f) for f in train_data_files]  # shape 15000 * (2, 40, 40)

target_train_files = sorted(glob.glob(os.path.join("../data/training_ground_truth_files", "*_ground")),
                            key=numeric_file_key)
target_train = [load_ground_truth_tensor(f) for f in target_train_files]  # expected shape 15000 * (40, 40)

test_data_files = sorted(glob.glob(os.path.join("../data/test_thetas/", "*.csv")),
                         key=numeric_file_key)
# One (2, 40, 40) tensor per test file (number of test files not checked here).
test_data = [load_theta_tensor(f) for f in test_data_files]

In [3]:
# Report how many input samples and how many target samples were loaded;
# the two counts are expected to match.
for label, samples in (("train_data shape: ", train_data),
                       ("target_train shape: ", target_train)):
    print(label, len(samples))

train_data shape:  15000
target_train shape:  15000


In [4]:
# Hold out 20% of the (input, target) pairs as a validation set.  The fixed
# random_state makes the split reproducible from run to run.
X_train, X_val, y_train, y_val = train_test_split(
    train_data,
    target_train,
    test_size=0.2,
    random_state=42,
)

In [5]:
def augment_rotate(X, y):
    """Expand every sample into its four quarter-turn rotations.

    Args:
        X: sequence of input tensors; each is rotated in the plane of its
           dimensions 1 and 2 (dimension 0 is left untouched).
        y: sequence of target tensors, indexed in parallel with ``X``; each
           is rotated in the plane of its dimensions 0 and 1.

    Returns:
        ``(X_aug, y_aug)``: two lists with ``4 * len(X)`` tensors each.  The
        rotations of one sample are adjacent, ordered by 0, 1, 2, 3 quarter
        turns, so inputs and targets stay aligned index by index.
    """
    quarter_turns = range(4)
    X_aug = [torch.rot90(X[idx], k, [1, 2])
             for idx in range(len(X)) for k in quarter_turns]
    y_aug = [torch.rot90(y[idx], k, [0, 1])
             for idx in range(len(X)) for k in quarter_turns]
    return X_aug, y_aug


def augment_flip(X, y):
    """Pair every sample with a mirrored copy of itself.

    Args:
        X: sequence of input tensors; the mirrored copy is flipped along
           dimension 1.
        y: sequence of target tensors, indexed in parallel with ``X``; the
           mirrored copy is flipped along dimension 0.

    Returns:
        ``(X_aug, y_aug)``: two lists with ``2 * len(X)`` entries each, laid
        out as original, flipped, original, flipped, ...  The "original"
        entries are the very same tensor objects that were passed in.
    """
    X_aug, y_aug = [], []
    for idx in range(len(X)):
        X_aug.extend((X[idx], torch.flip(X[idx], [1])))
        y_aug.extend((y[idx], torch.flip(y[idx], [0])))
    return X_aug, y_aug


def augment_roll(X, y):
    """Expand every sample into nine cyclically shifted copies.

    Each sample is rolled (with wrap-around) by every combination of
    -1, 0, +1 positions along its two trailing grid axes: dimensions 1 and 2
    for the inputs, dimensions 0 and 1 for the targets.

    Args:
        X: sequence of input tensors.
        y: sequence of target tensors, indexed in parallel with ``X``.

    Returns:
        ``(X_aug, y_aug)``: two lists with ``9 * len(X)`` tensors each.  The
        copies of one sample are adjacent, with the first shift varying
        slowest; the unshifted copy therefore sits at offset 4 of each group.
    """
    offsets = [(dx, dy) for dx in (-1, 0, 1) for dy in (-1, 0, 1)]
    X_aug, y_aug = [], []
    for idx in range(len(X)):
        for shift in offsets:
            X_aug.append(torch.roll(X[idx], shifts=shift, dims=(1, 2)))
            y_aug.append(torch.roll(y[idx], shifts=shift, dims=(0, 1)))
    return X_aug, y_aug


def augment_data(X, y):
    """Run the full augmentation pipeline: roll, then flip, then rotate.

    Each stage consumes the output of the previous one, so every input
    sample yields 9 (rolls) * 2 (flips) * 4 (rotations) = 72 variants.

    Args:
        X: sequence of input tensors.
        y: sequence of target tensors, indexed in parallel with ``X``.

    Returns:
        ``(X_aug, y_aug)`` as produced by the last stage, ``augment_rotate``.
    """
    augmented = (X, y)
    for stage in (augment_roll, augment_flip, augment_rotate):
        augmented = stage(*augmented)
    return augmented

In [6]:
# Per-channel normalisation for the two-channel inputs.  With mean 0 and
# std 1 the transform computes (x - 0) / 1, so it is numerically a no-op for
# now -- presumably a placeholder until real dataset statistics are plugged in.
data_transforms = transforms.Compose([transforms.Normalize(mean=[0.0, 0.0], std=[1.0, 1.0])])

In [7]:
# Output directories for the serialised tensors.  The trailing slash is
# intentional: later cells append the file name directly to these strings.
train_folder = "../data/train/"
val_folder = "../data/val/"
test_folder = "../data/test/"

# exist_ok=True creates the directory only when it is missing, which makes the
# cell safe to re-run and avoids the race between checking and creating.
for folder in (train_folder, val_folder, test_folder):
    os.makedirs(folder, exist_ok=True)

In [8]:
# Normalise every training sample, expand it into its augmented variants and
# write each variant to train_folder as X<k>.pt / y<k>.pt, where k is a
# running index over all variants of all samples.
for i, sample in enumerate(X_train):
    # The normalised tensor is written back into X_train, so re-running this
    # cell applies the transform again to already-transformed data.
    X_train[i] = data_transforms(sample)
    X_train_aug, y_train_aug = augment_data([X_train[i]], [y_train[i]])
    aug_num = len(X_train_aug)
    for j, (x_variant, y_variant) in enumerate(zip(X_train_aug, y_train_aug)):
        file_index = i * aug_num + j
        torch.save(x_variant, f"{train_folder}X{file_index}.pt")
        torch.save(y_variant, f"{train_folder}y{file_index}.pt")

In [None]:
# Normalise every validation sample (no augmentation) and write it, together
# with its target, to val_folder as X<i>.pt / y<i>.pt.
for i, sample in enumerate(X_val):
    # The normalised tensor replaces the original entry in X_val.
    X_val[i] = data_transforms(sample)
    torch.save(X_val[i], f"{val_folder}X{i}.pt")
    torch.save(y_val[i], f"{val_folder}y{i}.pt")

In [None]:
# Normalise every test sample and write it to test_folder as X<i>.pt.
# Only inputs are saved here; no targets are written for the test set.
for i, sample in enumerate(test_data):
    # The normalised tensor replaces the original entry in test_data.
    test_data[i] = data_transforms(sample)
    torch.save(test_data[i], f"{test_folder}X{i}.pt")