In [2]:
import os
import csv

import numpy as np
import matplotlib.pyplot as plt

import torch

In [3]:
def get_subdirs(path):
    """Return the names of the sub-directories located directly under ``path``.

    ``os.listdir`` yields entries in arbitrary order, so the names are sorted
    to make any positional slicing of the result reproducible between runs
    and machines. Entries that are not directories (e.g. stray files) are
    skipped, since callers list the contents of each returned name.

    Args:
        path: Directory to scan.

    Returns:
        Alphabetically sorted list of sub-directory names (not full paths).

    Raises:
        OSError: Propagated from ``os.listdir`` if ``path`` cannot be listed.
    """
    return sorted(
        entry
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )

def parse_dataset(path, subdirs):
    """Read every tab-separated file found in the given sub-directories.

    Args:
        path: Root directory of the dataset.
        subdirs: Names of the sub-directories of ``path`` to read.

    Returns:
        A list with one entry per file. Each entry is the list of rows
        produced by ``csv.reader``, and each row is a list of strings.
        Sub-directories are visited in the order given; files inside a
        sub-directory are visited in sorted name order.

    Raises:
        OSError: If a sub-directory cannot be listed or a file cannot be opened.
    """
    dataset = []

    for subdir in subdirs:
        subdir_path = os.path.join(path, subdir)

        # os.listdir order is arbitrary; sort so the dataset order is stable.
        for file in sorted(os.listdir(subdir_path)):
            fp = os.path.join(subdir_path, file)

            # The context manager closes the handle once the rows are read;
            # newline='' is what the csv module expects for file objects.
            with open(fp, 'r', newline='') as handle:
                data = list(csv.reader(handle, delimiter='\t'))

            dataset.append(data)
    return dataset

def to_float(data):
    """Convert each item of ``data`` into a floating-point NumPy array.

    Args:
        data: Iterable of array-like items (for example nested lists of
            numeric strings).

    Returns:
        A list holding one ``float`` ndarray per input item, in input order.
    """
    converted = []
    for item in data:
        as_array = np.array(item)
        converted.append(as_array.astype('float'))
    return converted

def crop_datasets(dataset, min_len):
    """Truncate every sequence to its first ``min_len`` items and stack them.

    Args:
        dataset: Iterable of sliceable sequences.
        min_len: Number of leading items to keep from each sequence.

    Returns:
        The result of ``np.array`` applied to the list of truncated sequences.
    """
    cropped = []
    for sequence in dataset:
        cropped.append(sequence[:min_len])
    return np.array(cropped)

In [4]:
data_path = '../data/aussign'

subdirs = get_subdirs(data_path)

# Positional split of the sub-directories: the first seven feed the training
# set, the eighth the validation set and the ninth the test set. The split
# therefore depends on the order in which get_subdirs returns the names.
train_dataset = to_float(parse_dataset(data_path, subdirs[:7]))
val_dataset = to_float(parse_dataset(data_path, subdirs[7:8]))
test_dataset = to_float(parse_dataset(data_path, subdirs[8:9]))

# Shortest sequence in each split, and across all three splits.
min_len_train = min(len(d) for d in train_dataset)
min_len_val = min(len(d) for d in val_dataset)
min_len_test = min(len(d) for d in test_dataset)
min_len = min(min_len_train, min_len_val, min_len_test)

# Crop every split to the global minimum so all sequences share one length.
# Cropping to min_len_train would leave the validation/test splits ragged
# whenever they contain a sequence shorter than the shortest training one.
train_dataset = crop_datasets(train_dataset, min_len)
val_dataset = crop_datasets(val_dataset, min_len)
test_dataset = crop_datasets(test_dataset, min_len)

In [6]:
# Destination file for the serialized train/validation/test splits.
out_path = '../data/aussign_parsed'

# Bundle the three splits under their names and write them in one call.
torch.save(
    obj=dict(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        test_dataset=test_dataset,
    ),
    f=out_path,
)