# Guide for data processing

In [1]:
import numpy as np
from tools import read_data
import scipy.io as sio

## Load data

In [2]:
# Dataset selection: the utterance set is active. To switch to the vowel set,
# point the two paths below at:
#   '../../data/vowels/train/acoustics_train.mat'
#   '../../data/vowels/train/articulation_train.mat'
acous_train = '../../data/utterance1/train/acoustics_train.mat'
artic_train = '../../data/utterance1/train/articulation_train.mat'

# acous_raw and artic_raw are lists; each element of a list is a numpy array.
acous_raw, artic_raw, labels = read_data.load_data(acous_train, artic_train)

# The reported dimension is the size of the first axis of the first array.
acous_dim = acous_raw[0].shape[0]
artic_dim = artic_raw[0].shape[0]

# Summary: item count and dimension for each modality.
for _fmt, _value in (
    ('Acoustics data: {:d}', len(acous_raw)),
    ('Acoustics dimension: {:d}\n', acous_dim),
    ('Articulation data: {:d}', len(artic_raw)),
    ('Articulation dimension: {:d}', artic_dim),
):
    print(_fmt.format(_value))

Acoustics data: 655
Acoustics dimension: 39

Articulation data: 655
Articulation dimension: 14


## Check data

In [5]:
# Spot-check one randomly chosen item by plotting it straight from the
# original .mat contents.
# NOTE: this cell takes about 2~5 minutes to load.

# Draw a single uniform index directly instead of building a whole
# permutation and discarding all but its first element.
idx = np.random.randint(len(acous_raw))

acous_tmp = sio.loadmat(acous_train)['acoustics'][0,0]
artic_tmp = sio.loadmat(artic_train)['articulation'][0,0]

# dtype.names is the ordered tuple of field names, so position idx here
# refers to the same field as the positional lookup acous_tmp[idx] below.
key = acous_tmp.dtype.names[idx]
print(key)

read_data.check_original_plot(acous_tmp[idx], artic_tmp[idx])

JW13_TP082


## Zero padding

In [4]:
# Zero-pad both lists; the resulting arrays are laid out as
# (batch size x max length x features).
acous_padded = read_data.zero_padding(acous_raw)
artic_padded = read_data.zero_padding(artic_raw)

# Report the padded shape of each modality.
for _caption, _padded in (
    ('padded acoustics:', acous_padded),
    ('padded articulation:', artic_padded),
):
    print(_caption, _padded.shape)

padded acoustics: (655, 2756, 39)
padded articulation: (655, 2756, 14)


## Note
- RNN/LSTM 훈련 시 variable length에 대한 고려 필요
- 인풋 및 아웃풋 데이터의 형태를 (batch_size) x (time_step) x (dimension)로 넣어줄 경우 (=truncated BPTT) 데이터를 time_step만큼씩 잘라주는 과정 필요 (e.g. read_data.divide_timestep 사용)