In [1]:
import numpy as np

## Load the datasets

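We load the data from the UCR Time Series archive; you can download it from http://www.cs.ucr.edu/~eamonn/time_series_data/. This notebook uses the three `uWaveGestureLibrary` datasets (`X`, `Y` and `Z`), each of which comes with a `TRAIN` and a `TEST` file.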

In [4]:
# Root directory of the UCR TS Archive on your file system; change it to match
# your own setup. The loading cell appends '/uWaveGestureLibrary_<channel>/...'
# to this value, so it works with or without a trailing slash, e.g.:
#datapath = '/media/christiaan/extra/timeseries/UCR_TS_Archive_2015'
datapath = '/media/sf_VBox_Shared/timeseries/UCR_TS_Archive_2015/'

In [5]:
# Read the TRAIN and TEST file of each of the X, Y and Z datasets into two
# dictionaries keyed by the channel name. The files are comma-separated.
datasets_train = {}
datasets_test = {}
for channel in ("X", "Y", "Z"):
    name = 'uWaveGestureLibrary_' + channel
    folder = datapath + '/' + name + '/'
    datasets_train[channel] = np.genfromtxt(folder + name + '_TRAIN', delimiter=',')
    datasets_test[channel] = np.genfromtxt(folder + name + '_TEST', delimiter=',')
    # Print the (rows, columns) of both files as a quick sanity check
    print(datasets_train[channel].shape, datasets_test[channel].shape)

(896, 316) (3582, 316)
(896, 316) (3582, 316)
(896, 316) (3582, 316)


In [6]:
# Column 0 of every file holds the class label. Put the training labels of the
# three channels side by side: one row per sample, one column per channel.
train_labels_per_channel = np.column_stack(
    [datasets_train[channel][:, 0] for channel in ('X', 'Y', 'Z')])

# Check labels are the same across channels: if they are, the standard
# deviation within every row is zero, and so is the sum over all rows.
print(train_labels_per_channel.std(axis=1).sum()) # This should be 0

# The channels agree, so the labels of the X channel are used for both sets.
y_train = train_labels_per_channel[:, 0].astype('int')
y_test = datasets_test['X'][:, 0].astype('int')
print(y_test.shape)

0.0
(3582,)


In [7]:
# Drop the label column (column 0) of every channel and combine the three
# channels along a new last axis, so the arrays have one row per sample, one
# column per remaining file column and one layer per channel.
channels = ('X', 'Y', 'Z')
X_train = np.stack([datasets_train[name][:, 1:] for name in channels], axis=-1)
X_test = np.stack([datasets_test[name][:, 1:] for name in channels], axis=-1)
print(X_train.shape)
print(X_test.shape)

(896, 315, 3)
(3582, 315, 3)


In [8]:
# Change class labels so that they range from 0 to n-1.
# np.unique returns the distinct training labels in sorted order, so the
# mapping is deterministic: the smallest original label becomes 0, the next
# one 1, and so on. (Iterating over a set, as list(set(...)) does, has no
# guaranteed order, so the encoding could otherwise differ between runs or
# Python implementations.) A sorted mapping also makes re-running this cell
# harmless, because labels that are already 0..n-1 map onto themselves.
classlabels = np.unique(y_train).tolist()
mapclasses = {label: index for index, label in enumerate(classlabels)}

# The mapping is built from the training labels only; a test label that never
# occurs in the training set raises a KeyError here.
y_train = np.array([mapclasses[c] for c in y_train], dtype='int')
y_test = np.array([mapclasses[c] for c in y_test], dtype='int')

## Split the dataset into train and validation

In [9]:
# Keep 70% of the original training samples for training and hold out the
# rest for validation.
# NOTE: this cell overwrites X_train and y_train, so run it only once after
# the cells above; running it again would split the already reduced set.
ntrain = X_train.shape[0]
num_training = int(ntrain * 0.7)
num_validation = ntrain - num_training
num_test = X_test.shape[0]

# Shuffle samples and labels with the same permutation so they stay aligned.
# The fixed seed makes the split reproducible.
np.random.seed(123)
neworder = np.random.permutation(ntrain)
X_train_random = X_train[neworder]
y_train_random = y_train[neworder]

# The first num_training shuffled samples become the new training set; the
# remaining num_validation samples become the validation set.
train_idx = np.arange(num_training)
val_idx = np.arange(num_training, num_training + num_validation)
X_val, y_val = X_train_random[val_idx], y_train_random[val_idx]
X_train, y_train = X_train_random[train_idx], y_train_random[train_idx]

print(X_val.shape)

(269, 315, 3)


## Convert output data to binary (one-hot) format

In [10]:
# We need to convert the integer class labels to a binary class matrix: one
# row per sample, one column per class, with a 1 in the column of its class.
from keras.utils.np_utils import to_categorical

# Use one explicit class count for all three sets. Without it, to_categorical
# sizes each matrix from the largest label found in that particular array, so
# a set that happens to lack the highest class would end up with fewer
# columns than the others. The count is passed positionally because the
# keyword name differs between Keras versions.
num_classes = int(max(y_train.max(), y_val.max(), y_test.max())) + 1
y_train_binary = to_categorical(y_train, num_classes)
y_val_binary = to_categorical(y_val, num_classes)
y_test_binary = to_categorical(y_test, num_classes)

Using Theano backend.


## Save the data in binary format

In [12]:
# Directory the prepared arrays are written to; change this to your path.
# Keep the trailing slash: the save cell builds each file name by appending
# the array name directly to this string.
outdatapath = '/media/sf_VBox_Shared/timeseries/UCR_WaveGesture/'

In [13]:
# Write every array to its own file in outdatapath; np.save appends the
# '.npy' extension to the given name.
# The label files are named '*_binary', so they must contain the binary
# class matrices (y_*_binary) computed above, not the integer label vectors.
arrays_to_save = {
    'X_train': X_train,
    'y_train_binary': y_train_binary,
    'X_val': X_val,
    'y_val_binary': y_val_binary,
    'X_test': X_test,
    'y_test_binary': y_test_binary,
}
for filename, array in arrays_to_save.items():
    np.save(outdatapath + filename, array)