In [8]:
import numpy as np
import os

In [3]:
def split_feature(X, split_window_len, split_window_shift):
    """Cut every feature matrix in ``X`` into fixed-length sliding windows.

    Parameters
    ----------
    X : iterable of 2-D arrays
        Each item is sliced as ``feature[start:start + len, :]``, so axis 0
        is the axis being windowed and axis 1 is kept whole.
    split_window_len : int
        Number of rows (axis-0 steps) in each window.
    split_window_shift : int
        Distance between the starts of consecutive windows.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of the stacked windows. When all features share a
        shape this is ``(len(X), n_windows, split_window_len, n_columns)``.
        A feature shorter than ``split_window_len`` contributes no windows.
    """
    X_split = []
    for feature in X:
        # The last window that still fits starts at shape[0] - window_len.
        # range() excludes its stop value, so add 1; otherwise the final
        # full window is dropped whenever it lines up exactly with the end
        # of the feature (e.g. a feature that is exactly one window long).
        last_start = feature.shape[0] - split_window_len
        feature_split = [
            feature[start:start + split_window_len, :]
            for start in range(0, last_start + 1, split_window_shift)
        ]
        X_split.append(feature_split)
    return np.array(X_split, dtype=np.float32)

In [12]:
def load_split_feature(dataset_feature_rootpath):
    """Load already-split feature files from a ``<root>/<genre>/<file>`` tree.

    Every file under every genre directory is read with ``np.load``. Each
    loaded array is expected to be 4-D; it is reshaped to 3-D using axes
    0, 2 and 3 (which only succeeds when axis 1 has length 1) and its last
    two axes are then swapped.

    Parameters
    ----------
    dataset_feature_rootpath : str
        Directory whose immediate children are the genre directories.

    Returns
    -------
    numpy.ndarray
        ``float32`` array stacking one entry per file, ordered by sorted
        genre name and then by sorted file name.
    """
    genres = sorted(os.listdir(dataset_feature_rootpath))
    X_split = []
    for genre in genres:
        genre_path = os.path.join(dataset_feature_rootpath, genre)
        # os.listdir returns entries in arbitrary order; sort so the sample
        # order is reproducible across runs and machines.
        for file_name in sorted(os.listdir(genre_path)):
            segments = np.load(os.path.join(genre_path, file_name))
            # Shape is passed positionally: the ``newshape=`` keyword is
            # deprecated in recent NumPy releases.
            segments = np.reshape(
                segments,
                (segments.shape[0], segments.shape[2], segments.shape[3]),
            )
            segments = np.transpose(segments, axes=(0, 2, 1))
            X_split.append(segments)
    return np.array(X_split, dtype=np.float32)

In [4]:
# Original notes: the full time axis has 647 steps and one clip lasts 29.2 s;
# a 128-step split window is about 3 s long.

# Load the un-split 1024 feature arrays.
X_stft_1024 = np.load(
    file='GTZAN/without_split_features/stft_feature_1024.npy'
)
X_melspec_1024 = np.load(
    file='GTZAN/without_split_features/melspec_feature_1024.npy'
)
X_mfcc_1024 = np.load(
    file='GTZAN/without_split_features/mfcc_feature_1024.npy'
)

# Window every feature set: 128 steps per window, windows start 64 steps apart.
X_stft_split_1024_3s = split_feature(
    X=X_stft_1024, split_window_len=128, split_window_shift=64
)
X_melspec_split_1024_3s = split_feature(
    X=X_melspec_1024, split_window_len=128, split_window_shift=64
)
X_mfcc_split_1024_3s = split_feature(
    X=X_mfcc_1024, split_window_len=128, split_window_shift=64
)

# Write the windowed arrays to disk (file names kept exactly as they were).
np.save(
    file='GTZAN/split_features/stft_featur_split_1024_3s.npy',
    arr=X_stft_split_1024_3s,
)
np.save(
    file='GTZAN/split_features/melspec_featur_split_1024_3s.npy',
    arr=X_melspec_split_1024_3s,
)
np.save(
    file='GTZAN/split_features/mfcc_featur_split_1024_3s.npy',
    arr=X_mfcc_split_1024_3s,
)

In [4]:
# Load the un-split 2048 feature arrays.
X_stft_2048 = np.load(
    file='GTZAN/without_split_features/stft_feature_2048.npy'
)
X_melspec_2048 = np.load(
    file='GTZAN/without_split_features/melspec_feature_2048.npy'
)
X_mfcc_2048 = np.load(
    file='GTZAN/without_split_features/mfcc_feature_2048.npy'
)

# Window every feature set: 64 steps per window, windows start 32 steps apart.
# NOTE(review): the "_3s" suffix suggests ~3 s per window - confirm.
X_stft_split_2048_3s = split_feature(
    X=X_stft_2048, split_window_len=64, split_window_shift=32
)
X_melspec_split_2048_3s = split_feature(
    X=X_melspec_2048, split_window_len=64, split_window_shift=32
)
X_mfcc_split_2048_3s = split_feature(
    X=X_mfcc_2048, split_window_len=64, split_window_shift=32
)

# Write the windowed arrays to disk (file names kept exactly as they were).
np.save(
    file='GTZAN/split_features/stft_featur_split_2048_3s.npy',
    arr=X_stft_split_2048_3s,
)
np.save(
    file='GTZAN/split_features/melspec_featur_split_2048_3s.npy',
    arr=X_melspec_split_2048_3s,
)
np.save(
    file='GTZAN/split_features/mfcc_featur_split_2048_3s.npy',
    arr=X_mfcc_split_2048_3s,
)

In [5]:
# Sanity check: show the dimensions of the windowed 2048 STFT array.
print(np.shape(X_stft_split_2048_3s))

(1000, 19, 64, 1025)


In [13]:
# Read the pre-split STFT files from the shared dataset directory.
X_split_ls = load_split_feature(
    dataset_feature_rootpath='/share/music-datasets/GTZAN/stft-seg-3s-1024/'
)

In [16]:
# Store the array loaded from the pre-split files next to the other split features.
np.save(
    file='GTZAN/split_features/stft_featur_split_1024_3s_ls.npy',
    arr=X_split_ls,
)