In [1]:
!git clone https://github.com/DmitryPodpryatov/PMLDL-project.git

Cloning into 'PMLDL-project'...
remote: Enumerating objects: 2043, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 2043 (delta 2), reused 28 (delta 2), pack-reused 2013[K
Receiving objects: 100% (2043/2043), 1.19 GiB | 29.96 MiB/s, done.
Resolving deltas: 100% (4/4), done.
Checking out files: 100% (2014/2014), done.


In [2]:
%cd PMLDL-project/

/content/PMLDL-project


In [3]:
from IPython.display import Audio

from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split

import numpy as np
import os
from tqdm import tqdm

from music.wav import read_wav, write_wav

'2.7.0'

In [4]:
dataset_path = 'dataset/genres_original/'

genres = [folder for folder in os.listdir(dataset_path)]

tracks = {
    genre: list(map(
        lambda filename: os.path.join(dataset_path, genre, filename),
        os.listdir(os.path.join(dataset_path, genre))
    ))
    for genre in genres
}

genres

['country',
 'classical',
 'reggae',
 'disco',
 'blues',
 'pop',
 'hiphop',
 'jazz',
 'rock',
 'metal']

In [5]:
music_file = tracks[genres[-1]][0]

track = read_wav(music_file)

Audio(music_file)
music_file

'dataset/genres_original/metal/metal.00067.wav'

In [6]:
track

{ filename=dataset/genres_original/metal/metal.00067.wav,
 sample_rate=22050,
 length=30.000s }

In [7]:
dataset = dict()
dataset['disco'] = tracks['disco']
dataset['rock'] = tracks['rock']

dataset = {
    genre: [read_wav(filename) for filename in files]
    for genre, files in dataset.items()
}

dataset['rock'][0]

{ filename=dataset/genres_original/rock/rock.00058.wav,
 sample_rate=22050,
 length=30.013s }

In [8]:
# All tracks have same sample rate
SAMPLE_RATE = track.sample_rate

np.unique(
    [t.sample_rate
     for _, v in dataset.items()
     for t in v]
)

array([22050])

In [9]:
min([t.length for t in dataset['rock']])

29.995827664399094

In [10]:
def flatten(list_of_lists):
    return [elem for lst in tqdm(list_of_lists) for elem in lst]


train_ratio = 0.8
test_ratio = 1 - train_ratio

track_length = SAMPLE_RATE * 30

X_rock = [track.data for track in dataset['rock'][:10]]
X_disco = [track.data for track in dataset['disco'][:10]]

In [11]:
window = 1

X_rock_flat = flatten(X_rock)
X_disco_flat = flatten(X_disco)

y_rock_flat, X_rock_flat = X_rock_flat[window:], X_rock_flat[:-window]
y_disco_flat, X_disco_flat = X_disco_flat[window:], X_disco_flat[:-window]

X = np.concatenate([X_rock_flat, X_disco_flat])
y = np.concatenate([y_rock_flat, y_disco_flat])

100%|██████████| 10/10 [00:00<00:00, 16.36it/s]
100%|██████████| 10/10 [00:00<00:00, 15.94it/s]


In [12]:
def list2samples(iterable, window):
    sampled_iterable = []

    for i in range(0, len(iterable), window):
        sampled_iterable.append(iterable[i: i + window])

    return sampled_iterable


X_sampled = list2samples(X, window)
y_sampled = list2samples(y, window)

In [13]:
X_rock = pad_sequences(X_sampled)
X_disco = pad_sequences(y_sampled)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X_sampled, y_sampled, train_size=0.9)

X_train, X_test, y_train, y_test = np.asarray(X_train), np.asarray(X_test), np.asarray(y_train), np.asarray(y_test)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((11915663, 1), (11915663, 1), (1323963, 1), (1323963, 1))

In [15]:
np.save('X_train.npy', X_train)
np.save('X_test.npy', X_test)

np.save('y_train.npy', y_train)
np.save('y_test.npy', y_test)