### REI505M Final project: Music genre classification starter pack

The following Dataset class operates on the GTZAN dataset.

* Most GTZAN files are 30 seconds long (30 × 22050 = 661500 samples), but some are slightly shorter (approx. 29.9 seconds). For this reason we truncate at 660000 samples below.
* It may be beneficial to work with smaller chunks than ~30 seconds.
* You may want to perform the data augmentations in the `__getitem__` function.
* For now, `train_dataset` contains the entire dataset; you need to set aside some examples for validation and test sets.

In [1]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import scipy.io.wavfile as wav
import os
from sklearn.model_selection import train_test_split

from src.Config import Config

In [2]:
# Experiment settings handed to Config: dataset location, batch size,
# number of epochs and the random seed.
config = Config(audio_dir_path='../music/', # Path to folder with GTZAN files
                # Expected layout (one sub-folder per genre):
                # music/
                #  - rock/
                #       rock.00099.wav
                #       ...
                #  - reggae/
                #  ...
                #  - blues/
                batch_size=32, 
                epochs=10, 
                seed=42)

torch.manual_seed(config.seed) # Seed PyTorch's global RNG for reproducible results

<torch._C.Generator at 0x21aebcb78d0>

In [4]:
class AudioDataset(Dataset):
    """Dataset of fixed-length raw-audio excerpts read from WAV files.

    Each item is a ``(audio_samples, label)`` pair, where ``audio_samples``
    is a float32 tensor cut from the beginning of the file and ``label`` is
    the entry of ``labels`` at the same index.

    Args:
        audio_files: Sequence of files to serve. An entry is either a bare
            file name of the form ``<genre>.<id>.wav`` (resolved to
            ``audio_path/<genre>/<file name>``) or a path that already
            contains a directory component (opened exactly as given).
        labels: One label per entry of ``audio_files``, in the same order.
        audio_path: Root folder holding one sub-folder per genre; only used
            to resolve bare file names.
        maxlen: Maximum number of samples kept from a file.
        sampling_rate: Samples per second, used to convert ``duration`` into
            a number of samples.
        duration: Length of the returned excerpt in seconds.
    """

    def __init__(self, audio_files, labels, audio_path,
                 maxlen, sampling_rate, duration):
        self.audio_files = audio_files
        self.audio_path = audio_path
        self.labels = labels
        self.maxlen = maxlen
        self.sampling_rate = sampling_rate
        self.duration = duration

    def __len__(self):
        """Return the number of audio files in the dataset."""
        return len(self.audio_files)

    def _resolve_path(self, audio_file):
        """Return the path to open for one entry of ``audio_files``."""
        if os.path.dirname(audio_file):
            # The entry already points into a folder. Deriving the genre
            # folder from the first '.' would be wrong here, because the
            # directory part may itself contain dots (e.g. '../music/...').
            return audio_file
        # Bare file name such as 'rock.00099.wav': the text before the first
        # '.' is the genre, which is also the name of the sub-folder.
        genre_dir = audio_file[:audio_file.index('.')]
        return os.path.join(self.audio_path, genre_dir, audio_file)

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``(audio_samples, label)``."""
        label = self.labels[idx]
        file_path = self._resolve_path(self.audio_files[idx])

        # The sample rate stored in the file is not checked; the window
        # arithmetic below relies on self.sampling_rate instead.
        _, audio_samples = wav.read(file_path)
        audio_samples = torch.from_numpy(audio_samples).to(torch.float32)

        # Truncate to at most maxlen samples (a no-op for shorter files).
        audio_samples = audio_samples[:self.maxlen]

        tstart = 0  # Offset from start of song (hyper-parameter!)
        first = int(self.sampling_rate * tstart)
        last = int(self.sampling_rate * (tstart + self.duration))
        return audio_samples[first:last], label

# Full GTZAN genre -> class-index table, in the canonical genre order.
label_map = dict(zip(
    ('blues', 'classical', 'country', 'disco', 'hiphop',
     'jazz', 'metal', 'pop', 'reggae', 'rock'),
    range(10),
))

# Number of genres to keep for this experiment.
num_genres = 2  # <-- change this (2, 3, 5, 10, etc.)

# Keep the first `num_genres` genres, in table order.
selected_genres = list(label_map)[:num_genres]

# Re-number the kept genres 0..k-1 so the labels stay contiguous.
label_map = dict(zip(selected_genres, range(len(selected_genres))))

print(f"Using {num_genres} genres: {selected_genres}")
print("Updated label map:", label_map)


audio_files = []
labels = []

# Collect the .wav files of the selected genres only. Directory listings are
# sorted because os.listdir returns names in arbitrary order, and the seeded
# train/validation/test split below depends on the order of this list.
for genre in sorted(os.listdir(config.audio_dir_path)):
    if genre not in label_map:
        # Not one of the selected genres (or not a genre folder at all).
        continue
    genre_path = os.path.join(config.audio_dir_path, genre)
    for fname in sorted(os.listdir(genre_path)):
        if fname.endswith('.wav'):
            # Keep the path including the genre folder.
            audio_files.append(os.path.join(genre_path, fname))
            labels.append(label_map[genre])

print(f"Total selected files: {len(audio_files)}")

# Split ratio: 70% training, 15% validation, 15% test.

# Step 1: 70% train, 30% temporary hold-out pool. Stratifying keeps the genre
# proportions identical in both parts.
train_files, temp_files, train_labels, temp_labels = train_test_split(
    audio_files, labels, test_size=0.30, stratify=labels, random_state=config.seed
)

# Step 2: split the 30% pool in half -> 15% validation + 15% test.
val_files, test_files, val_labels, test_labels = train_test_split(
    temp_files, temp_labels, test_size=0.5, stratify=temp_labels, random_state=config.seed
)

print(f"Training set: {len(train_files)}")
print(f"Validation set: {len(val_files)}")
print(f"Test set: {len(test_files)}")

# Leakage check: the splits must be pairwise disjoint. A single three-way
# intersection would only catch a file present in all three splits at once.
assert set(train_files).isdisjoint(val_files), "train/validation overlap"
assert set(train_files).isdisjoint(test_files), "train/test overlap"
assert set(val_files).isdisjoint(test_files), "validation/test overlap"



# TODO: Create validation and test sets
#train_files = [audio_files[i] for i in range(len(audio_files))]
#train_labels = [labels[i] for i in range(len(audio_files))]
#print("Training set:", len(train_files))


#print("Validation set:", len(validation_files))


#print("Test set:", len(test_files))


#assert len(set(train_files) & set(validation_files) & set(test_files)) == 0

# Settings shared by all three splits: root folder, truncation length,
# sample rate and excerpt duration in seconds.
_dataset_kwargs = dict(audio_path=config.audio_dir_path,
                       maxlen=660000, sampling_rate=22050, duration=25)

# Training split: reshuffled every epoch.
train_dataset = AudioDataset(audio_files=train_files, labels=train_labels,
                             **_dataset_kwargs)
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)

# Validation and test splits: fixed order, so evaluation is repeatable.
val_dataset = AudioDataset(audio_files=val_files, labels=val_labels,
                           **_dataset_kwargs)
test_dataset = AudioDataset(audio_files=test_files, labels=test_labels,
                            **_dataset_kwargs)

val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)

# Smoke test: pull one training batch and report its shapes.
tmp_features, tmp_labels = next(iter(train_loader))
print(f"Feature batch shape: {tmp_features.size()}")
print(f"Labels batch shape: {tmp_labels.size()}")
#Test modification for git workflow TESTGITHUB

Using 2 genres: ['blues', 'classical']
Updated label map: {'blues': 0, 'classical': 1}
Total selected files: 200
Training set: 140
Validation set: 30
Test set: 30
Feature batch shape: torch.Size([32, 551250])
Labels batch shape: torch.Size([32])
