### REI505M Final project: Music genre classification starter pack

The following Dataset class operates on the GTZAN dataset.

* The duration of most GTZAN files is 30 seconds (`30 * 22050 = 661500` samples), but some are slightly shorter (approx. 29.9 seconds). For this reason we truncate at 660000 samples below.
* It may be beneficial to work with smaller chunks than ~30 seconds.
* You may want to perform the data augmentations in the `__getitem__` function.
* For now, `train_dataset` contains all the dataset, you need to set aside some examples for validation and test sets.

In [None]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn

from src.Conv1D import Conv1D
from src.Config import Config
from src.AudioDataset import AudioDataset
from src.DataPreparation import get_partitioned_data
import src.Utils as Utils 

In [None]:
# Use the GPU when CUDA is available; otherwise run on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print("Training on:", device)

# `audio_dir_path` points at the folder holding the GTZAN files, laid out
# with one sub-folder per genre:
#   music/
#     rock/
#       rock.00099.wav
#       ...
#     reggae/
#       ...
#     blues/
config = Config(
    audio_dir_path="../music/",
    num_genres=10,  # how many genres to use, e.g. 2, 3, 5 or 10
    # Data partition sizes (train / validation / test).
    train_part_size=0.7,
    val_part_size=0.15,
    test_part_size=0.15,
    batch_size=4,
    learning_rate=1e-3,
    epochs=10,
    seed=42,
    device=device,
)

# Seed PyTorch's random number generator for reproducible results.
torch.manual_seed(config.seed)

In [None]:
# Load the configured number of genres and partition the files and labels
# into train / validation / test sets.
(
    train_files, train_labels,
    val_files, val_labels,
    test_files, test_labels,
) = get_partitioned_data(config)

# Audio settings shared by all three datasets.
clip_settings = dict(maxlen=660000, sampling_rate=22050, duration=25)

# Training set: the loader shuffles the examples.
train_dataset = AudioDataset(
    audio_files=train_files,
    labels=train_labels,
    audio_path=config.audio_dir_path,
    **clip_settings,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=config.batch_size,
    shuffle=True,
)

# Validation set: kept in a fixed order.
val_dataset = AudioDataset(
    val_files,
    val_labels,
    config.audio_dir_path,
    **clip_settings,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=config.batch_size,
    shuffle=False,
)

# Test set: kept in a fixed order.
test_dataset = AudioDataset(
    test_files,
    test_labels,
    config.audio_dir_path,
    **clip_settings,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=config.batch_size,
    shuffle=False,
)

# Pull one training batch and print its shapes as a quick sanity check.
first_batch = next(iter(train_loader))
tmp_features, tmp_labels = first_batch
print(f"Feature batch shape: {tmp_features.size()}")
print(f"Labels batch shape: {tmp_labels.size()}")

In [None]:
# Build the Conv1D model with as many output classes as configured genres,
# then move it to the configured device.
n_classes = config.num_genres
model = Conv1D(in_c=1, out_c=64, k=7, use_pool=True, n_classes=n_classes)
model = model.to(config.device)

# Adam optimizer with the configured learning rate, and cross-entropy loss.
opt = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
crit = nn.CrossEntropyLoss()

# Train the model on the training set.
Utils.train(
    train_dataset,
    train_loader,
    model,
    opt,
    lossfunc=crit,
    config=config,
    show_batch_time=True,
)

In [None]:
# Run the model on the test set, using the same loss function as training.
Utils.test(
    test_dataset,
    test_loader,
    model,
    lossfunc=crit,
    config=config,
)