In [1]:
!conda install -c pytorch torchaudio
!pip install wandb

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [2]:
# imports
import random

import os
import os.path as path

import numpy as np

from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as distributions

from torch.utils.data import DataLoader, Dataset, random_split

import torchaudio
import torchaudio.functional as audioF
import torchaudio.transforms as audioT

from torchaudio.sox_effects import apply_effects_file

In [3]:
# Ensure deterministic behavior.
#
# The seed is a fixed integer on purpose. Python salts `str` hashes per process
# (see PYTHONHASHSEED), so seeds derived from hash("some text") change on every
# kernel restart and defeat reproducibility. In addition, `h % 2**32 - 1` parses
# as `(h % 2**32) - 1`, which can evaluate to -1 and is rejected by
# `np.random.seed`.
SEED = 42

torch.backends.cudnn.deterministic = True
# The cuDNN autotuner may select different kernels from run to run; disable it.
torch.backends.cudnn.benchmark = False

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Device configuration: first CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
import wandb

# Log in to Weights & Biases up front; the run itself is opened later by
# `wandb.init` inside `model_pipeline`.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmaverickuor[0m (use `wandb login --relogin` to force relogin)


True

In [6]:
# Hyperparameters and run metadata; passed to `model_pipeline`, which forwards
# them to `wandb.init(config=...)`.
config = {
    "epochs": 20,
    "classes": 4,
    "kernels": [2205, 256],
    "batch_size": 64,
    "learning_rate": 0.005,
    "dataset": "MER_Taffc",
    "architecture": "1D Convolution",
}

In [9]:
# data paths
# Root directory handed to MERTaffcDataSet by get_data(); the dataset class
# lists the "Q1".."Q4" sub-folders directly beneath it.
DATA_PATH = "../../datasets/MER_taffc/wav/"

In [10]:
class MERTaffcDataSet(Dataset):
    """MER_Taffc dataset yielding fixed-length mono waveforms.

    Audio files are discovered under ``root_dir/<folder>`` for each entry of
    ``folders``; the folder name is the class label. Every item is resampled
    to ``sample_rate`` and zero-padded or truncated to exactly
    ``sample_rate * duration`` frames.

    Args:
        root_dir: Directory containing the ``Q1`` .. ``Q4`` sub-folders.
        sample_rate: Target sample rate passed to the sox ``rate`` effect.
        duration: Clip length in seconds used to size the output buffer.
    """

    folders = ["Q1", "Q2", "Q3", "Q4"]
    # Folder name -> class-index tensor returned as the target.
    labels = {
        "Q1": torch.tensor(0),
        "Q2": torch.tensor(1),
        "Q3": torch.tensor(2),
        "Q4": torch.tensor(3)
    }

    def __init__(self, root_dir, sample_rate=22050, duration=30):
        self.root_dir = root_dir
        self.sample_rate = sample_rate
        self.duration = duration
        # int() keeps the buffer size valid even for a fractional duration.
        self.frame_count = int(self.sample_rate * self.duration)

        all_files = []
        for folder in self.folders:
            # os.listdir returns entries in arbitrary order. Sorting gives a
            # stable index -> file mapping, without which a seeded
            # random_split would still produce different splits per machine.
            names = sorted(os.listdir(path.join(root_dir, folder)))
            all_files.extend(
                (path.join(root_dir, folder, name), folder) for name in names
            )

        self.files = all_files
        self.count = len(all_files)

    def __len__(self):
        """Return the number of discovered audio files."""
        return self.count

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``(waveform, label)``.

        ``waveform`` has shape ``(1, frame_count)``; ``label`` is the class
        tensor from ``labels`` for the file's folder.
        """
        audio_file, label = self.files[idx]

        x, sr = torchaudio.load(audio_file)

        # The output buffer has a single channel, so multi-channel input
        # cannot be assigned into it. Down-mix by averaging the channels;
        # mono input is left untouched.
        if x.shape[0] > 1:
            x = x.mean(dim=0, keepdim=True)

        effects = [
          ["rate", f"{self.sample_rate}"]
        ]
        x, _ = torchaudio.sox_effects.apply_effects_tensor(x, sr, effects)

        # Zero-pad short clips and truncate long ones to frame_count frames.
        out = torch.zeros(1, self.frame_count)
        frames = min(self.frame_count, x.shape[1])
        out[:, :frames] = x[:, :frames]

        return (out, self.labels[label])

In [11]:
class BaseAudioNet(nn.Module):
    """1-D convolutional classifier operating on single-channel waveforms.

    ``features`` is a strided front-end convolution followed by four stages of
    two Conv1d/BatchNorm1d/ReLU triples each; every stage ends in a max-pool
    except the last, which ends in an adaptive average pool to 50 steps.
    ``predictor`` is a three-layer MLP over the flattened features.

    Layer order and Sequential indices are kept stable, so ``state_dict`` keys
    do not depend on how the layers are assembled here.

    Args:
        num_classes: Size of the output layer. Defaults to 4, the previously
            hard-coded value.
    """

    # Temporal length produced by the final adaptive pooling layer.
    _POOLED_LEN = 50

    def __init__(self, num_classes=4):
        super().__init__()

        # Front end: wide, strided convolution followed by pooling.
        layers = [
            nn.Conv1d(in_channels=1, out_channels=48, kernel_size=80, stride=4),
            nn.BatchNorm1d(num_features=48),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=4, stride=4),
        ]

        # (in_channels, out_channels) of each two-convolution stage.
        stages = [(48, 48), (48, 96), (96, 192), (192, 384)]
        for index, (in_ch, out_ch) in enumerate(stages):
            layers += self._conv_bn_relu(in_ch, out_ch)
            layers += self._conv_bn_relu(out_ch, out_ch)
            if index == len(stages) - 1:
                # Final stage: fix the temporal length so the MLP input size
                # does not depend on the waveform length.
                layers.append(nn.AdaptiveAvgPool1d(self._POOLED_LEN))
            else:
                layers.append(nn.MaxPool1d(kernel_size=4, stride=4))

        self.features = nn.Sequential(*layers)

        self.predictor = nn.Sequential(
            nn.Linear(in_features=384 * self._POOLED_LEN, out_features=512),
            nn.ReLU(),
            nn.Linear(in_features=512, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=num_classes)
        )

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels):
        """Return the [Conv1d(k=3, s=1), BatchNorm1d, ReLU] triple as a list."""
        return [
            nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1),
            nn.BatchNorm1d(num_features=out_channels),
            nn.ReLU(),
        ]

    def forward(self, x):
        """Map a ``(batch, 1, frames)`` input to ``(batch, num_classes)`` logits."""
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)
        x = self.predictor(x)
        return x

In [12]:
def get_data():
    """Load the dataset at DATA_PATH and split it 60/20/20.

    Returns:
        The (train, validation, test) subsets from ``random_split``. The
        test subset takes whatever is left after the two truncated shares.
        The split uses its own generator seeded with 42.
    """
    full_dataset = MERTaffcDataSet(DATA_PATH)
    total = len(full_dataset)

    train_len = int(total * 0.6)
    valid_len = int(total * 0.2)
    split_sizes = [train_len, valid_len, total - (train_len + valid_len)]

    split_rng = torch.Generator().manual_seed(42)
    return random_split(full_dataset, split_sizes, generator=split_rng)


def make_loader(dataset, batch_size, shuffle=True):
    """Wrap ``dataset`` in a ``DataLoader``.

    Args:
        dataset: Any dataset accepted by ``DataLoader``.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle every epoch. Defaults to True, the
            previously hard-coded value; pass False for evaluation loaders
            that should iterate in a fixed order.

    Returns:
        The configured ``DataLoader``.
    """
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

In [13]:
# def train(model, loader, criterion, optimizer, epochs=20):
#     total_batches = len(loader) * epochs
#     example_ct = 0  # number of examples seen
#     batch_ct = 0
#     for epoch in tqdm(range(epochs)):
#         for (X, labels) in loader:
#             loss = train_batch(X, labels, model, criterion, optimizer)
#             example_ct +=  len(X)
#             batch_ct += 1
#             # Report metrics every 25th batch
#             if ((batch_ct + 1) % 25) == 0:
#                 train_log(loss, example_ct, epoch)

# def train_batch(X, labels, model, criterion, optimizer):
#     X = X.to(device)
#     labels = labels.to(device)
    
#     # Forward pass ➡
#     outputs = F.softmax(model(X), dim=1)
    
#     loss = criterion(outputs, labels)

#     # Backward pass ⬅
#     optimizer.zero_grad()
#     loss.backward()

#     # Step with optimizer
#     optimizer.step()

#     return loss

In [14]:
# def run():
    
#     (train_dataset, val_dataset, test_dataset) = get_data()
#     train_loader = make_loader(train_dataset, 64)
    
#     model = BaseAudioNet()
    
#     criterion = nn.CrossEntropyLoss()
#     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#     train(model, train_loader, criterion, optimizer, epochs=1)

# run()

In [15]:
# (X, label) = next(iter(dataloader))
# X = X.to(device)
# logits = net(X)
# pred_probab = nn.Softmax(dim=1)(logits)
# y_pred = pred_probab.argmax(1)
# print(f"Predicted class: {y_pred}")

In [16]:
# optimizer = torch.optim.Adam(params=net.parameters())
# loss_fn = torch.

# for batch in dataloader:
#     x, targets = batch
#     preds = net(x)
#     print(preds.shape)
#     break

In [17]:
# (train, x, y) = get_data()
# t = make_loader(train, 64)
# a = next(iter(t))
# print(a[0])

In [18]:
def make_model(config):
    """Build the network for a run.

    ``config`` is accepted so the factory has the same shape as the other
    ``make*`` helpers, but it is not read: the model is always constructed
    with its default arguments.
    """
    return BaseAudioNet()

In [19]:
def make(config):
    """Assemble everything a run needs from ``config``.

    Reads ``config.batch_size`` and ``config.learning_rate``.

    Returns:
        ``(model, train_loader, valid_loader, test_loader, criterion,
        optimizer)``, with the model already moved to ``device``.
    """
    # Data: one loader per split, all with the configured batch size.
    train_loader, valid_loader, test_loader = (
        make_loader(split, batch_size=config.batch_size) for split in get_data()
    )

    # Model
    model = make_model(config).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    return model, train_loader, valid_loader, test_loader, criterion, optimizer

In [20]:
def train_log(loss, example_ct, epoch):
    """Send one training-loss data point to wandb and echo it to stdout.

    Args:
        loss: Loss value; converted with ``float()`` before logging.
        example_ct: Number of examples seen so far, used as the wandb step.
        epoch: Current epoch index, logged alongside the loss.
    """
    loss_value = float(loss)

    wandb.log({"epoch": epoch, "loss": loss_value}, step=example_ct)

    padded_count = str(example_ct).zfill(5)
    print(f"Loss after {padded_count} examples: {loss_value:.3f}")

In [21]:
def test(model, test_loader):
    """Evaluate ``model`` on ``test_loader``, log accuracy, and export to ONNX.

    The model is put in eval mode, accuracy is computed without gradients,
    printed, and logged to wandb as ``test_accuracy``. The model is then
    exported to ``model.onnx`` and the file is handed to ``wandb.save``.

    Args:
        model: Network to evaluate; expected to live on ``device``.
        test_loader: Iterable of ``(X, labels)`` batches.

    Raises:
        ValueError: If ``test_loader`` yields no examples, since neither an
            accuracy nor an example input for the export exists.
    """
    model.eval()

    correct, total = 0, 0
    example_input = None

    # Run the model on the test examples
    with torch.no_grad():
        for X, labels in test_loader:
            X, labels = X.to(device), labels.to(device)
            predicted = model(X).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # Keep a real batch around: ONNX export needs a sample input to
            # trace the model with.
            example_input = X

    if total == 0:
        raise ValueError("test_loader yielded no examples; cannot evaluate or export the model")

    print(f"Accuracy of the model on the {total} " + f"test images: {100 * correct / total}%")
    wandb.log({"test_accuracy": correct / total})

    # Save the model in the exchangeable ONNX format. The original referenced
    # an undefined name `images` here, which raised NameError after evaluation.
    torch.onnx.export(model, example_input, "model.onnx")
    wandb.save("model.onnx")

In [22]:
def train(model, loader, _val_loader, criterion, optimizer, config):
    """Train ``model`` for ``config.epochs`` epochs, logging the loss to wandb.

    Args:
        model: Network to optimize.
        loader: Iterable of ``(X, labels)`` training batches.
        _val_loader: Validation loader; accepted for interface compatibility
            but currently unused.
        criterion: Loss function, also handed to ``wandb.watch``.
        optimizer: Optimizer stepping the model parameters.
        config: Object exposing ``epochs``.
    """
    # tell wandb to watch what the model gets up to: gradients, weights, and more!
    wandb.watch(model, criterion, log="all", log_freq=10)

    print(config)

    # test() leaves the model in eval mode; make sure BatchNorm layers use
    # batch statistics whenever training (re)starts.
    model.train()

    # Run training and track with wandb
    example_ct = 0  # number of examples seen
    batch_ct = 0    # number of batches seen
    for epoch in tqdm(range(config.epochs)):
        for (X, labels) in loader:
            loss = train_batch(X, labels, model, optimizer, criterion)
            example_ct += len(X)
            batch_ct += 1

            # Report metrics every 25th batch. batch_ct is already
            # incremented, so test it directly; the former `(batch_ct + 1)`
            # fired on batches 24, 49, ... instead.
            if batch_ct % 25 == 0:
                train_log(loss, example_ct, epoch)


def train_batch(X, labels, model, optimizer, criterion):
    """Run one optimization step on a single batch and return its loss.

    Inputs and targets are moved to ``device``, then the usual
    zero-grad / forward / backward / step sequence is executed.

    Returns:
        The loss tensor produced by ``criterion`` for this batch.
    """
    X, labels = X.to(device), labels.to(device)

    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass
    loss = criterion(model(X), labels)

    # Backward pass, then parameter update
    loss.backward()
    optimizer.step()

    return loss

In [23]:
def model_pipeline(hyperparameters):
    """Run the full experiment inside a single wandb run.

    Opens a wandb run configured with ``hyperparameters``, builds the model,
    data loaders, loss and optimizer via ``make``, trains, evaluates on the
    test loader, and returns the trained model once the run has closed.
    """
    run = wandb.init(project='baseline-audio-only', entity='maverickuor', config=hyperparameters)

    with run:
        # Read hyperparameters back from wandb so logging matches execution.
        config = wandb.config

        # Model, data, and optimization problem
        components = make(config)
        model, train_loader, valid_loader, test_loader, criterion, optimizer = components
        print(model)

        # Training
        train(model, train_loader, valid_loader, criterion, optimizer, config)

        # Final evaluation
        test(model, test_loader)

    return model

In [24]:
# Build, train and analyze the model with the pipeline.
# `model_pipeline` opens a wandb run, trains, evaluates on the test loader,
# and returns the trained network, which is bound to `model` here.
model = model_pipeline(config)

BaseAudioNet(
  (features): Sequential(
    (0): Conv1d(1, 48, kernel_size=(80,), stride=(4,))
    (1): BatchNorm1d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (4): Conv1d(48, 48, kernel_size=(3,), stride=(1,))
    (5): BatchNorm1d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Conv1d(48, 48, kernel_size=(3,), stride=(1,))
    (8): BatchNorm1d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
    (11): Conv1d(48, 96, kernel_size=(3,), stride=(1,))
    (12): BatchNorm1d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU()
    (14): Conv1d(96, 96, kernel_size=(3,), stride=(1,))
    (15): BatchNorm1d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

  0%|          | 0/20 [00:00<?, ?it/s]



VBox(children=(Label(value=' 0.01MB of 0.01MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

KeyboardInterrupt: 