# Lets place this notebook in the root directory

In [1]:
import os
path = %pwd
if path.split(os.sep)[-1] == 'notebooks':
    %cd ..

/mnt/c/Users/Mustapha/Documents/competitions/klee_project_audio


Lets also refresh all our dependecies in run time

In [2]:
%reload_ext autoreload
%autoreload 2

load environment variables, if they exist

In [3]:
from dotenv import load_dotenv

load_dotenv(".env_consts")

True

# Imports

In [4]:
# -------------------------------- torch stuff ------------------------------- #
import torch

# ----------------------------------- other ---------------------------------- #
from tqdm import tqdm
import wandb

# ---------------------------------- Custom ---------------------------------- #
from src.load_dataset_fft import get_splitter_dataloaders_fft

# Get data

In [5]:
F16 = torch.float16
F32 = torch.float32
F64 = torch.float64
FTYPE = F32
TRAIN_SPLIT = float(os.getenv('KLEE_TRAIN_SPLIT', 0.8))
BATCH_SIZE = int(os.getenv('KLEE_BATCH_SIZE', 32))
kwargs = {
        "BATCH_SIZE": BATCH_SIZE,
        "TRAIN_SPLIT": TRAIN_SPLIT,
        "FTYPE": FTYPE,
        "fft_nperseg": 400,
        "fft_noverlap": 240,
        "fft_window_type": "tukey",
        "fft_in_db": False,
        }
print("kwargs : ",kwargs)
train_loader, val_loader, data = get_splitter_dataloaders_fft(**kwargs)

kwargs :  {'BATCH_SIZE': 8, 'TRAIN_SPLIT': 0.8, 'FTYPE': torch.float32, 'fft_nperseg': 400, 'fft_noverlap': 240, 'fft_window_type': 'tukey', 'fft_in_db': False}


  fft /= np.linalg.norm(fft, axis=0, keepdims=True)
Caching dataset: 100%|██████████| 5720/5720 [03:34<00:00, 26.66it/s]


In [6]:
example, lab = train_loader.dataset[0]
example.shape  # (channel, frequency, time)

torch.Size([1, 201, 498])

# WandB

In [7]:
wandb.init(project="klee_project_audio", entity="mustapha")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmustapha[0m (use `wandb login --relogin` to force relogin)


# Original model CRNN

### Create model

In [8]:
class PermuteForLSTM(torch.nn.Module):
    """
    Permute the input from (batch, channel, freq, time) to (batch, time, freq, channel)
    """
    def __init__(self):
        super().__init__()
    def forward(self, x):
        return x.permute(0, 3, 2, 1)
    

class ReshapForLSTM(torch.nn.Module):
    """
    Reshape the input from (batch, time, freq, channel) to (batch, time, freq*channel)
    """
    def __init__(self):
        super().__init__()
    def forward(self, x):
        return x.reshape(x.shape[0], x.shape[1], -1)

class SequentialLSTM(torch.nn.Module):
    """
    LSTMs in Pytorch, outputs a tuple (output, (h_n, c_n)), we only need output
    """
    def __init__(self, input_size=1280, hidden_size=40, num_layers=1, dropout=0):
        super().__init__()
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, dropout=dropout)
    def forward(self, x):
        x, _ = self.lstm(x)
        return x

In [9]:
# torch sequential
class Parameters():
    def __init__(self, parameters):
        self.__dict__.update(parameters)

def CRNN(p):
    return torch.nn.Sequential( #input size = 80000
        torch.nn.Conv2d(1, 64, kernel_size=3),
        torch.nn.ReLU(),
        torch.nn.Conv2d(64, 32, kernel_size=3),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(kernel_size=3, stride=3),
        
        torch.nn.Conv2d(32, 128, kernel_size=3),
        torch.nn.ReLU(),
        torch.nn.Conv2d(128, 64, kernel_size=3),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(kernel_size=3, stride=3),
        
        torch.nn.Dropout(p.dropout),
        #permute 3 with 1
        PermuteForLSTM(),
        ReshapForLSTM(),
        SequentialLSTM(1280, 40), # 1280 = 20 freq *64 channels
        torch.nn.Tanh(),
        torch.nn.MaxPool1d(kernel_size=2, stride=2),
        torch.nn.Flatten(),
        torch.nn.Linear(53*20, 11), # Original contains 52*20, I don't know why !
        torch.nn.Softmax(dim=1)
    )

params = Parameters({
    "dropout": 0.2,
})
model = CRNN(params)

In [10]:
model(example.unsqueeze(0)).shape

torch.Size([1, 11])

### Train model

In [11]:
LEARNING_RATE = 1e-3
EPOCHS = 700
MODEL_DROPOUT = params.dropout
EVAL_EACH = 10

wandb.config.update({
    "learning_rate": LEARNING_RATE,
    "epochs": EPOCHS,
    "MODEL" : "RCNN",
    "MODEL_DROPOUT": MODEL_DROPOUT,
    "OPTIMIZER": "ADAM",
    # "batch_size": BATCH_SIZE,
    **kwargs
})

In [None]:
mae = torch.nn.L1Loss()
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
model.to("cuda")

for epoch in range(EPOCHS):
    # -------------------------------- Train loop -------------------------------- #
    train_mean_loss = 0
    train_mean_count_loss = 0
    for d in tqdm(train_loader, "training loop"):
        audios = d[0].to("cuda")
        labels = d[1]
        # one hot encode labels
        count_labels = labels.sum(axis=1)
        one_hot_label = torch.eye(11)[count_labels].to("cuda")
        # forward pass
        predictions = model.forward(audios)
        # count loss
        count_loss_value = loss(predictions, one_hot_label)
        train_mean_count_loss += count_loss_value.item()
        #optimize
        optimizer.zero_grad()
        count_loss_value.backward()
        optimizer.step()
    print("Epoch {}/{}".format(epoch+1, EPOCHS))
    print("Train count Loss : {:.4f}".format(train_mean_count_loss/len(train_loader)))
    log = {
        "CrossEntropy_train_count_loss":train_mean_count_loss/len(train_loader),
        "epoch":epoch
        }
    # --------------------------------- Eval loop -------------------------------- #
    if (epoch+1)%EVAL_EACH == 0:
        val_mean_loss = 0
        val_mean_count_loss = 0
        model.eval()
        for d in tqdm(val_loader, "evaluation loop"):
            audios = d[0].to("cuda")
            labels = d[1]
            # one hot encode labels
            count_labels = labels.sum(axis=1)
            one_hot_label = torch.eye(11)[count_labels].to("cuda")
            # forward pass
            predictions = model.forward(audios)
            # count loss
            count_loss_value = mae(predictions, one_hot_label)
            val_mean_count_loss += count_loss_value.item()
        model.train()
        log["MAE_val_count_loss"] = val_mean_count_loss/len(val_loader)
        print("validation count Loss : {:.4f}".format(val_mean_count_loss/len(val_loader)))
        
    wandb.log(log)
    # wandb.watch(model)

training loop: 100%|██████████| 143/143 [00:40<00:00,  3.57it/s]


Epoch 1/700
Train count Loss : 2.3987


training loop:  37%|███▋      | 53/143 [00:14<00:25,  3.56it/s]