# Lets place this notebook in the root directory

In [7]:
import os
path = %pwd
if path.split(os.sep)[-1] == 'notebooks':
    %cd ..

Lets also refresh all our dependecies in run time

In [8]:
%reload_ext autoreload
%autoreload 2

load environment variables, if they exist

In [9]:
from dotenv import load_dotenv

load_dotenv(".env_consts")

True

# Imports

In [10]:
# -------------------------------- torch stuff ------------------------------- #
import torch

# ----------------------------------- other ---------------------------------- #
from tqdm import tqdm
import wandb

# ---------------------------------- Custom ---------------------------------- #
from src.load_dataset import get_splitter_dataloaders

# Get data

In [11]:
F16 = torch.float16
F32 = torch.float32
F64 = torch.float64
FTYPE = F32
TRAIN_SPLIT = float(os.getenv('KLEE_TRAIN_SPLIT', 0.8))
BATCH_SIZE = int(os.getenv('KLEE_BATCH_SIZE', 64))
kwargs = {
        "BATCH_SIZE": BATCH_SIZE,
        "TRAIN_SPLIT": TRAIN_SPLIT,
        "FTYPE": FTYPE,
        "cache": True,
        }
print("kwargs : ",kwargs)
train_loader, val_loader = get_splitter_dataloaders(**kwargs)

kwargs :  {'BATCH_SIZE': 8, 'TRAIN_SPLIT': 0.8, 'FTYPE': torch.float32, 'cache': True}


Caching dataset: 100%|██████████| 5720/5720 [00:53<00:00, 107.84it/s]


# WandB

In [12]:
wandb.init(project="klee_project_audio", entity="mustapha")

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

# Simple model

### Create model

In [13]:
# torch sequential
c_1 = 64
kernel_1 = 21
c_2 = 64
kernel_2 = 7
kernel_pool = 3

c_3 = 128
kernel_3 = 3
c_4 = 256
kernel_4 = 3

dropout = 0.1
model = torch.nn.Sequential( #input size = 80000
    torch.nn.Conv1d(1, c_1, kernel_1, stride=5, dtype=FTYPE),
    torch.nn.Conv1d(c_1, c_2, kernel_2, dtype=FTYPE),
    torch.nn.ReLU(),
    torch.nn.Dropout(dropout),
    torch.nn.MaxPool1d(kernel_pool),
    
    torch.nn.Conv1d(c_2, c_3, kernel_3, dtype=FTYPE),
    torch.nn.Conv1d(c_3, c_4, kernel_4, dtype=FTYPE),
    torch.nn.ReLU(),
    torch.nn.Dropout(dropout),
    torch.nn.MaxPool1d(kernel_pool),
    
    torch.nn.Flatten(),
    torch.nn.Linear(c_4*1775, 2, dtype=FTYPE),
)

### Train model

In [14]:
LEARNING_RATE = 1e-2
EPOCHS = 700
MODEL_DROPOUT = dropout
OPTIMIZER_L2 = 0.001
OPTIMIZER_MOM = 0.9
SCHEDULER_PATIENCE = 10
SCHEDULER_FACTOR = 0.5
SCHEDULER_MIN_LR = 1e-4
CLIP_GRAD = 1e10
EVAL_EACH = 10

wandb.config.update({
    "learning_rate": LEARNING_RATE,
    "epochs": EPOCHS,
    "MODEL_DROPOUT": MODEL_DROPOUT,
    "OPTIMIZER": "SGD",
    "OPTIMIZER_L2":OPTIMIZER_L2,
    "OPTIMIZER_MOM": OPTIMIZER_MOM,
    "batch_size": BATCH_SIZE,
    "SCHEDULER": "ReduceLROnPlateau",
    "SCHEDULER_PATIENCE": SCHEDULER_PATIENCE,
    "SCHEDULER_FACTOR": SCHEDULER_FACTOR,
    "SCHEDULER_MIN_LR": SCHEDULER_MIN_LR,
    "CLIP_GRAD": CLIP_GRAD,
})

In [17]:
loss = torch.nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE, weight_decay=OPTIMIZER_L2, momentum=OPTIMIZER_MOM)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=SCHEDULER_PATIENCE, factor=SCHEDULER_FACTOR,min_lr=SCHEDULER_MIN_LR)

model.to("cuda")

for epoch in range(EPOCHS):
    # -------------------------------- Train loop -------------------------------- #
    train_mean_loss = 0
    train_mean_count_loss = 0
    for d in tqdm(train_loader, "training loop"):
        audios = d[0].to("cuda")
        labels = d[1].to("cuda")
        predictions = model.forward(audios)
        # gender loss
        loss_value = loss(predictions, labels)
        train_mean_loss += loss_value.item()
        # count loss
        count_loss_value = loss(predictions.sum(axis=1), labels.sum(axis=1))
        train_mean_count_loss += count_loss_value.item()
        #optimize
        optimizer.zero_grad()
        loss_value.backward(retain_graph=True)
        count_loss_value.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), -CLIP_GRAD, CLIP_GRAD)
        optimizer.step()
    scheduler.step(train_mean_loss/len(train_loader))
    print("Epoch {}/{}".format(epoch+1, EPOCHS))
    print("Train Loss : {:.4f}".format(train_mean_loss/len(train_loader)), end="\t")
    print("Train count Loss : {:.4f}".format(train_mean_count_loss/len(train_loader)))
    log = {
        "train_loss":train_mean_loss/len(train_loader),
        "count_loss":train_mean_count_loss/len(train_loader),
        "epoch":epoch, 
        "next_lr" :scheduler.state_dict()["_last_lr"][0]
        }
    # --------------------------------- Eval loop -------------------------------- #
    if (epoch+1)%EVAL_EACH == 0:
        val_mean_loss = 0
        val_mean_count_loss = 0
        model.eval()
        for d in tqdm(val_loader, "evaluation loop"):
            audios = d[0].to("cuda")
            labels = d[1].to("cuda")
            predictions = model.forward(audios)
            # gender loss
            loss_value = loss(predictions, labels)
            val_mean_loss += loss_value.item()
            # count loss
            count_loss_value = loss(predictions.sum(axis=1), labels.sum(axis=1))
            val_mean_count_loss += count_loss_value.item()
        model.train()
        log["val_loss"] = val_mean_loss/len(val_loader)
        log["val_count_loss"] = val_mean_count_loss/len(val_loader)
        print("validation Loss : {:.4f}".format(val_mean_loss/len(val_loader)), end="\t")
        print("validation count Loss : {:.4f}".format(val_mean_count_loss/len(val_loader)))
        
    wandb.log(log)
    # wandb.watch(model)

training loop:   0%|          | 0/572 [00:00<?, ?it/s]


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.