In [2]:
import pathlib
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import MultiLabelBinarizer
import utils

In [3]:
data_path = pathlib.Path("../scaper/soundscapes/train")
wav_paths = [_ for _ in data_path.iterdir() if _.suffix == ".wav"]
label_paths = [_.with_suffix(".txt") for _ in wav_paths]

labels = [utils.read_label_txt(_)["label"].to_list() for _ in tqdm(label_paths)]

  0%|          | 0/10000 [00:00<?, ?it/s]

In [4]:
mlb = MultiLabelBinarizer()
label_array = mlb.fit_transform(labels)
classes = mlb.classes_

In [5]:
classes

array(['Alarm_bell_ringing', 'Cat', 'Dishes', 'Dog',
       'Electric_shaver_toothbrush'], dtype=object)

In [6]:
df = pd.DataFrame(data = {
    "relative_path": [_.name for _ in wav_paths],
    "class_ids": [_ for _ in label_array]
})

In [7]:
df.head()

Unnamed: 0,relative_path,class_ids
0,soundscape_unimodal4241.wav,"[0, 1, 1, 0, 1]"
1,soundscape_unimodal1544.wav,"[0, 0, 1, 1, 0]"
2,soundscape_unimodal5168.wav,"[0, 0, 0, 1, 1]"
3,soundscape_unimodal5422.wav,"[0, 0, 1, 1, 0]"
4,soundscape_unimodal3289.wav,"[0, 0, 0, 0, 0]"


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init
import torch.optim 
from utils import SoundDS
from torch.utils.data import random_split
from models import AudioClassifier

In [8]:
myds = SoundDS(df, data_path)

In [19]:
# Train / Val Split

num_items = len(myds)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
batch_size = 50

train_ds, val_ds = random_split(myds, [num_train, num_val])
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=batch_size, shuffle=False)

In [12]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [13]:
model = AudioClassifier()
model = model.to(device)

In [16]:
import wandb

# 1. Start a new run
wandb.init(project='endomic', entity='maddonix')

# 2. Save model inputs and hyperparameters
config = wandb.config
# config.learning_rate = 0.01



# # 3. Log gradients and model parameters
# wandb.watch(model)
# for batch_idx, (data, target) in enumerate(train_loader):
#   ...
#   if batch_idx % args.log_interval == 0:
#     # 4. Log metrics to visualize performance
#     wandb.log({"loss": loss})
  

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

In [27]:
# ----------------------------
# Training Loop
# ----------------------------
def training(model, train_dl, num_epochs, classes):
    wandb.init(project='endomic', entity='maddonix')
    config = wandb.config
    
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001,
                                                steps_per_epoch=int(len(train_dl)),
                                                epochs=num_epochs,
                                                anneal_strategy='linear')
    n_classes = len(classes)

    # Repeat for each epoch
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_prediction = np.zeros((n_classes))
        total_prediction = 0

        # Repeat for each batch in the training set
        for i, data in enumerate(train_dl):
            # Get the input features and target labels, and put them on the GPU
            inputs, labels = data[0].to(device), data[1].to(device)

            # Normalize the inputs
            inputs_m, inputs_s = inputs.mean(), inputs.std()
            inputs = (inputs - inputs_m) / inputs_s

            # Zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels.type_as(outputs))
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Keep stats for Loss and Accuracy
            running_loss += loss.item()

            # Get the predicted classes with threshold > 0.5
#             _, prediction = torch.max(outputs,1)
            outputs[outputs>0.5] = 1
            outputs[outputs<=0.5] = 0
            prediction = outputs
            
            # Count of predictions that matched the target label
#             print(prediction.shape)
#             print(labels.shape)
#             print((prediction==labels).shape)
#             return prediction == labels
            correct_prediction += (prediction == labels).sum(axis = 0).cpu().numpy()
            total_prediction += prediction.shape[0]

#             if i % 10 == 0 and i > 0:    # print every 10 mini-batches
#                 print(f'Epoch: {epoch}, iteration: {i+1} loss: {running_loss / (i * 100)}')

        # Print stats at the end of the epoch
        num_batches = len(train_dl)
        avg_loss = running_loss / num_batches
        acc = correct_prediction/(total_prediction*n_classes)
        wandb.log({
            "loss": loss,
            "acc": {_: acc[i] for i, _ in enumerate(classes)}
        })
        
        
        print(f'Epoch: {epoch}, Loss: {avg_loss}, Accuracy: {acc}')

    print('Finished Training')

In [28]:
training(model, train_dl, 10, classes)

VBox(children=(Label(value=' 0.01MB of 0.01MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Epoch: 0, Loss: 0.6191074412316084, Accuracy: [0.13155  0.132525 0.135175 0.132975 0.17005 ]
Epoch: 1, Loss: 0.574749032407999, Accuracy: [0.1329  0.13315 0.13555 0.13285 0.17225]
Epoch: 2, Loss: 0.5406540913507343, Accuracy: [0.133175 0.133625 0.1355   0.13445  0.174275]
Epoch: 3, Loss: 0.5268620289862156, Accuracy: [0.133125 0.1335   0.135525 0.136275 0.1762  ]
Epoch: 4, Loss: 0.5141498684883118, Accuracy: [0.133625 0.133275 0.135475 0.13865  0.179525]


  check = torch.cuda.FloatTensor(1).fill_(0)


Epoch: 5, Loss: 0.5060690386220813, Accuracy: [0.133675 0.1341   0.1354   0.14175  0.18035 ]
Epoch: 6, Loss: 0.4990142021328211, Accuracy: [0.13405  0.133525 0.135725 0.14345  0.1821  ]
Epoch: 7, Loss: 0.49378336872905493, Accuracy: [0.13465  0.13345  0.135275 0.145675 0.183425]
Epoch: 8, Loss: 0.4910734578967094, Accuracy: [0.135375 0.134125 0.1355   0.1461   0.18245 ]
Epoch: 9, Loss: 0.48805265314877033, Accuracy: [0.135225 0.134    0.135375 0.146825 0.183375]
Finished Training


In [41]:
def evaluate(model, dl, classes):
    model.eval()
    running_loss = 0.0
    criterion = nn.BCEWithLogitsLoss()
    n_classes = len(classes)
    correct_prediction = np.zeros((n_classes))
    total_prediction = 0

    # Repeat for each batch in the training set
    for i, data in enumerate(dl):
        # Get the input features and target labels, and put them on the GPU
        inputs, labels = data[0].to(device), data[1].to(device)

        # Normalize the inputs
        inputs_m, inputs_s = inputs.mean(), inputs.std()
        inputs = (inputs - inputs_m) / inputs_s


        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels.type_as(outputs))

        # Keep stats for Loss and Accuracy
        running_loss += loss.item()

        # Get the predicted classes with threshold > 0.5
    #             _, prediction = torch.max(outputs,1)
        outputs[outputs>0.5] = 1
        outputs[outputs<=0.5] = 0
        prediction = outputs

        # Count of predictions that matched the target label
        correct_prediction += (prediction == labels).sum(axis = 0).cpu().numpy()
        total_prediction += prediction.shape[0]

    # Print stats at the end of the epoch
    num_batches = len(train_dl)
    avg_loss = running_loss / num_batches
    acc = correct_prediction/(total_prediction*n_classes)
    acc_dict = {classes[j]: _ for j, _ in enumerate(acc)}
    print(f'Loss: {avg_loss}, Accuracy: {acc_dict}')


In [42]:
evaluate(model, val_dl, classes)

Loss: 0.13218424748629332, Accuracy: {'Alarm_bell_ringing': 0.1343, 'Cat': 0.1339, 'Dishes': 0.1286, 'Dog': 0.1433, 'Electric_shaver_toothbrush': 0.1774}


In [44]:
# save
model_path = pathlib.Path("test_model.pth")
torch.save(model.state_dict(), model_path)
