## Setup

In [1]:
from os.path import join

def setup_file_system(in_colab):
    if in_colab:
        from google.colab import drive

        # Set the base and mount path
        MOUNT_PATH_DRIVE = '/content/drive'
        BASE_PATH = join(
            MOUNT_PATH_DRIVE, 
            "MyDrive/project_asr"
        )

        # Mount the google drive
        drive.mount(MOUNT_PATH_DRIVE)

        return BASE_PATH

    else:
        return "/workspaces/project_automated_sound_recognition"

In [2]:
import sys
from os import chdir
from os.path import join

# Method to check if the notebook is running in colab or local
IN_COLAB = 'google.colab' in sys.modules

# Set the base path of the project
BASE_PATH = setup_file_system(IN_COLAB)

# Set the base path of the project
chdir(join(BASE_PATH, "src/"))

In [3]:
%load_ext autoreload
%autoreload 2

# Imports
# Utils
import matplotlib as plt
import numpy as np
import wandb
import sys
import importlib
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import datetime
import json


# DL libraries
import torch
import torch.optim as optim
from torch import nn
import torch.utils.data 
from torch.utils.data import DataLoader

# User libraries
from dataset.audio_sample_dataset import AudioSampleDataset
from model.baseline_model import BaselineModel
from trainer.trainer import train_classification_model
from validator.validator import validate_classification_model
from util import config, util_functions, model_management

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
test_augmentations = {
    'pitch_shift': {
        'enabled': False,
    },
    'noise': {
        'enabled': False,
    },
    'mixup': {
        'enabled': False,
    },
    'freq_mask': {
        'enabled': False,
    },
    'time_mask': {
        'enabled': False,
    }
}

## Run 1 - Pitch shift

In [4]:
augmentations = {
    'pitch_shift': {
        'enabled': True,
        'p': 0.5,
        'min_semitones': -4, 
        "max_semitones": 4,
    },
    'noise': {
        'enabled': False,
    },
    'mixup': {
        'enabled': False,
    },
    'freq_mask': {
        'enabled': False,
    },
    'time_mask': {
        'enabled': False,
    }
}

In [5]:
# Get the train and test data
train_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TRAIN_DATA_PATH),
        augmentations
    )
test_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TEST_DATA_PATH),
        test_augmentations
    )

# Place in dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=1)

In [6]:
# Clear gpu cache
torch.cuda.empty_cache()

# Get the model
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(in_features=512, out_features= 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=256, out_features=len(config.LABELS)),
    nn.Softmax(dim= 1)
)
model.to(config.DEVICE)

# Set the optimizer
optimizer = optim.Adam(model.parameters(), lr=config.LR)

# Set the loss fn
criteria = nn.CrossEntropyLoss()

# Set the gradient scaler
grad_scaler = torch.cuda.amp.grad_scaler.GradScaler()

# Setup weights and biasses
wandb.login()

# Get the current time for the checkpoint name
now = datetime.datetime.now()

# Set the wandb experiment name
experiment_name = util_functions.generate_run_name_from_config(augmentations)

# Start wandb
wandb.init(
    settings=wandb.Settings(start_method="fork"),
    project="project_asr", 
    name=experiment_name, 
    config={
        "learning_rate": config.LR,
        "batch_size": config.BATCH_SIZE,
        "epochs": config.EPOCHS,
        "augmentations": json.dumps(augmentations),
    }
)


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrobberdg[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
# Set the variables to keep track of the best model
best_validation_loss = 10000
best_model_state = model.state_dict()

for epoch in range(config.EPOCHS):
  # Set the model in training mode
  model.train()

  # Train the model
  total_train_loss_this_epoch = train_classification_model(
      model,
      optimizer,
      criteria,
      grad_scaler,
      train_dataloader
  )
  
  # Set the model in evaluation mode
  model.eval()

  # Validate the model
  total_val_loss_this_epoch, f1, f1_avg, accuracy = validate_classification_model(
      model,
      criteria,
      test_dataloader,
  )

  # Calculate the loss values
  train_loss_this_epoch = total_train_loss_this_epoch/len(train_dataloader.dataset)
  val_loss_this_epoch = total_val_loss_this_epoch/len(test_dataloader.dataset)

  # Log the train loss this epoch
  wandb.log({
      'train_loss': train_loss_this_epoch,
      'val_loss': val_loss_this_epoch,
      'f1_score': f1_avg,
      'acc': accuracy
  })

  print(f'epoch: {epoch}, train_loss: {train_loss_this_epoch}, val_loss: {val_loss_this_epoch}. f1: {f1_avg}, acc: {accuracy}')
  print(f1)

  # If this is the best performing model yet, save it
  if val_loss_this_epoch < best_validation_loss:
    # Update the score
    best_validation_loss = val_loss_this_epoch

    now = datetime.datetime.now()

    # Save the model
    checkpoint_path = join(
      BASE_PATH, 
      config.MODEL_CHECKPOINT_PATH, 
      f'{experiment_name}.pth'
    )
    best_model_state = model_management.save_model(model, checkpoint_path, False, '')

100%|██████████| 79/79 [02:05<00:00,  1.59s/it]
100%|██████████| 2000/2000 [00:29<00:00, 66.71it/s]


epoch: 0, train_loss: 0.018137290287017823, val_loss: 2.2926682611703875. f1: 0.10705828916914946, acc: 0.1535
[0.07662835 0.01785714 0.20165746 0.         0.12005457 0.27074236
 0.         0.26119403 0.         0.12244898]


100%|██████████| 79/79 [02:03<00:00,  1.56s/it]
100%|██████████| 2000/2000 [00:30<00:00, 66.07it/s]


epoch: 1, train_loss: 0.01778613398075104, val_loss: 2.2652536702752113. f1: 0.15115650791917487, acc: 0.205
[0.09125475 0.02790698 0.18636364 0.         0.12229299 0.35751295
 0.         0.34181818 0.         0.38441558]


100%|██████████| 79/79 [01:52<00:00,  1.43s/it]
100%|██████████| 2000/2000 [00:39<00:00, 50.28it/s]


epoch: 2, train_loss: 0.01735201005935669, val_loss: 2.2356510112881662. f1: 0.17435068747738788, acc: 0.2255
[0.10791367 0.07079646 0.22604423 0.00843882 0.11888112 0.40172166
 0.         0.38113949 0.         0.42857143]


100%|██████████| 79/79 [02:28<00:00,  1.88s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.21it/s]


epoch: 3, train_loss: 0.01703935749530792, val_loss: 2.2145909329652786. f1: 0.2097941638547464, acc: 0.2565
[0.19254658 0.19285714 0.25       0.00829876 0.12993631 0.44515103
 0.         0.41393443 0.         0.46521739]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.78it/s]


epoch: 4, train_loss: 0.01675270049571991, val_loss: 2.1975343222618102. f1: 0.24157672251209586, acc: 0.2895
[0.20987654 0.40611354 0.2632613  0.02419355 0.11       0.49618321
 0.         0.41616162 0.01081081 0.47916667]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.22it/s]


epoch: 5, train_loss: 0.01642323920726776, val_loss: 2.179512438237667. f1: 0.2517284669899052, acc: 0.3015
[0.13937282 0.37785016 0.2632613  0.0244898  0.10887097 0.51260504
 0.01117318 0.44787645 0.12206573 0.50971922]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.85it/s]


epoch: 6, train_loss: 0.016095683705806732, val_loss: 2.164947706758976. f1: 0.2789792051041381, acc: 0.3175
[0.15224913 0.3908046  0.26898048 0.048      0.13076923 0.52115813
 0.05319149 0.45792564 0.23396226 0.53275109]


100%|██████████| 79/79 [01:51<00:00,  1.41s/it]
 72%|███████▏  | 1449/2000 [00:20<00:08, 68.28it/s]

In [None]:
now = datetime.datetime.now()

# Save the final model
checkpoint_path = join(
    BASE_PATH, 
    config.MODEL_CHECKPOINT_PATH, 
    f'{experiment_name}.pth'
)
best_model_state = model_management.save_model(model, checkpoint_path, True, 'final_classification_model_unsegmented')

In [None]:
# Mark the run as finished
wandb.finish()

## Run 2 - Noise

In [None]:
augmentations = {
    'pitch_shift': {
        'enabled': False,
    },
    'noise': {
        'enabled': True,
        'p': 0.5,
        'min_amplitude': 0.001,
        'max_amplitude': 0.015,
    },
    'mixup': {
        'enabled': False,
    },
    'freq_mask': {
        'enabled': False
    },
    'time_mask': {
        'enabled': False,
    }
}

In [None]:
# Get the train and test data
train_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TRAIN_DATA_PATH),
        augmentations
    )
test_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TEST_DATA_PATH),
        test_augmentations
    )

# Place in dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=1)

In [None]:
# Clear gpu cache
torch.cuda.empty_cache()

# Get the model
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(in_features=512, out_features= 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=256, out_features=len(config.LABELS)),
    nn.Softmax(dim= 1)
)
model.to(config.DEVICE)

# Set the optimizer
optimizer = optim.Adam(model.parameters(), lr=config.LR)

# Set the loss fn
criteria = nn.CrossEntropyLoss()

# Set the gradient scaler
grad_scaler = torch.cuda.amp.grad_scaler.GradScaler()

# Setup weights and biasses
wandb.login()

# Get the current time for the checkpoint name
now = datetime.datetime.now()

# Set the wandb experiment name
experiment_name = util_functions.generate_run_name_from_config(augmentations)

# Start wandb
wandb.init(
    settings=wandb.Settings(start_method="fork"),
    project="project_asr", 
    name=experiment_name, 
    config={
        "learning_rate": config.LR,
        "batch_size": config.BATCH_SIZE,
        "epochs": config.EPOCHS,
        "augmentations": json.dumps(augmentations),
    }
)


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrobberdg[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Set the variables to keep track of the best model
best_validation_loss = 10000
best_model_state = model.state_dict()

for epoch in range(config.EPOCHS):
  # Set the model in training mode
  model.train()

  # Train the model
  total_train_loss_this_epoch = train_classification_model(
      model,
      optimizer,
      criteria,
      grad_scaler,
      train_dataloader
  )
  
  # Set the model in evaluation mode
  model.eval()

  # Validate the model
  total_val_loss_this_epoch, f1, f1_avg, accuracy = validate_classification_model(
      model,
      criteria,
      test_dataloader,
  )

  # Calculate the loss values
  train_loss_this_epoch = total_train_loss_this_epoch/len(train_dataloader.dataset)
  val_loss_this_epoch = total_val_loss_this_epoch/len(test_dataloader.dataset)

  # Log the train loss this epoch
  wandb.log({
      'train_loss': train_loss_this_epoch,
      'val_loss': val_loss_this_epoch,
      'f1_score': f1_avg,
      'acc': accuracy
  })

  print(f'epoch: {epoch}, train_loss: {train_loss_this_epoch}, val_loss: {val_loss_this_epoch}. f1: {f1_avg}, acc: {accuracy}')
  print(f1)

  # If this is the best performing model yet, save it
  if val_loss_this_epoch < best_validation_loss:
    # Update the score
    best_validation_loss = val_loss_this_epoch

    now = datetime.datetime.now()

    # Save the model
    checkpoint_path = join(
      BASE_PATH, 
      config.MODEL_CHECKPOINT_PATH, 
      f'{experiment_name}.pth'
    )
    best_model_state = model_management.save_model(model, checkpoint_path, False, '')

100%|██████████| 79/79 [02:05<00:00,  1.59s/it]
100%|██████████| 2000/2000 [00:29<00:00, 66.71it/s]


epoch: 0, train_loss: 0.018137290287017823, val_loss: 2.2926682611703875. f1: 0.10705828916914946, acc: 0.1535
[0.07662835 0.01785714 0.20165746 0.         0.12005457 0.27074236
 0.         0.26119403 0.         0.12244898]


100%|██████████| 79/79 [02:03<00:00,  1.56s/it]
100%|██████████| 2000/2000 [00:30<00:00, 66.07it/s]


epoch: 1, train_loss: 0.01778613398075104, val_loss: 2.2652536702752113. f1: 0.15115650791917487, acc: 0.205
[0.09125475 0.02790698 0.18636364 0.         0.12229299 0.35751295
 0.         0.34181818 0.         0.38441558]


100%|██████████| 79/79 [01:52<00:00,  1.43s/it]
100%|██████████| 2000/2000 [00:39<00:00, 50.28it/s]


epoch: 2, train_loss: 0.01735201005935669, val_loss: 2.2356510112881662. f1: 0.17435068747738788, acc: 0.2255
[0.10791367 0.07079646 0.22604423 0.00843882 0.11888112 0.40172166
 0.         0.38113949 0.         0.42857143]


100%|██████████| 79/79 [02:28<00:00,  1.88s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.21it/s]


epoch: 3, train_loss: 0.01703935749530792, val_loss: 2.2145909329652786. f1: 0.2097941638547464, acc: 0.2565
[0.19254658 0.19285714 0.25       0.00829876 0.12993631 0.44515103
 0.         0.41393443 0.         0.46521739]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.78it/s]


epoch: 4, train_loss: 0.01675270049571991, val_loss: 2.1975343222618102. f1: 0.24157672251209586, acc: 0.2895
[0.20987654 0.40611354 0.2632613  0.02419355 0.11       0.49618321
 0.         0.41616162 0.01081081 0.47916667]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.22it/s]


epoch: 5, train_loss: 0.01642323920726776, val_loss: 2.179512438237667. f1: 0.2517284669899052, acc: 0.3015
[0.13937282 0.37785016 0.2632613  0.0244898  0.10887097 0.51260504
 0.01117318 0.44787645 0.12206573 0.50971922]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.85it/s]


epoch: 6, train_loss: 0.016095683705806732, val_loss: 2.164947706758976. f1: 0.2789792051041381, acc: 0.3175
[0.15224913 0.3908046  0.26898048 0.048      0.13076923 0.52115813
 0.05319149 0.45792564 0.23396226 0.53275109]


100%|██████████| 79/79 [01:51<00:00,  1.41s/it]
 72%|███████▏  | 1449/2000 [00:20<00:08, 68.28it/s]

In [None]:
now = datetime.datetime.now()

# Save the final model
checkpoint_path = join(
    BASE_PATH, 
    config.MODEL_CHECKPOINT_PATH, 
    f'{experiment_name}.pth'
)
best_model_state = model_management.save_model(model, checkpoint_path, True, 'final_classification_model_unsegmented')

In [None]:
# Mark the run as finished
wandb.finish()

## Run 3 - Mixup

In [None]:
augmentations = {
    'pitch_shift': {
        'enabled': False,
    },
    'noise': {
        'enabled': False,
    },
    'mixup': {
        'enabled': True,
        'p': 0.5,
        'alpha': 0.2,
    },
    'freq_mask': {
        'enabled': False,
    },
    'time_mask': {
        'enabled': False,
    }
}

In [None]:
# Get the train and test data
train_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TRAIN_DATA_PATH),
        augmentations
    )
test_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TEST_DATA_PATH),
        test_augmentations
    )

# Place in dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=1)

In [None]:
# Clear gpu cache
torch.cuda.empty_cache()

# Get the model
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(in_features=512, out_features= 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=256, out_features=len(config.LABELS)),
    nn.Softmax(dim= 1)
)
model.to(config.DEVICE)

# Set the optimizer
optimizer = optim.Adam(model.parameters(), lr=config.LR)

# Set the loss fn
criteria = nn.CrossEntropyLoss()

# Set the gradient scaler
grad_scaler = torch.cuda.amp.grad_scaler.GradScaler()

# Setup weights and biasses
wandb.login()

# Get the current time for the checkpoint name
now = datetime.datetime.now()

# Set the wandb experiment name
experiment_name = util_functions.generate_run_name_from_config(augmentations)

# Start wandb
wandb.init(
    settings=wandb.Settings(start_method="fork"),
    project="project_asr", 
    name=experiment_name, 
    config={
        "learning_rate": config.LR,
        "batch_size": config.BATCH_SIZE,
        "epochs": config.EPOCHS,
        "augmentations": json.dumps(augmentations),
    }
)


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrobberdg[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Set the variables to keep track of the best model
best_validation_loss = 10000
best_model_state = model.state_dict()

for epoch in range(config.EPOCHS):
  # Set the model in training mode
  model.train()

  # Train the model
  total_train_loss_this_epoch = train_classification_model(
      model,
      optimizer,
      criteria,
      grad_scaler,
      train_dataloader
  )
  
  # Set the model in evaluation mode
  model.eval()

  # Validate the model
  total_val_loss_this_epoch, f1, f1_avg, accuracy = validate_classification_model(
      model,
      criteria,
      test_dataloader,
  )

  # Calculate the loss values
  train_loss_this_epoch = total_train_loss_this_epoch/len(train_dataloader.dataset)
  val_loss_this_epoch = total_val_loss_this_epoch/len(test_dataloader.dataset)

  # Log the train loss this epoch
  wandb.log({
      'train_loss': train_loss_this_epoch,
      'val_loss': val_loss_this_epoch,
      'f1_score': f1_avg,
      'acc': accuracy
  })

  print(f'epoch: {epoch}, train_loss: {train_loss_this_epoch}, val_loss: {val_loss_this_epoch}. f1: {f1_avg}, acc: {accuracy}')
  print(f1)

  # If this is the best performing model yet, save it
  if val_loss_this_epoch < best_validation_loss:
    # Update the score
    best_validation_loss = val_loss_this_epoch

    now = datetime.datetime.now()

    # Save the model
    checkpoint_path = join(
      BASE_PATH, 
      config.MODEL_CHECKPOINT_PATH, 
      f'{experiment_name}.pth'
    )
    best_model_state = model_management.save_model(model, checkpoint_path, False, '')

100%|██████████| 79/79 [02:05<00:00,  1.59s/it]
100%|██████████| 2000/2000 [00:29<00:00, 66.71it/s]


epoch: 0, train_loss: 0.018137290287017823, val_loss: 2.2926682611703875. f1: 0.10705828916914946, acc: 0.1535
[0.07662835 0.01785714 0.20165746 0.         0.12005457 0.27074236
 0.         0.26119403 0.         0.12244898]


100%|██████████| 79/79 [02:03<00:00,  1.56s/it]
100%|██████████| 2000/2000 [00:30<00:00, 66.07it/s]


epoch: 1, train_loss: 0.01778613398075104, val_loss: 2.2652536702752113. f1: 0.15115650791917487, acc: 0.205
[0.09125475 0.02790698 0.18636364 0.         0.12229299 0.35751295
 0.         0.34181818 0.         0.38441558]


100%|██████████| 79/79 [01:52<00:00,  1.43s/it]
100%|██████████| 2000/2000 [00:39<00:00, 50.28it/s]


epoch: 2, train_loss: 0.01735201005935669, val_loss: 2.2356510112881662. f1: 0.17435068747738788, acc: 0.2255
[0.10791367 0.07079646 0.22604423 0.00843882 0.11888112 0.40172166
 0.         0.38113949 0.         0.42857143]


100%|██████████| 79/79 [02:28<00:00,  1.88s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.21it/s]


epoch: 3, train_loss: 0.01703935749530792, val_loss: 2.2145909329652786. f1: 0.2097941638547464, acc: 0.2565
[0.19254658 0.19285714 0.25       0.00829876 0.12993631 0.44515103
 0.         0.41393443 0.         0.46521739]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.78it/s]


epoch: 4, train_loss: 0.01675270049571991, val_loss: 2.1975343222618102. f1: 0.24157672251209586, acc: 0.2895
[0.20987654 0.40611354 0.2632613  0.02419355 0.11       0.49618321
 0.         0.41616162 0.01081081 0.47916667]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.22it/s]


epoch: 5, train_loss: 0.01642323920726776, val_loss: 2.179512438237667. f1: 0.2517284669899052, acc: 0.3015
[0.13937282 0.37785016 0.2632613  0.0244898  0.10887097 0.51260504
 0.01117318 0.44787645 0.12206573 0.50971922]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.85it/s]


epoch: 6, train_loss: 0.016095683705806732, val_loss: 2.164947706758976. f1: 0.2789792051041381, acc: 0.3175
[0.15224913 0.3908046  0.26898048 0.048      0.13076923 0.52115813
 0.05319149 0.45792564 0.23396226 0.53275109]


100%|██████████| 79/79 [01:51<00:00,  1.41s/it]
 72%|███████▏  | 1449/2000 [00:20<00:08, 68.28it/s]

In [None]:
now = datetime.datetime.now()

# Save the final model
checkpoint_path = join(
    BASE_PATH, 
    config.MODEL_CHECKPOINT_PATH, 
    f'{experiment_name}.pth'
)
best_model_state = model_management.save_model(model, checkpoint_path, True, 'final_classification_model_unsegmented')

In [None]:
# Mark the run as finished
wandb.finish()

## Run 4 - Frequency mask

In [None]:
augmentations = {
    'pitch_shift': {
        'enabled': False,
    },
    'noise': {
        'enabled': False,
    },
    'mixup': {
        'enabled': False,
    },
    'freq_mask': {
        'enabled': True,
        'p': 0.5,
        'freq_mask_param': 5,
    },
    'time_mask': {
        'enabled': False,
    }
}

In [None]:
# Get the train and test data
train_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TRAIN_DATA_PATH),
        augmentations
    )
test_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TEST_DATA_PATH),
        test_augmentations
    )

# Place in dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=1)

In [None]:
# Clear gpu cache
torch.cuda.empty_cache()

# Get the model
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(in_features=512, out_features= 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=256, out_features=len(config.LABELS)),
    nn.Softmax(dim= 1)
)
model.to(config.DEVICE)

# Set the optimizer
optimizer = optim.Adam(model.parameters(), lr=config.LR)

# Set the loss fn
criteria = nn.CrossEntropyLoss()

# Set the gradient scaler
grad_scaler = torch.cuda.amp.grad_scaler.GradScaler()

# Setup weights and biasses
wandb.login()

# Get the current time for the checkpoint name
now = datetime.datetime.now()

# Set the wandb experiment name
experiment_name = util_functions.generate_run_name_from_config(augmentations)

# Start wandb
wandb.init(
    settings=wandb.Settings(start_method="fork"),
    project="project_asr", 
    name=experiment_name, 
    config={
        "learning_rate": config.LR,
        "batch_size": config.BATCH_SIZE,
        "epochs": config.EPOCHS,
        "augmentations": json.dumps(augmentations),
    }
)


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrobberdg[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Set the variables to keep track of the best model
best_validation_loss = 10000
best_model_state = model.state_dict()

for epoch in range(config.EPOCHS):
  # Set the model in training mode
  model.train()

  # Train the model
  total_train_loss_this_epoch = train_classification_model(
      model,
      optimizer,
      criteria,
      grad_scaler,
      train_dataloader
  )
  
  # Set the model in evaluation mode
  model.eval()

  # Validate the model
  total_val_loss_this_epoch, f1, f1_avg, accuracy = validate_classification_model(
      model,
      criteria,
      test_dataloader,
  )

  # Calculate the loss values
  train_loss_this_epoch = total_train_loss_this_epoch/len(train_dataloader.dataset)
  val_loss_this_epoch = total_val_loss_this_epoch/len(test_dataloader.dataset)

  # Log the train loss this epoch
  wandb.log({
      'train_loss': train_loss_this_epoch,
      'val_loss': val_loss_this_epoch,
      'f1_score': f1_avg,
      'acc': accuracy
  })

  print(f'epoch: {epoch}, train_loss: {train_loss_this_epoch}, val_loss: {val_loss_this_epoch}. f1: {f1_avg}, acc: {accuracy}')
  print(f1)

  # If this is the best performing model yet, save it
  if val_loss_this_epoch < best_validation_loss:
    # Update the score
    best_validation_loss = val_loss_this_epoch

    now = datetime.datetime.now()

    # Save the model
    checkpoint_path = join(
      BASE_PATH, 
      config.MODEL_CHECKPOINT_PATH, 
      f'{experiment_name}.pth'
    )
    best_model_state = model_management.save_model(model, checkpoint_path, False, '')

100%|██████████| 79/79 [02:05<00:00,  1.59s/it]
100%|██████████| 2000/2000 [00:29<00:00, 66.71it/s]


epoch: 0, train_loss: 0.018137290287017823, val_loss: 2.2926682611703875. f1: 0.10705828916914946, acc: 0.1535
[0.07662835 0.01785714 0.20165746 0.         0.12005457 0.27074236
 0.         0.26119403 0.         0.12244898]


100%|██████████| 79/79 [02:03<00:00,  1.56s/it]
100%|██████████| 2000/2000 [00:30<00:00, 66.07it/s]


epoch: 1, train_loss: 0.01778613398075104, val_loss: 2.2652536702752113. f1: 0.15115650791917487, acc: 0.205
[0.09125475 0.02790698 0.18636364 0.         0.12229299 0.35751295
 0.         0.34181818 0.         0.38441558]


100%|██████████| 79/79 [01:52<00:00,  1.43s/it]
100%|██████████| 2000/2000 [00:39<00:00, 50.28it/s]


epoch: 2, train_loss: 0.01735201005935669, val_loss: 2.2356510112881662. f1: 0.17435068747738788, acc: 0.2255
[0.10791367 0.07079646 0.22604423 0.00843882 0.11888112 0.40172166
 0.         0.38113949 0.         0.42857143]


100%|██████████| 79/79 [02:28<00:00,  1.88s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.21it/s]


epoch: 3, train_loss: 0.01703935749530792, val_loss: 2.2145909329652786. f1: 0.2097941638547464, acc: 0.2565
[0.19254658 0.19285714 0.25       0.00829876 0.12993631 0.44515103
 0.         0.41393443 0.         0.46521739]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.78it/s]


epoch: 4, train_loss: 0.01675270049571991, val_loss: 2.1975343222618102. f1: 0.24157672251209586, acc: 0.2895
[0.20987654 0.40611354 0.2632613  0.02419355 0.11       0.49618321
 0.         0.41616162 0.01081081 0.47916667]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.22it/s]


epoch: 5, train_loss: 0.01642323920726776, val_loss: 2.179512438237667. f1: 0.2517284669899052, acc: 0.3015
[0.13937282 0.37785016 0.2632613  0.0244898  0.10887097 0.51260504
 0.01117318 0.44787645 0.12206573 0.50971922]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.85it/s]


epoch: 6, train_loss: 0.016095683705806732, val_loss: 2.164947706758976. f1: 0.2789792051041381, acc: 0.3175
[0.15224913 0.3908046  0.26898048 0.048      0.13076923 0.52115813
 0.05319149 0.45792564 0.23396226 0.53275109]


100%|██████████| 79/79 [01:51<00:00,  1.41s/it]
 72%|███████▏  | 1449/2000 [00:20<00:08, 68.28it/s]

In [None]:
now = datetime.datetime.now()

# Save the final model
checkpoint_path = join(
    BASE_PATH, 
    config.MODEL_CHECKPOINT_PATH, 
    f'{experiment_name}.pth'
)
best_model_state = model_management.save_model(model, checkpoint_path, True, 'final_classification_model_unsegmented')

In [None]:
# Mark the run as finished
wandb.finish()

## Run 5 - Time mask

In [None]:
augmentations = {
    'pitch_shift': {
        'enabled': False,
    },
    'noise': {
        'enabled': False,
    },
    'mixup': {
        'enabled': False,
    },
    'freq_mask': {
        'enabled': False,
    },
    'time_mask': {
        'enabled': True,
        'p': 0.5,
        'time_mask_param': 5,
    }
}

In [None]:
# Get the train and test data
train_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TRAIN_DATA_PATH),
        augmentations
    )
test_dataset = AudioSampleDataset(
        join(BASE_PATH, config.TEST_DATA_PATH),
        test_augmentations
    )

# Place in dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=1)

In [None]:
# Clear gpu cache
torch.cuda.empty_cache()

# Get the model
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(in_features=512, out_features= 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=256, out_features=len(config.LABELS)),
    nn.Softmax(dim= 1)
)
model.to(config.DEVICE)

# Set the optimizer
optimizer = optim.Adam(model.parameters(), lr=config.LR)

# Set the loss fn
criteria = nn.CrossEntropyLoss()

# Set the gradient scaler
grad_scaler = torch.cuda.amp.grad_scaler.GradScaler()

# Setup weights and biasses
wandb.login()

# Get the current time for the checkpoint name
now = datetime.datetime.now()

# Set the wandb experiment name
experiment_name = util_functions.generate_run_name_from_config(augmentations)

# Start wandb
wandb.init(
    settings=wandb.Settings(start_method="fork"),
    project="project_asr", 
    name=experiment_name, 
    config={
        "learning_rate": config.LR,
        "batch_size": config.BATCH_SIZE,
        "epochs": config.EPOCHS,
        "augmentations": json.dumps(augmentations),
    }
)


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrobberdg[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Set the variables to keep track of the best model
best_validation_loss = 10000
best_model_state = model.state_dict()

for epoch in range(config.EPOCHS):
  # Set the model in training mode
  model.train()

  # Train the model
  total_train_loss_this_epoch = train_classification_model(
      model,
      optimizer,
      criteria,
      grad_scaler,
      train_dataloader
  )
  
  # Set the model in evaluation mode
  model.eval()

  # Validate the model
  total_val_loss_this_epoch, f1, f1_avg, accuracy = validate_classification_model(
      model,
      criteria,
      test_dataloader,
  )

  # Calculate the loss values
  train_loss_this_epoch = total_train_loss_this_epoch/len(train_dataloader.dataset)
  val_loss_this_epoch = total_val_loss_this_epoch/len(test_dataloader.dataset)

  # Log the train loss this epoch
  wandb.log({
      'train_loss': train_loss_this_epoch,
      'val_loss': val_loss_this_epoch,
      'f1_score': f1_avg,
      'acc': accuracy
  })

  print(f'epoch: {epoch}, train_loss: {train_loss_this_epoch}, val_loss: {val_loss_this_epoch}. f1: {f1_avg}, acc: {accuracy}')
  print(f1)

  # If this is the best performing model yet, save it
  if val_loss_this_epoch < best_validation_loss:
    # Update the score
    best_validation_loss = val_loss_this_epoch

    now = datetime.datetime.now()

    # Save the model
    checkpoint_path = join(
      BASE_PATH, 
      config.MODEL_CHECKPOINT_PATH, 
      f'{experiment_name}.pth'
    )
    best_model_state = model_management.save_model(model, checkpoint_path, False, '')

100%|██████████| 79/79 [02:05<00:00,  1.59s/it]
100%|██████████| 2000/2000 [00:29<00:00, 66.71it/s]


epoch: 0, train_loss: 0.018137290287017823, val_loss: 2.2926682611703875. f1: 0.10705828916914946, acc: 0.1535
[0.07662835 0.01785714 0.20165746 0.         0.12005457 0.27074236
 0.         0.26119403 0.         0.12244898]


100%|██████████| 79/79 [02:03<00:00,  1.56s/it]
100%|██████████| 2000/2000 [00:30<00:00, 66.07it/s]


epoch: 1, train_loss: 0.01778613398075104, val_loss: 2.2652536702752113. f1: 0.15115650791917487, acc: 0.205
[0.09125475 0.02790698 0.18636364 0.         0.12229299 0.35751295
 0.         0.34181818 0.         0.38441558]


100%|██████████| 79/79 [01:52<00:00,  1.43s/it]
100%|██████████| 2000/2000 [00:39<00:00, 50.28it/s]


epoch: 2, train_loss: 0.01735201005935669, val_loss: 2.2356510112881662. f1: 0.17435068747738788, acc: 0.2255
[0.10791367 0.07079646 0.22604423 0.00843882 0.11888112 0.40172166
 0.         0.38113949 0.         0.42857143]


100%|██████████| 79/79 [02:28<00:00,  1.88s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.21it/s]


epoch: 3, train_loss: 0.01703935749530792, val_loss: 2.2145909329652786. f1: 0.2097941638547464, acc: 0.2565
[0.19254658 0.19285714 0.25       0.00829876 0.12993631 0.44515103
 0.         0.41393443 0.         0.46521739]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.78it/s]


epoch: 4, train_loss: 0.01675270049571991, val_loss: 2.1975343222618102. f1: 0.24157672251209586, acc: 0.2895
[0.20987654 0.40611354 0.2632613  0.02419355 0.11       0.49618321
 0.         0.41616162 0.01081081 0.47916667]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:28<00:00, 71.22it/s]


epoch: 5, train_loss: 0.01642323920726776, val_loss: 2.179512438237667. f1: 0.2517284669899052, acc: 0.3015
[0.13937282 0.37785016 0.2632613  0.0244898  0.10887097 0.51260504
 0.01117318 0.44787645 0.12206573 0.50971922]


100%|██████████| 79/79 [01:50<00:00,  1.40s/it]
100%|██████████| 2000/2000 [00:27<00:00, 71.85it/s]


epoch: 6, train_loss: 0.016095683705806732, val_loss: 2.164947706758976. f1: 0.2789792051041381, acc: 0.3175
[0.15224913 0.3908046  0.26898048 0.048      0.13076923 0.52115813
 0.05319149 0.45792564 0.23396226 0.53275109]


100%|██████████| 79/79 [01:51<00:00,  1.41s/it]
 72%|███████▏  | 1449/2000 [00:20<00:08, 68.28it/s]

In [None]:
now = datetime.datetime.now()

# Save the final model
checkpoint_path = join(
    BASE_PATH, 
    config.MODEL_CHECKPOINT_PATH, 
    f'{experiment_name}.pth'
)
best_model_state = model_management.save_model(model, checkpoint_path, True, 'final_classification_model_unsegmented')

In [None]:
# Mark the run as finished
wandb.finish()