# Initialisation and Dependencies

In [1]:
import sys

# Extra import location for this notebook (presumably where the local `utils`
# package lives — confirm against the environment).
lib_path = '/home/jovyan/libs'
# Re-running this cell must not pile up duplicate entries: drop an existing
# occurrence first, then put the directory at the front so it keeps priority.
if lib_path in sys.path:
    sys.path.remove(lib_path)
sys.path.insert(0, lib_path)

In [26]:
%reload_ext autoreload
%autoreload 2

import gc, math, traceback, datetime

import numpy as np

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from datasets import load_from_disk

import whisper
from whisper.tokenizer import get_tokenizer

from utils import audio, gradient, gpu
from utils.attacks import PrepareFront, PrepareAtPosition

# GPU RAM Tracking

In [8]:
# Pick the device through the project helper; the recorded output below shows
# it reported "cuda" on the last run. The model is moved onto it later.
device = gpu.get_device()

Device: cuda


# Load Model

In [9]:
# Drop a model left over from an earlier run of this notebook before a new one
# is loaded (presumably so its memory can be reclaimed).
try:
    del model
    print("Model deleted!")
except NameError:
    # Fresh kernel: `model` does not exist yet, so there is nothing to delete.
    # Only this case is ignored; any other error (or Ctrl-C) still surfaces.
    pass

In [10]:
# Name of the Whisper checkpoint to load.
MODEL_NAME = "tiny.en"

# Load the checkpoint and move it onto the selected device.
model = whisper.load_model(MODEL_NAME).to(device)
# Switch the module to evaluation mode; the trailing `;` keeps the notebook
# from echoing the model repr as cell output.
model.eval();

  checkpoint = torch.load(fp, map_location=device)


# Load Data

In [11]:
# Directory containing the saved TED-LIUM splits.
tedlium_path = "../tedlium"

# One saved dataset per split; train and validation use the "_idx" variants.
train_path = f"{tedlium_path}/train_idx.hf"
validation_path = f"{tedlium_path}/validation_idx.hf"
test_path = f"{tedlium_path}/test.hf"

In [12]:
# Number of examples kept from each split after shuffling.
TRAIN_SELECT = 500
VALID_SELECT = 150
TEST_SELECT = 250

# Shuffle seed shared by all three splits.
SEED = 1


def _load_subset(path, count):
    """Load a saved dataset in torch format, shuffle it with SEED and keep the first `count` rows."""
    return load_from_disk(path).with_format("torch").shuffle(seed=SEED).select(range(count))


tedlium_train = _load_subset(train_path, TRAIN_SELECT)
tedlium_validation = _load_subset(validation_path, VALID_SELECT)
tedlium_test = _load_subset(test_path, TEST_SELECT)

Loading dataset from disk:   0%|          | 0/109 [00:00<?, ?it/s]

In [13]:
# def collate(ls):
#     pad_to = max(list(map(lambda x: x["audio"].shape[0], ls)))
#     return torch.cat(list(map(lambda x: F.pad(x["audio"], (0, pad_to - x["audio"].shape[0])).unsqueeze(0).to(torch.bfloat16), ls)), dim=0)

def collate_idx(ls):
    """Collate a batch into an ``(audio, idx)`` pair taken from its first item.

    Args:
        ls: list of dataset items. Each item is indexed with ``"audio"`` and
            ``"idx"``; the values must support ``.unsqueeze(0)`` and
            ``.item()`` respectively.

    Returns:
        Tuple of the first item's audio with a new leading dimension and the
        first item's ``idx`` converted with ``.item()``.

    Raises:
        IndexError: if ``ls`` is empty.

    Only ``ls[0]`` is used, so with a batch size above 1 the remaining items
    are dropped. A warning is emitted in that case so the loss is not silent.
    """
    import warnings

    if len(ls) > 1:
        warnings.warn(
            f"collate_idx keeps only the first of {len(ls)} items; use batch_size=1",
            stacklevel=2,
        )
    first = ls[0]
    return first["audio"].unsqueeze(0), first["idx"].item()

# collate_idx only returns the first item of each batch, so a batch size above
# 1 would drop the other items.
TRAIN_BATCH_SIZE = 1 # highly recommended to be 1
VALID_BATCH_SIZE = 1

# Train and validation loaders yield (audio, idx) pairs via collate_idx.
train_dataset = DataLoader(tedlium_train, batch_size=TRAIN_BATCH_SIZE, collate_fn=collate_idx)
validation_dataset = DataLoader(tedlium_validation, batch_size=VALID_BATCH_SIZE, collate_fn=collate_idx)
# NOTE(review): the test loader uses the DataLoader defaults (no collate_idx),
# so its batches are structured differently from the two above — confirm that
# gradient.evaluate expects this.
test_dataset = DataLoader(tedlium_test)

# Training Loop

In [15]:
# Collect unreferenced Python objects, release PyTorch's unused cached CUDA
# memory, then report GPU usage through the project helper (the recorded run
# printed about 0.14 GB).
gc.collect()
torch.cuda.empty_cache()
gpu.print_cuda_usage()

0.14174842834472656 GB


In [16]:
# Settings forwarded to gradient.train in the training cell.
# Learning rate (passed as `lr`).
LR = 1e-3
# Passed as `patience`; the training log prints it next to the losses.
PATIENCE = 5
# Passed as `mins_limit`; logged as "Time Limit (Mins)".
MIN_LIMIT = 45
# Passed as `iter_limit`; logged as "Epochs Limit".
ITER_LIMIT = 30
# Passed as `clamp_epsilon`; logged as "Clamp".
CLAMP_EP = 0.005
# Snippet shape; the training log reports the snippet as "of size (1, 6800)".
SNIPPET_SIZE = (1, 6_800)
# Position given to PrepareAtPosition — presumably a sample offset into the
# audio; TODO confirm the unit.
POSITION = 16_000
# Callable used later as PREPARE_METHOD(snippet, audio); also exposes `.name`.
PREPARE_METHOD = PrepareAtPosition(SNIPPET_SIZE, POSITION)

# Default value; the next cell replaces it with a SummaryWriter.
# NOTE(review): whether gradient.train accepts writer=None is not visible here.
writer = None

In [19]:
# tensorboard writer
# Only the clock part goes through strftime; the run parameters are appended
# afterwards so a '%' inside them can never be read as a format directive.
run_started = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
timestamp = f"{run_started}_size_{SNIPPET_SIZE}_{PREPARE_METHOD.name}"
# One log directory per run, named after start time, snippet size and prepare method.
writer = SummaryWriter(log_dir=f"../runs/size_tests/{timestamp}", max_queue=5)

In [None]:
# Run the snippet optimisation. gradient.train returns four values; only the
# first two (the best snippet and the collection of snippets) are kept.
# NOTE(review): both *_success arguments are None and the log reports success
# tracking as False, yet later cells read `train_success` / `valid_success` —
# confirm where those are meant to come from.
best_snippet, snippets, _, _ = gradient.train(
    model,
    train_dataset,
    validation_dataset,
    PREPARE_METHOD,
    writer,
    lr=LR,
    train_success=None,
    valid_success=None,
    iter_limit=ITER_LIMIT,
    mins_limit=MIN_LIMIT,
    patience=PATIENCE,
    clamp_epsilon=CLAMP_EP,
)

Prepare method: prepare_at_position
Snippet initialised to [3.107941438429407e-06, 0.004998547025024891] of size (1, 6800)
Clamp: 0.005
Time Limit (Mins): 45
Epochs Limit: 30
Tracking training success: False
Tracking valid success: False


Training:   0% 0/1 [03:01<?, ?it/s, Iter 1, Validation Batch 150/150]

Trng Avg Loss: 7.948866844177246 | Valid Avg Loss: 8.048896789550781 | Patience: 5 | LR: [0.001] | Epoch Limit: 29


Training:   0% 0/1 [05:05<?, ?it/s, Iter 2, Validation Batch 150/150]

Trng Avg Loss: 7.6776909828186035 | Valid Avg Loss: 7.407385349273682 | Patience: 5 | LR: [0.001] | Epoch Limit: 28


Training:   0% 0/1 [05:26<?, ?it/s, Iter 3, Training Batch 109/500]  

In [None]:
# Detach the best snippet from the autograd graph, move it to the CPU and drop
# size-1 dimensions before handing it to the project's view_mel helper.
audio.view_mel(best_snippet.detach().to("cpu").squeeze())

# Evaluation

In [None]:
# Evaluate the best snippet on the test loader.
# NOTE(review): the original remark here said this call is kept commented out so
# that the runtime does not auto-run it and crash, but the call is currently
# active — confirm which is intended before using Run All.
gradient.evaluate(best_snippet, PREPARE_METHOD, model, test_dataset, CLAMP_EP, POSITION)

In [None]:
# Gather every intermediate snippet plus the best one on the CPU and stack them
# into a single tensor.
# NOTE: this rebinds `snippets`, so running the cell a second time would iterate
# over the stacked tensor and append best_snippet again.
snippets = torch.stack([s.cpu() for s in snippets] + [best_snippet.cpu()])
snippets.shape

In [None]:
# Write the stacked snippets, with size-1 dimensions removed, to "snippets.pt"
# in the current working directory.
torch.save(snippets.squeeze(), "snippets.pt")

In [None]:
# Save the training success records and the keys they are stored under.
# NOTE(review): `train_success` is never assigned in the visible cells (the
# training call passes train_success=None), so this raises NameError on a
# fresh Run All. It is assumed to be a dict-like object — TODO confirm.
train_rows = [torch.tensor(row) for row in train_success.values()]
torch.save(torch.stack(train_rows), "train_success.pt")
train_keys = list(train_success.keys())
torch.save(torch.tensor(train_keys), "train_ids.pt")

In [None]:
# Save the validation success records and the keys they are stored under.
# NOTE(review): `valid_success` is never assigned in the visible cells (the
# training call passes valid_success=None), so this raises NameError on a
# fresh Run All. It is assumed to be a dict-like object — TODO confirm.
valid_rows = [torch.tensor(row) for row in valid_success.values()]
torch.save(torch.stack(valid_rows), "valid_success.pt")
valid_keys = list(valid_success.keys())
torch.save(torch.tensor(valid_keys), "valid_ids.pt")

# Save and Hear Snippet

In [None]:
def normalise(random_snippet, ep):
    """Rescale a snippet from [0, 1) to [-ep, ep) and report its range.

    Args:
        random_snippet: tensor whose values are assumed to lie in [0, 1), as
            produced by ``torch.rand``.
        ep: half-width of the target interval.

    Returns:
        The rescaled tensor. Its minimum and maximum are printed as a side
        effect.
    """
    stretched = random_snippet * ep * 2  # [0, 1) -> [0, 2 * ep)
    res = stretched - ep  # shift down so the interval is centred on zero
    lowest, highest = torch.min(res), torch.max(res)
    print(f"Normalised, Min {lowest}, Max {highest}")
    return res

In [None]:
# Save snippet to wav file
# NOTE(review): neither `save_audio` nor `snippet` is defined or imported in the
# visible cells, so this fails on a fresh kernel. Possibly meant to be a helper
# from the imported `audio` module applied to `best_snippet` — confirm.
save_audio(snippet, f"./snippets/clamp_{CLAMP_EP}_{PREPARE_METHOD.name}_snippet_only.wav")

In [None]:
# Apply the prepare method to the snippet (moved to the CPU) and the audio of
# the test example at index 2 (given a leading dimension), then save the result.
# NOTE(review): neither `save_audio` nor `snippet` is defined or imported in the
# visible cells — confirm where they are meant to come from.
save_audio(PREPARE_METHOD(snippet.to("cpu"), tedlium_test[2]["audio"].unsqueeze(0)), f"./snippets/clamp_{CLAMP_EP}_{PREPARE_METHOD.name}_combined.wav")