In [1]:
import sys
sys.path.insert(0,'..')

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from model import Conformer as con
from data_processing import ukr_lang_chars_handle
from data_processing import CommonVoiceUkr
from model.conformer import Conformer as con

from config import *

In [2]:
tgt_n = 152
target = torch.randn(BATCH_SIZE, tgt_n) # (N, S) where N =batch size and S = max target length 

outputs = torch.randn(BATCH_SIZE, 1, 256, 38) # Tensor of size (T, N, C), where T = input length, N = batch size, and C = number of classes (including blank)
b, cnls, t, clss = outputs.shape
outputs = outputs.view(t*cnls, b, clss)


input_lengths = torch.full(size=(BATCH_SIZE,), fill_value=outputs.shape[0], dtype=torch.long)
target_lengths = torch.full(size=(BATCH_SIZE,), fill_value=target.shape[-1], dtype=torch.long)
print(input_lengths.shape)
print("input lengths:", input_lengths)

print(target_lengths.shape)
print("target lengths:", target_lengths)

ctc_loss = nn.CTCLoss(zero_infinity=False, reduction="none")
loss = ctc_loss(outputs, target, input_lengths, target_lengths)

torch.Size([2])
input lengths: tensor([256, 256])
torch.Size([2])
target lengths: tensor([152, 152])


In [3]:
print(BATCH_SIZE)
ds = CommonVoiceUkr(TRAIN_PATH, TRAIN_SPEC_PATH)
n_ds = len(ds)
n_ds = n_ds - n_ds % BATCH_SIZE

2


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader

import wandb
import numpy as np
from tqdm import tqdm
import pprint

from config import *
from data_processing import ukr_lang_chars_handle
from data_processing import CommonVoiceUkr
from model import CommandClassifierByEncoder as Model
from model import get_cosine_schedule_with_warmup, OneCycleLR

import os


def train(model, train_dataloader, optimizer, device, scheduler=None, epoch=1, wb=None):
    print(f"Training begin")
    model.train()
    ce_criterion = nn.CrossEntropyLoss()
    running_loss = []
    losses_per_phase = []

    for idx, (X, tgt) in tqdm(enumerate(train_dataloader)):
        tgt_text = " "#tgt["text"]
        tgt_class = tgt["label"].long().to(device)
        tgt_class = F.one_hot(tgt_class, num_classes=5)

        one_hots = ukr_lang_chars_handle.sentences_to_one_hots(tgt_text, 152).to(device)
        X = X.to(device) #

        emb, output = model(X, one_hots)  # (batch, _, n_class, time), (batch, _, time, n_class)
        loss = ce_criterion(output, tgt_class.float()) # output.shape == (N, C) where N - batch, C - number of classes
        if wb:
            wb.log({
                "loss": loss.item(),
                "epoch": epoch
            })
        loss.backward()
        optimizer.step()
        if scheduler:
            scheduler.step()

        running_loss.append(loss.cpu().detach().numpy())
        losses_per_phase.append(loss.cpu().detach().numpy())
        if (idx + 1) % 25 == 0:  # print every 200 mini-batches
            loss_mean = np.mean(np.array(losses_per_phase))
            print(f"Epoch: {epoch}, Last loss: {loss.item():.4f}, Loss phase mean: {loss_mean:.4f}")
            if wb:
                wb.log({"loss phase mean": loss_mean})
            losses_per_phase = []
        optimizer.zero_grad()


def val(model, train_dataloader, device, epoch, wb=None):
    model.eval()
    positive = 0
    train_len = train_dataloader.sampler.num_samples

    print("\n")
    print("Evaluation on train dataset")
    with torch.no_grad():
        for idx, (X, tgt) in tqdm(enumerate(train_dataloader)):
            tgt_text = " "#tgt["text"]
            tgt_class = tgt["label"].long().to(device)
            tgt_class = F.one_hot(tgt_class, num_classes=5)
            one_hots = ukr_lang_chars_handle.sentences_to_one_hots(tgt_text, 152).to(device)
            X = X.to(device)
            emb, output = model(X, one_hots)
            A = torch.argmax(output, dim=-1)
            B = torch.argmax(tgt_class, dim=-1)
            is_right = (A == B)
            positive += torch.sum(is_right)

    train_accuracy = positive / train_len
    if wb:
        wb.log({
            "train accuracy": train_accuracy,
            "epoch": epoch
        })
    print(f"Accuracy on TRAIN dataset: {train_accuracy*100:.2f}%\n")

def get_scheduler(epochs, train_len, optimizer, scheduler_name="cosine_with_warmup"):
    #wandb.config["scheduler"] = scheduler_name
    if scheduler_name == "cosine_with_warmup":
        return get_cosine_schedule_with_warmup(optimizer,
                                                num_warmup_steps=epochs//5,
                                                num_training_steps=epochs - epochs//5)
    elif scheduler_name == "constant":
        return torch.optim.lr_scheduler.ConstantLR(optimizer)
    elif scheduler_name == "exponential":
        return torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
    elif scheduler_name == "one_circle":
        return OneCycleLR(optimizer,
                          max_lr=CONFIG["learning_rate"]*10,
                          total_steps=train_len)


def main():
    #wandb_stat = wandb.init(project="ASR", entity="Alex2135", config=CONFIG)
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Making dataset and loader
    ds = CommonVoiceUkr(TRAIN_PATH, TRAIN_SPEC_PATH, batch_size=BATCH_SIZE)
    train_dataloader = DataLoader(ds, shuffle=True, batch_size=BATCH_SIZE)
    train_val_dataloader = DataLoader(ds, shuffle=True, batch_size=64)
    epochs = CONFIG["epochs"]
    train_len = len(train_dataloader) * epochs

    tgt_n = 152
    model = Model(n_encoders=4, n_decoders=CONFIG["n_decoders"], device=device)
    if CONFIG["pretrain"] == True:
        PATH = os.path.join(DATA_DIR, "model_1.pt")
        model = Model(n_encoders=CONFIG["n_encoders"], n_decoders=CONFIG["n_decoders"], device=device)
        model.load_state_dict(torch.load(PATH))

    # Create optimizator
    optimizer = AdamW(model.parameters(), lr=CONFIG["learning_rate"])
    save_model = True
    scheduler = get_scheduler(CONFIG["epochs"], train_len, optimizer, "cosine_with_warmup")

    for epoch in range(1, epochs + 1):
        print(f"Epoch №{epoch}")
        train(model, train_dataloader, optimizer, device, scheduler=scheduler, epoch=epoch)
        val(model, train_val_dataloader, device, epoch)
        scheduler.step(epoch)
        #wandb.log({"scheduler lr": scheduler.get_last_lr()})

    if save_model:
        PATH = os.path.join(DATA_DIR, "model_1.pt")
        print(f"Save model to path: '{PATH}'")
        torch.save(model.state_dict(), PATH)


if __name__ == "__main__":
    main()

Epoch №1
Training begin


25it [00:14,  1.96it/s]

Epoch: 1, Last loss: 1.9048, Loss phase mean: 1.6208


50it [00:26,  2.02it/s]

Epoch: 1, Last loss: 1.6225, Loss phase mean: 1.6693


57it [00:30,  1.88it/s]


KeyboardInterrupt: 

In [5]:
import matplotlib.pyplot as plt

losses_list = [t.cpu().detach().numpy() if type(t) is torch.Tensor else t for t in running_loss ]
plt.figure(figsize=(12, 10))
plt.plot(losses_list)
plt.show()

NameError: name 'running_loss' is not defined

In [None]:
"""
import matplotlib.pyplot as plt

wtout_zeros = np.array([t.cpu().detach().numpy() if type(t) is torch.Tensor else t for t in running_loss])
print(len(running_loss))
wtout_zeros = wtout_zeros[wtout_zeros != 0]
plt.figure(figsize=(12, 10))
plt.plot(wtout_zeros)
plt.show()
"""

In [None]:
"""
import os
PATH = os.path.join(DATA_DIR, "model_1.pt")
model = con(device=device)
model.load_state_dict(torch.load(PATH))
"""


In [None]:
running_loss[-1]

In [None]:
sent = ("Привіт",)
oh_sent = ukr_lang_chars_handle.sentences_to_one_hots(tgt, 152)
#print(oh_sent)

result = ukr_lang_chars_handle.one_hots_to_sentence(oh_sent)
#print(result)
indeces = ukr_lang_chars_handle.sentence_to_indeces(sent[0])
#print(indeces)

one_hots = F.one_hot(torch.Tensor(indeces).long(), num_classes=38)
#print(one_hots)

reproduced_sent = ukr_lang_chars_handle.onehot_matrix_to_idxs(one_hots)
#print(f"{reproduced_sent=}")

In [None]:
import torch
import torch.nn.functional as F
from data_processing import ukr_lang_chars_handle
from config import *
from model import Conformer as con
from data_processing import CommonVoiceUkr
from torch.utils.data import DataLoader
import pprint

device = "cpu"

PATH = os.path.join(DATA_DIR, "model_2.pt")
model = con(n_encoders=8, n_decoders=8, device=device)
model.load_state_dict(torch.load(PATH))

model.eval()
ds = CommonVoiceUkr(TRAIN_PATH, TRAIN_SPEC_PATH)
train_dataloader = DataLoader(ds, shuffle=True, batch_size=1)

with torch.no_grad():
    X, tgt = next(iter(train_dataloader))
    X = X.to(device)
    print("Target:", tgt)
    print("X shape:", X.shape)
    #tgt = ("",)

    tgt_one_hots = ukr_lang_chars_handle.sentences_to_one_hots(tgt, 152)
    print("tgt to one_hots shape:", tgt_one_hots.shape)
    print("tgt to one_hots:", ukr_lang_chars_handle.one_hots_to_sentences(tgt_one_hots))

    out_data = model(X, tgt_one_hots.to(device))
    out_data = F.softmax(out_data, dim=-1)
    out_data = out_data.cpu()
    print("\n\nOutput data shape:", out_data.shape)
    print("output:", out_data)
    out_data = out_data.transpose(-1, -2).contiguous()
    result = ukr_lang_chars_handle.one_hots_to_sentences(out_data)
    pprint.pprint(len(result))
    pprint.pprint(result)

In [None]:
import os
PATH = os.path.join(DATA_DIR, "model_1.pt")
print(PATH)
torch.save(model.state_dict(), PATH)


In [None]:
num_heads = 6

A = torch.randn(2, 1, 64, 256)
As = A.repeat(1, 6, 1, 1).transpose(-1, -2).contiguous()
As.shape
#torch.stack(A, dim)

In [None]:
A = torch.randn(2, 1, 64, 256)

norm = nn.LayerNorm(256)
norm(A).shape

In [None]:
from model import MaskedSoftmaxCELoss


X = torch.randn(8, 1, 64, 256)
X = torch.squeeze(X, 1)
lin1 = nn.Linear(256, 152)
lin2 = nn.Linear(64, 38)
X = lin1(X)
X = lin2(X.transpose(-1, -2).contiguous())
X = X.transpose(-1, -2).contiguous()
print(X.shape)

oh = torch.randn(8, 1, 38, 152)
oh = eleminate_channels(oh)


ce_criterion = MaskedSoftmaxCELoss()
loss = nn.CrossEntropyLoss()
loss(X, oh)

In [None]:
lin1 = nn.Sequential(nn.Linear(38*256, 2048), nn.ReLU())
lin2 = nn.Sequential(nn.Linear(2048, 1024), nn.ReLU())
lin3 = nn.Sequential(nn.Linear(1024, 5), nn.Softmax(dim=-1))

X = torch.randn([256, 1, 38])
t, b, d = X.shape
X = X.view(b, t*d)

X = lin1(X)
X = lin2(X)
X = lin3(X)
X.shape

In [None]:
import torch.nn.functional as F

X = torch.Tensor([2, 0, 0, 1, 0, 3, 1, 1, 1, 2, 1, 2, 4, 1, 2, 1])
X = torch.Tensor([X])
X = F.one_hot(X)
X