In [None]:
# Mount Google Drive at /content/drive (Colab-only helper module).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Switch the working directory to the project folder on the mounted Drive
# (presumably so the project-local modules imported later resolve — confirm).
%cd /content/drive/MyDrive/SPEECH/Code

In [None]:
!pip install wandb
!pip install jiwer

This is the main part of the code. It builds the models phase by phase.

In [1]:
from dataclasses import dataclass, field

import torch
import wandb

import PreProcessing, PhaseOneModel, PhaseTwoModel, PhaseThreeModel, Evaluating

In [2]:
@dataclass
class Config:
    """Settings consumed when building and training a phase model.

    Attributes:
        learning_rate: Learning rate; recorded in the wandb run config.
        epochs: Number of epochs; recorded in the wandb run config.
        batch_size: Batch size used for the training DataLoader.
        wandb_init: When True, ``create_model`` starts a wandb run.
        hyperparams: Keyword arguments forwarded to the model constructor.
    """

    learning_rate: float = 0.01
    epochs: int = 300
    batch_size: int = 4
    wandb_init: bool = False

    # Declared as a real dataclass field with a default_factory so that every
    # Config owns its own dict. As an un-annotated class attribute the dict was
    # shared by all instances (mutating one config changed every other one),
    # could not be overridden via the constructor and was missing from repr/eq.
    hyperparams: dict = field(default_factory=lambda: {
        "n_cnn_layers": 3,
        "n_rnn_layers": 5,
        "rnn_dim": 512,
        "n_class": 28,
        "n_feats": 13,
        "stride": 2,
        "dropout": 0.1,
    })

In [3]:
def create_model(PhaseNumber, phase_model_class, config=None):
    """Build the model and the training DataLoader for one phase.

    Args:
        PhaseNumber: Phase label such as ``"PhaseThree"``; ``"Model"`` is
            appended to it to form the architecture name logged to wandb.
        phase_model_class: Model class to instantiate. It is called with
            ``config`` as the first positional argument followed by the
            keyword arguments taken from ``config.hyperparams``. ``None``
            falls back to ``PhaseThreeModel.PhaseThreeModel``.
        config: Settings object exposing ``learning_rate``, ``epochs``,
            ``batch_size``, ``wandb_init`` and ``hyperparams``. Required,
            even though the signature keeps the ``None`` default.

    Returns:
        Tuple ``(model, train_dataloader, test_dataloader, device)``.
        ``test_dataloader`` is always ``None``; ``device`` is only selected
        and returned here, the model is not moved onto it.

    Raises:
        ValueError: If ``config`` is ``None``.
    """
    # Fail early with a clear message instead of an AttributeError on None.
    if config is None:
        raise ValueError("create_model requires a config object; got None")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using {device} device")
    architecture = PhaseNumber + 'Model'

    # wandb logging is opt-in: a new run is started only when requested.
    if config.wandb_init:
        wandb.init(
            # wandb project where this run will be logged
            project="speechRecProj",
            # hyperparameters and run metadata to track
            config={
                "learning_rate": config.learning_rate,
                "architecture": architecture,
                "epochs": config.epochs,
                "batch_size": config.batch_size,
            },
        )

    wavs, txts = PreProcessing.load_data(mode='train', data_path=PreProcessing.DATA_PATH)

    dataset = PreProcessing.AudioDatasetV3(wavs, txts)
    # Pass the collate function directly; wrapping it in a lambda added nothing.
    train_dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=config.batch_size,
        collate_fn=PreProcessing.process_data,
    )
    # TODO: build a test DataLoader from load_data(mode='test'); none exists yet.
    test_dataloader = None

    # Only these hyperparameters are forwarded to the model constructor.
    forwarded_keys = (
        'n_cnn_layers',
        'n_rnn_layers',
        'rnn_dim',
        'n_class',
        'n_feats',
        'stride',
        'dropout',
    )
    model_kwargs = {key: config.hyperparams[key] for key in forwarded_keys}

    # Honour the class the caller asked for; it used to be ignored in favour
    # of a hardcoded PhaseThreeModel, which remains the fallback for None.
    if phase_model_class is None:
        phase_model_class = PhaseThreeModel.PhaseThreeModel
    model = phase_model_class(config, **model_kwargs)

    return model, train_dataloader, test_dataloader, device

In [None]:
# Build the Phase Three model and its training DataLoader; wandb_init=True
# makes create_model start a wandb run for this session.
model, train_dataloader, test_dataloader, device = create_model(
    "PhaseThree",
    PhaseThreeModel.PhaseThreeModel,
    config=Config(wandb_init=True),
)

In [None]:
# Train the model built in the previous cell.
# NOTE(review): this passes a fresh Config with wandb_init=False although the
# model was created with wandb_init=True — confirm the mismatch is intended.
PhaseThreeModel.train_model_phase_three(
    model,
    train_dataloader,
    device,
    config=Config(wandb_init=False),
)

In [4]:
%%javascript
function ClickConnect(){
console.log("Working");
document.querySelector("colab-toolbar-button#connect").click()
}setInterval(ClickConnect,60000)

<IPython.core.display.Javascript object>

In [5]:
# run_phase is not defined in any visible cell, so this cell raised NameError
# on a fresh kernel. The recorded output (device banner followed by training
# logs) suggests it built the model and then trained it, which is what the
# two calls below do — TODO confirm against the original run_phase.
phase_three_config = Config(wandb_init=False)
model, train_dataloader, test_dataloader, device = create_model(
    "PhaseThree",
    PhaseThreeModel.PhaseThreeModel,
    config=phase_three_config,
)
PhaseThreeModel.train_model_phase_three(
    model,
    train_dataloader,
    device,
    config=phase_three_config,
)

Using cpu device
First Input:  torch.Size([1, 13, 384]) First Label:  tensor([24, 27,  9, 27, 19,  9, 24, 20, 25, 27, 15, 14,  5,  0,  0,  0,  0,  0,
         0,  0,  0,  0]) First Label Length:  13
Epoch:  0 / 300  ( 0.0 %)
Model Output:  ggggngnvsngdngdvdnnfggnnggnhjsnngngssgdegggzggggsgsjgnzgggngigngggggcxgcggggcvggggggnfnnggggggggggfhgggnhgznngggedscgggxgggvgngggfgggcxggvvgngwiggngdgnngnigqginnnnvvnnnnggvvnevzgnnnnddggvzggvgvnnvnzgzelgnfnnnvnsgnnngngnggnnggvnvgvngnnvggfgggnlwniznnnnnggnnvvggnnnevngnjnnnghnfinznzgngzngnngnggnllgsgnznnvnfgnggnngnengngzegngignzggnifgvdgngnggnnngvgnninnngnnuvnvegnnnngffgggnzfgnngnzvg
Model Prediction:  (['fngcfdsndghncgngngdgdsfgegnghgvxghgeigzgngzgigncgecgcvcgngngszgnsngzgzgngengeihnzsgngfegngcngcvgehdgngtgnzgivgnsvhgsnwgnvfngngngjngngivngzvgigngngvezngngfvgnvhdgfngnvznlegnzgngndvzvgngenfngngndnzgngnelzgfngngflzngvgvnfznegngvnfnvngningngezlevenglzgengnfgnlnzgvivngngnfnvgngngvngngng'], ['x i sixty one'])
[1,    10] loss: 69.356
[1,    20] loss: 57.226


KeyboardInterrupt: 