## Setting up your data pipeline


In [1]:
import pandas as pd

df = pd.read_csv('./asr_bengali/utt_spk_text.tsv', sep='\t')
df.columns = ["id", "hash", "text"]
df.head()

Unnamed: 0,id,hash,text
0,000039928e,976b1,এ ধরণের কার্ড নিয়ে
1,00005debc7,f83df,হতে উপার্জিত অর্থ
2,00009e687c,9813c,হাসির বিষয় হয়েই আছে
3,00012843bc,7ec1c,সার্ক দেশগুলোতে
4,00012880b1,e43d4,সহজেই যাওয়া যায়


In [5]:
import os
from comet_ml import Experiment
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.optim as optim
import torch.nn.functional as F
import torchaudio
from util.text_transform import TextTransform
from util.process_decode import data_processing_bengali, greedy_decoder

In [6]:
from sound_ds import SoundDS
from torch.utils.data import random_split

my_ds = SoundDS(df, "./asr_bengali")

# Random split of 80:20 between training and validation
num_items = len(my_ds)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
train_ds, val_ds = random_split(my_ds, [num_train, num_val])

# Create training and validation data loaders
# train_dl = torch.utils.data.DataLoader(train_ds, batch_size=16, shuffle=True)
# val_dl = torch.utils.data.DataLoader(val_ds, batch_size=16, shuffle=False)


In [8]:
learning_rate = 5e-4
batch_size = 10
epochs = 2
train_url = "train-clean-100"
test_url = "test-clean"

h_params = {
    "n_cnn_layers": 3,
    "n_rnn_layers": 5,
    "rnn_dim": 512,
    "n_class": 29,
    "n_feats": 128,
    "stride":2,
    "dropout": 0.1,
    "learning_rate": learning_rate,
    "batch_size": batch_size,
    "epochs": epochs
}

train_audio_transforms = nn.Sequential(
    torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_mels=128),
    torchaudio.transforms.FrequencyMasking(freq_mask_param=30),
    torchaudio.transforms.TimeMasking(time_mask_param=100)
)
valid_audio_transforms = torchaudio.transforms.MelSpectrogram()

text_transform = TextTransform()

use_cuda = torch.cuda.is_available()
torch.manual_seed(7)
device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = data.DataLoader(dataset=train_ds,
                               batch_size=h_params['batch_size'],
                               shuffle=True,
                               collate_fn=lambda x: data_processing_bengali(x, text_transform,
                                                                         train_audio_transforms,
                                                                         valid_audio_transforms, 'train'),
                               **kwargs)
test_loader = data.DataLoader(dataset=val_ds,
                              batch_size=h_params['batch_size'],
                              shuffle=False,
                              collate_fn=lambda x:  data_processing_bengali(x, text_transform,
                                                                        train_audio_transforms,
                                                                        valid_audio_transforms, 'valid'),
                              **kwargs)

