# Frame-level speech recognition
## Ayush Kumar (MS19038)

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from sklearn.metrics import accuracy_score
import random


from torch.utils import data
from torchvision import transforms
from tqdm import tqdm
import matplotlib.pyplot as plt
import time

# Fix the NumPy and PyTorch RNG seeds.
# NOTE(review): Python's `random` module is imported above but is not seeded here.
np.random.seed(11785)
torch.manual_seed(11785)

# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
sys.version  # bare expression; it is not the last line of the cell, so it has no visible effect
print(cuda, sys.version)

False 3.7.12 | packaged by conda-forge | (default, Oct 26 2021, 06:08:53) 
[GCC 9.4.0]


In [3]:
import argparse
import time
import csv
from datetime import datetime
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.models import resnet
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [4]:
import math
class VariableLengthPooling(nn.Module):
    """Average per-frame scores over the segments described by ``bounds``.

    ``forward`` computes ``bmm(x, bounds)`` and divides each resulting column
    by the corresponding column sum of ``bounds``.  With a 0/1 assignment
    matrix this is the mean of ``x`` over the frames of every segment.
    """

    def forward(self, x, **kwargs):
        """Pool ``x`` segment-wise.

        Args:
            x: tensor of shape ``(batch, channels, n_frames)``.
            **kwargs: must contain ``bounds``, a tensor of shape
                ``(batch, n_frames, n_segments)`` (shapes follow from the
                ``torch.bmm`` call below).

        Returns:
            Tensor of shape ``(batch, channels, n_segments)``.

        Raises:
            TypeError: if ``bounds`` is missing or ``None``.
        """
        bounds = kwargs.get("bounds")
        if bounds is None:
            raise TypeError("VariableLengthPooling.forward() requires a 'bounds' tensor")
        # keepdim=True yields (batch, 1, n_segments), which broadcasts over the
        # channel axis for every batch size.  Without it the (batch, n_segments)
        # divisor only lines up correctly when batch == 1.
        sum_bounds = torch.sum(bounds, dim=1, keepdim=True)
        out = torch.bmm(x, bounds) / sum_bounds
        return out

def conv3x3(in_planes, out_planes, kernel_size=3, stride=1):
    """Build a biased 1-D convolution padded by ``kernel_size // 2`` on each side.

    Despite the name, the kernel size is configurable; 3 is only the default.
    """
    half_width = kernel_size // 2
    return nn.Conv1d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=half_width,
        bias=True,
    )


In [5]:
class BasicBlock(nn.Module):
    """Residual block made of two conv -> batch-norm stages for 1-D inputs.

    The block output is ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut)``
    where ``shortcut`` is ``x`` itself, or ``downsample(x)`` when a
    ``downsample`` module is supplied.
    """

    expansion = 1

    def __init__(self, inplanes, planes, kernel_size=3, stride=1, downsample=None):
        super().__init__()

        # Both convolutions share the same kernel size and stride.
        conv_kwargs = dict(kernel_size=kernel_size, stride=stride)

        self.conv1 = conv3x3(inplanes, planes, **conv_kwargs)
        self.bn1 = nn.BatchNorm1d(planes)
        self.relu = nn.LeakyReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes, **conv_kwargs)
        self.bn2 = nn.BatchNorm1d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the two convolutional stages and add the shortcut branch."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


In [6]:
class Bottleneck(nn.Module):
    """Three-stage residual block for 1-D inputs: 1x1 -> k-wide -> 1x1 convolution.

    The last convolution widens the channel count to ``planes * expansion``.
    The shortcut branch is ``x`` itself, or ``downsample(x)`` when a
    ``downsample`` module is supplied.
    """

    expansion = 4

    def __init__(self, inplanes, planes, kernel_size=3, stride=1, downsample=None):
        super().__init__()
        widened = planes * self.expansion

        # Pointwise channel reduction.
        self.conv1 = nn.Conv1d(inplanes, planes, kernel_size=1, bias=True)
        self.bn1 = nn.BatchNorm1d(planes)

        # Temporal convolution; this is the only stage that uses kernel_size and stride.
        self.conv2 = nn.Conv1d(planes, planes, kernel_size=kernel_size, stride=stride,
                               padding=kernel_size // 2, bias=True)
        self.bn2 = nn.BatchNorm1d(planes)

        # Pointwise channel expansion.
        self.conv3 = nn.Conv1d(planes, widened, kernel_size=1, bias=True)
        self.bn3 = nn.BatchNorm1d(widened)

        self.relu = nn.LeakyReLU(inplace=True)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv/bn stages, add the shortcut and apply the activation."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


In [7]:
class ResNet(nn.Module):
    """1-D residual network that scores every frame and pools the scores per segment.

    The network is a stem convolution (``in_channels`` -> 192), six residual
    stages built from ``block``, a final convolution that maps to
    ``num_classes`` channels, and a ``VariableLengthPooling`` layer that
    averages the per-frame scores over the segments given by ``bounds``.

    Args:
        block: residual block class (must expose ``expansion``).
        layers: sequence of four ints with the number of blocks per stage.
            ``layers[0]`` is used for the first two stages and ``layers[3]``
            for the last two.
        num_classes: number of output classes.
        in_channels: number of input feature channels (40 by default, the
            value that used to be hard-coded).
    """

    def __init__(self, block, layers, num_classes=46, in_channels=40):
        super(ResNet, self).__init__()
        # Running channel count consumed by _make_layer while stacking stages.
        self.inplanes = 192

        self.conv1 = nn.Conv1d(in_channels, 192, kernel_size=3, stride=1, padding=1, bias=True)
        self.bn1 = nn.BatchNorm1d(192)

        self.layer0 = self._make_layer(block, 256, layers[0])
        self.layer1 = self._make_layer(block, 256, layers[0], kernel_size=1, stride=1)
        self.layer2 = self._make_layer(block, 256, layers[1], kernel_size=5, stride=1)
        self.layer3 = self._make_layer(block, 256, layers[2], kernel_size=5, stride=1)
        self.layer4 = self._make_layer(block, 512, layers[3], kernel_size=1, stride=1)
        self.layer5 = self._make_layer(block, 512, layers[3], stride=1)

        self.conv_merge = nn.Conv1d(512 * block.expansion, num_classes,
                                    kernel_size=3, stride=1, padding=1,
                                    bias=True)
        self.vlp = VariableLengthPooling()

    def _make_layer(self, block, planes, blocks, kernel_size=3, stride=1):
        """Stack ``blocks`` residual blocks producing ``planes * block.expansion`` channels.

        A 1x1 conv + batch-norm projection is added to the first block's
        shortcut when the channel count changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # NOTE(review): the projection always uses stride=1, so a stage
            # built with stride != 1 would produce mismatched shortcut shapes.
            # Every stage in this file uses stride=1.
            downsample = nn.Sequential(
                nn.Conv1d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=1, bias=False),
                nn.BatchNorm1d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, kernel_size=kernel_size,
                            stride=stride, downsample=downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, kernel_size=kernel_size))

        return nn.Sequential(*layers)

    def forward(self, x, bounds=None):
        """Score every frame and average the scores per segment.

        Args:
            x: input features, ``(batch, in_channels, n_frames)``.
            bounds: frame-to-segment assignment matrix consumed by the pooling
                layer, ``(batch, n_frames, n_segments)``.  Required in
                practice even though the default is ``None``.

        Returns:
            Tensor of shape ``(batch, num_classes, n_segments)``.
        """
        x = F.leaky_relu(self.bn1(self.conv1(x)), inplace=True)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.conv_merge(x)
        # The former torch.squeeze(x, dim=2) was removed: it did nothing for
        # n_frames > 1 and dropped the time axis for single-frame inputs,
        # which made the batched matmul in the pooling layer fail.
        x = self.vlp(x, bounds=bounds)

        return x


In [8]:
import torch

def to_float_tensor(numpy_array):
    """Wrap a NumPy array as a tensor and cast it to 32-bit floating point."""
    as_tensor = torch.from_numpy(numpy_array)
    return as_tensor.to(dtype=torch.float32)

def to_long_tensor(numpy_array):
    """Wrap a NumPy array as a tensor and cast it to 64-bit integers."""
    as_tensor = torch.from_numpy(numpy_array)
    return as_tensor.to(dtype=torch.int64)

def to_tensor(numpy_array):
    """Wrap a NumPy array as a PyTorch tensor, keeping the array's dtype."""
    wrapped = torch.from_numpy(numpy_array)
    return wrapped

def to_variable(tensor):
    """Wrap ``tensor`` in ``torch.autograd.Variable``, on the GPU when CUDA is available."""
    # Transfer to the GPU first so the returned Variable already lives there.
    placed = tensor.cuda() if torch.cuda.is_available() else tensor
    return torch.autograd.Variable(placed)

def get_onehot(b, n_phones, n_frames):
    """Return an ``(n_frames, n_phones)`` 0/1 matrix assigning each frame to a phone.

    ``b`` holds the start frame of every phone.  Phone ``i`` covers frames
    ``b[i]`` up to (not including) ``b[i + 1]``, and the last phone runs to
    ``n_frames``.  Frames not covered by any span stay assigned to phone 0.
    """
    # End of each span = start of the next one; the last span ends at n_frames.
    ends = np.concatenate((b[1:], [n_frames]))

    frame_to_phone = np.zeros(n_frames, dtype=int)
    for phone_idx, start in enumerate(b):
        frame_to_phone[start:ends[phone_idx]] = phone_idx

    onehot = np.zeros((n_frames, n_phones))
    onehot[np.arange(n_frames), frame_to_phone] = 1
    return onehot


In [9]:
class MyDataset(Dataset):
    """Dataset of utterances, each stored as ``(frames, phone_start_indices)``.

    Args:
        x: NumPy array of utterances; ``x[i][0]`` is the frame matrix and
            ``x[i][1]`` the start frame of every phone.
        y: per-utterance label arrays, or ``None`` when labels are unknown.
        for_conv2d: when true, a leading singleton axis is added to the
            transposed frame matrix.
    """

    def __init__(self, x, y, for_conv2d=False):
        super().__init__()
        self.x = x
        self.y = y
        self.for_conv2d = for_conv2d
        # Total number of phones across all utterances.
        self.total_phonemes = sum(len(utterance[1]) for utterance in x)
        print(f"n_utters {self.x.shape[0]}, total_phonemes {self.total_phonemes}")

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, idx):
        """Return ``(frames, frame-to-phone matrix, labels)`` tensors for utterance ``idx``.

        Labels are ``[-1]`` when the dataset was built without labels.
        """
        utterance = self.x[idx]
        frames, bounds = utterance[0], utterance[1]
        assignment = self.get_onehot(bounds, len(bounds), len(frames))

        # Swap the two axes of the frame matrix.
        features = frames.transpose()
        if self.for_conv2d:
            features = np.expand_dims(features, axis=0)

        if self.y is None:
            target = np.array([-1])
        else:
            target = self.y[idx]

        return (
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(assignment, dtype=torch.float32),
            torch.tensor(target, dtype=torch.long),
        )

    def get_onehot(self, bounds, n_phones, n_frames):
        """Build the ``(n_frames, n_phones)`` 0/1 frame-to-phone assignment matrix.

        Phone ``i`` covers frames ``bounds[i]`` up to ``bounds[i + 1]``
        (``n_frames`` for the last phone).
        """
        ends = np.concatenate((bounds[1:], [n_frames]))
        frame_to_phone = np.zeros(n_frames, dtype=int)
        for phone_idx, start in enumerate(bounds):
            frame_to_phone[start:ends[phone_idx]] = phone_idx
        onehot = np.zeros((n_frames, n_phones))
        onehot[np.arange(n_frames), frame_to_phone] = 1
        return onehot

def get_data_loaders(args, for_conv2d=False):
    """Load the train/dev ``.npy`` files from ``args.data_dir`` and wrap them in DataLoaders.

    Returns:
        ``(train_loader, dev_loader)``; both are shuffled and use
        ``args.batch_size``.
    """
    print("loading data")
    names = ("train_data", "train_labels", "dev_data", "dev_labels")
    arrays = {
        name: np.load(f"{args.data_dir}/{name}.npy", allow_pickle=True)
        for name in names
    }
    print("load complete")

    # Worker processes and pinned memory are only requested when running on CUDA.
    loader_kwargs = {"num_workers": 3, "pin_memory": True} if args.cuda else {}

    train_set = MyDataset(arrays["train_data"], arrays["train_labels"], for_conv2d=for_conv2d)
    train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, **loader_kwargs)

    dev_set = MyDataset(arrays["dev_data"], arrays["dev_labels"], for_conv2d=for_conv2d)
    dev_loader = DataLoader(dev_set, batch_size=args.batch_size, shuffle=True, **loader_kwargs)

    return train_loader, dev_loader

def weights_init(m):
    """Re-initialise the weight of a ``Conv1d`` module with Xavier-normal values.

    Modules of any other type are left untouched.
    """
    if not isinstance(m, nn.Conv1d):
        return
    nn.init.xavier_normal_(m.weight.data)

In [2]:
# The main model
class MyModel(nn.Module):
    """Plain (non-residual) stack of Conv1d -> BatchNorm1d -> LeakyReLU units.

    The stack ends with a ``VariableLengthPooling`` layer that averages the
    46-channel per-frame scores over the segments given by ``bounds``.

    A stray 256 -> 128 -> 128 -> 46 head used to sit between the last
    512 -> 1024 convolution and the 1024 -> 1024 -> 46 head.  Its channel
    counts did not chain with either neighbour, so ``forward`` always raised
    a shape error.  It has been removed, and the head that matches the
    1024-channel trunk is kept.
    """

    # (in_channels, out_channels, kernel_size) of every conv unit, in order.
    # Each unit uses padding = kernel_size // 2.
    _CONV_SPECS = (
        (40, 192, 3),

        # A
        (192, 192, 3),
        (192, 192, 3),
        (192, 192, 3),

        (192, 192, 1),

        # B
        (192, 192, 3),
        (192, 192, 3),
        (192, 256, 3),

        (256, 256, 1),

        # C
        (256, 256, 3),
        (256, 256, 3),
        (256, 256, 3),

        (256, 256, 1),

        # D
        (256, 512, 3),
        (512, 512, 3),
        (512, 512, 3),

        (512, 512, 1),

        # E
        (512, 512, 5),
        (512, 512, 7),
        (512, 512, 9),
        (512, 1024, 11),

        # Classification head
        (1024, 1024, 3),
        (1024, 1024, 3),
        (1024, 46, 3),
    )

    def __init__(self):
        super(MyModel, self).__init__()
        # True until the first forward pass has completed; gates the debug prints.
        self.firstrun = True

        modules = []
        for in_ch, out_ch, kernel in self._CONV_SPECS:
            modules.append(nn.Conv1d(in_ch, out_ch, kernel, padding=kernel // 2))
            modules.append(nn.BatchNorm1d(out_ch))
            modules.append(nn.LeakyReLU(inplace=True))
        modules.append(VariableLengthPooling())
        self.layers = nn.ModuleList(modules)

    def forward(self, input, bounds=None, print_firstrun=False):
        """Run the stack on ``input`` and pool the per-frame scores per segment.

        Args:
            input: features of shape ``(batch, 40, n_frames)``.
            bounds: frame-to-segment assignment matrix handed to the final
                pooling layer.
            print_firstrun: when true, the output size of every layer is
                printed during the first call.

        Returns:
            The output of the final pooling layer.
        """
        h = input
        if self.firstrun:
            print("****************************************")
            print("input: {}".format(h.size()))
        for i, layer in enumerate(self.layers):
            # Only the final pooling layer takes the segment boundaries.
            if i == len(self.layers) - 1 and isinstance(layer, VariableLengthPooling):
                h = layer(h, bounds=bounds)
            else:
                h = layer(h)
            if print_firstrun and self.firstrun:
                print("{}: {}".format(layer, h.size()))
        if self.firstrun:
            print("****************************************")
        self.firstrun = False
        return h


def MyModelResNet1D():
    """Create the 46-class 1-D ResNet built from ``BasicBlock`` with two blocks per stage."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage, num_classes=46)

def train(epoch, model, optimizer, train_loader, args):
    """Train ``model`` for one epoch.

    Args:
        epoch: epoch number, used only for logging.
        model: module called as ``model(frames, bounds=bounds)`` and returning
            scores of shape ``(batch, n_classes, n_phones)``.
        optimizer: optimizer over ``model.parameters()``.
        train_loader: iterable of ``(frames, bounds, labels)`` batches that
            also exposes ``.dataset`` and ``len()`` for logging.
        args: namespace providing ``cuda``, ``l2_reg`` and ``log_interval``.
    """
    model.train()

    t0 = time.time()
    for batch_idx, (frames, bounds, labels) in enumerate(train_loader):
        if args.cuda:
            frames, bounds, labels = frames.cuda(), bounds.cuda(), labels.cuda()
        optimizer.zero_grad()

        output = model(frames, bounds=bounds)

        # (batch, classes, phones) -> (batch * phones, classes).  Explicit
        # reshapes instead of squeeze() so utterances with a single phoneme
        # keep both axes.
        logits = output.permute(0, 2, 1).reshape(-1, output.size(1))
        targets = labels.reshape(-1)
        n_phones = targets.numel()
        loss = F.cross_entropy(logits, targets, reduction='sum')

        # Explicit penalty: sum of the L2 norms of all parameter tensors.
        # The sum starts from 0; the previous accumulator started from an
        # uninitialised FloatTensor(1), i.e. an arbitrary value.
        l2_reg = sum(W.norm(2) for W in model.parameters())
        loss = loss + args.l2_reg * l2_reg

        # Weighted loss. Typical utterance has 72 phonemes
        weighted_loss = loss * n_phones / 72.0
        weighted_loss.backward()
        optimizer.step()

        # average loss per phoneme
        avg_loss = loss.item() / n_phones

        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} Batch: {} [{}/{} ({:.2f}%, time:{:.2f}s)]\tLoss: {:.6f}'.format(
                epoch, batch_idx, batch_idx * len(frames), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), time.time() - t0,
                avg_loss))
            t0 = time.time()


def test(model, test_loader, args):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy per phoneme.

    Args:
        model: module called as ``model(frames, bounds=bounds)`` and returning
            scores of shape ``(batch, n_classes, n_phones)``.
        test_loader: iterable of ``(frames, bounds, labels)`` batches whose
            ``.dataset`` exposes ``total_phonemes``.
        args: namespace providing ``cuda``.

    Returns:
        ``(accuracy_string, accuracy)``: the accuracy formatted as a
        percentage with four decimals, and the accuracy as a fraction in
        ``[0, 1]``.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    # no_grad() replaces the removed `volatile=True` flag, which is now
    # ignored and would leave autograd recording the whole evaluation pass.
    with torch.no_grad():
        for frames, bounds, labels in test_loader:
            if args.cuda:
                frames, bounds, labels = frames.cuda(), bounds.cuda(), labels.cuda()

            output = model(frames, bounds=bounds)
            # (batch, classes, phones) -> (batch * phones, classes); explicit
            # reshapes keep single-phoneme utterances two-dimensional.
            logits = output.permute(0, 2, 1).reshape(-1, output.size(1))
            targets = labels.reshape(-1)

            test_loss += F.cross_entropy(logits, targets, reduction='sum').item()  # sum up batch loss
            pred = logits.max(1)[1]  # index of the highest score
            correct += pred.eq(targets).sum().item()

    total = test_loader.dataset.total_phonemes
    test_loss /= total
    accuracy = correct / total
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
        test_loss, correct, total,
        100 * accuracy))
    return "{:.4f}%".format(100. * correct / total), accuracy


# The function name matches pytest's/nose's default test pattern; mark it so
# test runners do not try to collect this evaluation helper as a test case.
test.__test__ = False


def main(args):
    """Train the 1-D ResNet for ``args.epochs`` epochs, saving the weights after each epoch.

    After every epoch the model is evaluated on the dev loader, the learning
    rate scheduler is stepped with the dev accuracy, and the state dict is
    written to ``args.weights_dir``.
    """
    print(args)

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    train_loader, test_loader = get_data_loaders(args, for_conv2d=False)

    model = MyModelResNet1D()
    if args.cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=5e-5)  # 1e-4
    # Halve the learning rate when accuracy has not improved by at least
    # 0.01 (absolute) for more than `patience` epochs.
    plateau_options = dict(factor=0.5, patience=2, verbose=True,
                           threshold_mode='abs', threshold=0.01, min_lr=1e-6)
    scheduler = ReduceLROnPlateau(optimizer, 'max', **plateau_options)

    for completed in range(args.epochs):
        epoch = completed + 1
        print(datetime.now())
        train(epoch, model, optimizer, train_loader, args)
        acc_str, acc = test(model, test_loader, args)
        scheduler.step(acc)

        if not os.path.exists(args.weights_dir):
            os.makedirs(args.weights_dir)
        checkpoint_path = f"{args.weights_dir}/{epoch:03d}_{acc_str}.w"
        torch.save(model.state_dict(), checkpoint_path)


def predict_batch(model, x, bounds, args):
    """Predict the class index of every phoneme in one batch.

    Args:
        model: module called as ``model(x, bounds=bounds)`` and returning
            scores of shape ``(batch, n_classes, n_phones)``.
        x: input features for the batch.
        bounds: frame-to-phoneme assignment matrix for the batch.
        args: namespace providing ``cuda``; when true, the model and inputs
            are moved to the GPU.

    Returns:
        Long tensor of shape ``(batch * n_phones, 1)`` holding the index of
        the highest-scoring class for each phoneme.
    """
    if args.cuda:
        model.cuda()
        x = x.cuda()
        bounds = bounds.cuda()
    model.eval()
    # no_grad() replaces the removed `volatile=True` flag, which is ignored
    # by current PyTorch.
    with torch.no_grad():
        output = model(x, bounds=bounds)
    # (batch, classes, phones) -> (batch * phones, classes); explicit reshapes
    # instead of squeeze() so single-phoneme utterances keep both axes.
    logits = output.permute(0, 2, 1).reshape(-1, output.size(1))
    return logits.max(1, keepdim=True)[1]


def get_test_data_loaders(args):
    """Load ``test_data.npy`` from ``args.data_dir`` and wrap it in an unshuffled DataLoader.

    The dataset is built without labels.
    """
    print("loading data")
    test_path = f"{args.data_dir}/test_data.npy"
    xtest = np.load(test_path, allow_pickle=True)
    print("load complete")

    # Pinned memory is only requested when running on CUDA.
    loader_kwargs = {'pin_memory': True} if args.cuda else {}
    unlabeled = MyDataset(xtest, None)
    return torch.utils.data.DataLoader(
        unlabeled, batch_size=args.batch_size, shuffle=False, **loader_kwargs)


def predict(args, csv_fpath, weights_fpath):
    """Predict labels for the test set and write them to a CSV file.

    Args:
        args: namespace providing ``cuda``, ``data_dir``, ``batch_size`` and
            ``log_interval``.
        csv_fpath: destination CSV path; columns are ``Id`` and ``Label``.
        weights_fpath: path of a state dict saved from ``MyModelResNet1D``.
    """
    model = MyModelResNet1D()
    # map_location lets weights saved from a GPU run load on a CPU-only
    # machine; predict_batch moves the model to the GPU when args.cuda is set.
    model.load_state_dict(torch.load(weights_fpath, map_location="cpu"))
    test_loader = get_test_data_loaders(args)
    # newline='' is what the csv module expects for files it writes.
    with open(csv_fpath, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Id', 'Label'])
        writer.writeheader()
        cnt = 0
        for batch, (frames, bounds, _) in enumerate(test_loader):
            if batch % args.log_interval == 0:
                print("batch", batch)
            yhat = predict_batch(model, frames, bounds, args)
            for i, y in enumerate(yhat):
                # The label is written as a 0-d tensor, so the CSV holds
                # its string form (e.g. "tensor(12)").  This is kept as-is
                # because the later `convert` step parses that format.
                writer.writerow({"Id": cnt + i, "Label": y.cpu()[0]})
            cnt += len(yhat)
    print("done")

In [11]:
# Command-line options for training and prediction.
# NOTE(review): --test-batch-size and --momentum are accepted but not read by
# the training code visible in this file (the optimizer is Adam).
parser = argparse.ArgumentParser(
    description='Frame level speech recognition: 1-D ResNet phoneme classifier')
parser.add_argument('--batch-size', type=int, default=32, metavar='N',
                    help='input batch size for training (default: 32)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                    help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                    help='SGD momentum (default: 0.9)')
parser.add_argument('--l2-reg', type=float, default=0.001,
                    help='weight of the explicit l2 regularization term (default: 0.001)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=500, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--data-dir', type=str, default='/kaggle/input/idc-410-spring-2023',
                    help='data directory')
parser.add_argument('--weights-dir', type=str, default='./weights/',
                    help='directory where model weights are saved')

_StoreAction(option_strings=['--weights-dir'], dest='weights_dir', nargs=None, const=None, default='./weights/', type=<class 'str'>, choices=None, help='data directory', metavar=None)

In [None]:
import argparse
# Entry point: parse the options, choose the device and start training.
if __name__ == "__main__":
    print(torch.__version__)
    # parse_known_args ignores unrecognised arguments instead of exiting
    # (presumably needed because the notebook kernel passes its own argv).
    args, unknown = parser.parse_known_args()
    # Use CUDA only when it is available and was not disabled with --no-cuda.
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    # Overrides --batch-size.  NOTE(review): presumably required because
    # utterances differ in length and cannot be stacked into one batch — confirm.
    args.batch_size = 1
    main(args)

1.13.0
Namespace(batch_size=1, cuda=True, data_dir='/kaggle/input/idc-410-spring-2023', epochs=10, l2_reg=0.001, log_interval=500, lr=0.001, momentum=0.9, no_cuda=False, seed=1, test_batch_size=1000, weights_dir='./weights/')
loading data
load complete
n_utters 15491, total_phonemes 1204725
n_utters 1722, total_phonemes 133209


  cpuset_checked))


2023-04-01 21:47:26.746777




Train Epoch: 1 Batch: 0 [0/15491 (0.00%, time:5.24s)]	Loss: 4.047507
Train Epoch: 1 Batch: 500 [500/15491 (3.23%, time:22.53s)]	Loss: 1.101856
Train Epoch: 1 Batch: 1000 [1000/15491 (6.46%, time:21.51s)]	Loss: 1.556414
Train Epoch: 1 Batch: 1500 [1500/15491 (9.68%, time:21.78s)]	Loss: 0.883833
Train Epoch: 1 Batch: 2000 [2000/15491 (12.91%, time:21.43s)]	Loss: 0.844787
Train Epoch: 1 Batch: 2500 [2500/15491 (16.14%, time:21.22s)]	Loss: 1.005906
Train Epoch: 1 Batch: 3000 [3000/15491 (19.37%, time:21.82s)]	Loss: 5.629163
Train Epoch: 1 Batch: 3500 [3500/15491 (22.59%, time:21.57s)]	Loss: 1.162315
Train Epoch: 1 Batch: 4000 [4000/15491 (25.82%, time:20.90s)]	Loss: 0.899623
Train Epoch: 1 Batch: 4500 [4500/15491 (29.05%, time:21.12s)]	Loss: 1.084837
Train Epoch: 1 Batch: 5000 [5000/15491 (32.28%, time:21.25s)]	Loss: 0.720578
Train Epoch: 1 Batch: 5500 [5500/15491 (35.50%, time:20.98s)]	Loss: 0.378615
Train Epoch: 1 Batch: 6000 [6000/15491 (38.73%, time:22.06s)]	Loss: 1.189397
Train Epoch:




Test set: Average loss: 0.9629, Accuracy: 100406/133209 (75.3748%)

2023-04-01 21:59:06.729368
Train Epoch: 2 Batch: 0 [0/15491 (0.00%, time:0.24s)]	Loss: 1.346804
Train Epoch: 2 Batch: 500 [500/15491 (3.23%, time:22.36s)]	Loss: 0.880852
Train Epoch: 2 Batch: 1000 [1000/15491 (6.46%, time:21.41s)]	Loss: 0.374020
Train Epoch: 2 Batch: 1500 [1500/15491 (9.68%, time:22.36s)]	Loss: 0.600757
Train Epoch: 2 Batch: 2000 [2000/15491 (12.91%, time:21.96s)]	Loss: 0.501119
Train Epoch: 2 Batch: 2500 [2500/15491 (16.14%, time:21.50s)]	Loss: 0.660453
Train Epoch: 2 Batch: 3000 [3000/15491 (19.37%, time:22.11s)]	Loss: 0.749095
Train Epoch: 2 Batch: 3500 [3500/15491 (22.59%, time:21.88s)]	Loss: 0.844517
Train Epoch: 2 Batch: 4000 [4000/15491 (25.82%, time:21.69s)]	Loss: 0.723685
Train Epoch: 2 Batch: 4500 [4500/15491 (29.05%, time:22.21s)]	Loss: 0.796879
Train Epoch: 2 Batch: 5000 [5000/15491 (32.28%, time:22.22s)]	Loss: 0.943791
Train Epoch: 2 Batch: 5500 [5500/15491 (35.50%, time:22.11s)]	Loss: 0.

In [12]:
# Load the saved weight file, predict the labels of the test set and write
# them to the submission CSV file.
predict(args, '/kaggle/working/submission.csv', '/kaggle/working/weights/001_75.4243%.w')

loading data
load complete
n_utters 7377, total_phonemes 570973
batch 0




batch 500
batch 1000
batch 1500
batch 2000
batch 2500
batch 3000
batch 3500
batch 4000
batch 4500
batch 5000
batch 5500
batch 6000
batch 6500
batch 7000
done


In [13]:
import pandas as pd
# Read back the predictions written by predict() for post-processing.
df=pd.read_csv('/kaggle/working/submission.csv')

In [14]:
def convert(df):
    """Convert the ``Label`` column of ``df`` to a list of Python ints.

    Labels may be strings that embed the class index, such as
    ``"tensor(12)"``, or values that are already numeric.  For strings the
    first integer found in the text is used, so indices of any width are
    parsed correctly.

    Args:
        df: DataFrame with ``Id`` and ``Label`` columns.

    Returns:
        ``(ids, labels)`` where ``ids`` is ``df["Id"]`` unchanged and
        ``labels`` is a list of ints in row order.

    Raises:
        ValueError: if a string label contains no integer.
    """
    import re

    labels = []
    for raw in df["Label"]:
        if isinstance(raw, str):
            match = re.search(r"-?\d+", raw)
            if match is None:
                raise ValueError(f"cannot parse label {raw!r}")
            labels.append(int(match.group()))
        else:
            # Already numeric (e.g. the CSV stored plain integers).
            labels.append(int(raw))
    return df["Id"], labels

In [15]:
# Build a new DataFrame with integer labels and save it as the final CSV file.
id1,label=convert(df)
# Pair every Id with its converted label, row by row.
list_of_tuples = list(zip(id1, label))
df = pd.DataFrame(list_of_tuples,
                  columns=['Id', 'Label'])
# index=False keeps the DataFrame index out of the file.
df.to_csv('output.csv',index=False)