In [1]:
import sys
import time
import tqdm
import glob
import os
import pickle
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from torch import optim
from torch.utils.data import Dataset
from torchvision.datasets import DatasetFolder

In [2]:
class CNNDataset(Dataset):
    def __init__(self, root):
        self.dataset_folder = DatasetFolder(root=root, loader=CNNDataset._npy_loader, extensions=('_mfcc.npy',))
        self.len_ = len(self.dataset_folder)
        self.folder_to_index = self.dataset_folder.class_to_idx

    @staticmethod
    def _npy_loader(path):
        mfcc = np.load(path)
        non_mfcc_file_path = path.replace('mfcc', 'other')
        non_mfcc = np.load(non_mfcc_file_path)

        # in_channels x height x width
        assert mfcc.shape == (3, 13, 30)
        assert non_mfcc.shape == (18, )

        mfcc = torch.from_numpy(mfcc).float()
        non_mfcc = torch.from_numpy(non_mfcc).float()

        return mfcc, non_mfcc, path

    def __getitem__(self, index):

        return self.dataset_folder[index]

    def __len__(self):
        return self.len_

In [3]:
class ResBlock(nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size, stride=1):
        super().__init__()
        padding = (kernel_size - 1) // 2
        self.network = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
                      padding=padding, stride=stride),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size,
                      padding=padding, stride=stride),
            nn.BatchNorm2d(out_channels)
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.network(x)
        out = out + x
        out = self.relu(out)
        return out



In [4]:
class CNNStressNet(nn.Module):

    def __init__(self, reduction='mean'):
        super().__init__()
        self.loss_layer = nn.CrossEntropyLoss(reduction=reduction)
        self.cnn_network = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=(3 - 1)//2, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=(3 - 1)//2, stride=2),
            ResBlock(in_channels=32, out_channels=32, kernel_size=3),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=(3 - 1) // 2, stride=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=(0, (3 - 1) // 2), stride=2),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=(1, 4))
        )

        self.dnn_network = nn.Sequential(
            nn.Linear(18, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 64)
        )

        self.fully_connected = nn.Sequential(
            nn.BatchNorm1d(num_features=128),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.Dropout(p=0.25),
            nn.ReLU(),
            nn.BatchNorm1d(num_features=512),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(num_features=128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

    def forward(self, mfcc, non_mfcc):
        n = mfcc.shape[0]
        cnn_out = self.cnn_network(mfcc)
        cnn_out = cnn_out.reshape(n, 64)

        dnn_out = self.dnn_network(non_mfcc)

        out = torch.cat([cnn_out, dnn_out], dim=1)
        out = self.fully_connected(out)

        return out

    def loss(self, predictions, labels):
        loss_val = self.loss_layer(predictions, labels)
        return loss_val


In [5]:
def _remove_files(files):
    for f in files:
        return os.remove(f)


def assert_dir_exits(path):
    if not os.path.exists(path):
        os.makedirs(path)


def save_model(model, epoch, out_path):
    assert_dir_exits(out_path)
    chk_files = glob.glob(out_path + '/' + '*.pth')
    _remove_files(chk_files)
    torch.save(model.state_dict(), out_path + '/' + str(epoch) + '.pth')
    print('model saved for epoch: {}'.format(epoch))


def save_objects(obj, epoch, out_path):
    assert_dir_exits(out_path)
    dat_files = glob.glob(out_path + '/' + '*.dat')
    _remove_files(dat_files)
    # object should be tuple
    with open(out_path + '/' + str(epoch) + '.dat', 'wb') as output:
        pickle.dump(obj, output)

    print('objects saved for epoch: {}'.format(epoch))


def restore_model(model, out_path):
    chk_file = glob.glob(out_path + '/' + '*.pth')

    if chk_file:
        chk_file = str(chk_file[0])
        print('found modeL {}, restoring'.format(chk_file))
        model.load_state_dict(torch.load(chk_file))
    else:
        print('Model not found, using untrained model')
    return model


def restore_objects(out_path, default):
    data_file = glob.glob(out_path + '/' + '*.dat')
    if data_file:
        data_file = str(data_file[0])
        print('found data {}, restoring'.format(data_file))
        with open(data_file, 'rb') as input_:
            obj = pickle.load(input_)

        return obj
    else:
        return default


In [6]:
def update_metrics(pred: torch.Tensor, label: torch.Tensor, metric_dict: dict):
    metric_dict['accuracy'] += torch.sum((pred == label)).item()
    metric_dict['true_pos'] += torch.sum((label == 1) & (pred == 1)).item()
    metric_dict['true_neg'] += torch.sum((label == 0) & (pred == 0)).item()
    metric_dict['false_pos'] += torch.sum((label == 0) & (pred == 1)).item()
    metric_dict['false_neg'] += torch.sum((label == 1) & (pred == 0)).item()





In [7]:
def train(model, device, train_loader, optimizer, epoch, log_interval):
    model.train()
    losses = []
    metric_dict = {
        'accuracy': 0,
        'true_pos': 0,
        'true_neg': 0,
        'false_pos': 0,
        'false_neg': 0
    }

    for batch_idx, ((mfcc, non_mfcc, path), label) in enumerate(tqdm.tqdm(train_loader)):
        mfcc, non_mfcc, label = mfcc.to(device), non_mfcc.to(device), label.to(device)
        optimizer.zero_grad()
        out = model(mfcc, non_mfcc)
        loss = model.loss(out, label)
        with torch.no_grad():
            prob = torch.nn.functional.softmax(out, dim=1)
            pred = torch.argmax(prob, dim=1)
            update_metrics(pred=pred, label=label, metric_dict=metric_dict)

        losses.append(loss.item())
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print('{} Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                time.ctime(time.time()),
                epoch, batch_idx * len(mfcc), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    accuracy_mean = (100. * metric_dict['accuracy']) / len(train_loader.dataset)

    metric_dict['batch_losses'] = losses
    metric_dict['accuracy_mean'] = accuracy_mean
    metric_dict['precision'] = (metric_dict["true_pos"]) / (metric_dict["true_pos"] + metric_dict["false_pos"])
    metric_dict['recall'] = (metric_dict["true_pos"]) / (metric_dict["true_pos"] + metric_dict["false_neg"])
    metric_dict['f1_score'] = (2.0 * metric_dict['precision'] * metric_dict['recall']) / \
                              (metric_dict['precision'] + metric_dict['recall'])

    return np.mean(losses), accuracy_mean, metric_dict





In [8]:
def test(model, device, test_loader, log_interval=None):
    model.eval()
    losses = []

    metric_dict = {
        'accuracy': 0,
        'true_pos': 0,
        'true_neg': 0,
        'false_pos': 0,
        'false_neg': 0
    }

    data_check_dict = {'path': [], 'label': [], 'pred': [], 'prob_0': [], 'prob_1': []}

    with torch.no_grad():
        for batch_idx, ((mfcc, non_mfcc, path), label) in enumerate(tqdm.tqdm(test_loader)):
            mfcc, non_mfcc, label = mfcc.to(device), non_mfcc.to(device), label.to(device)
            out = model(mfcc, non_mfcc)
            prob = torch.nn.functional.softmax(out, dim=1)
            test_loss_on = model.loss(out, label).item()
            losses.append(test_loss_on)

            pred = torch.argmax(prob, dim=1)
            update_metrics(pred=pred, label=label, metric_dict=metric_dict)

            if log_interval is not None and batch_idx % log_interval == 0:
                print('{} Test: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    time.ctime(time.time()),
                    batch_idx * len(mfcc), len(test_loader.dataset),
                    100. * batch_idx / len(test_loader), test_loss_on))

            data_check_dict['path'] += path
            data_check_dict['label'] += label.tolist()
            data_check_dict['pred'] += pred.tolist()
            data_check_dict['prob_0'] += prob[:, 0].tolist()
            data_check_dict['prob_1'] += prob[:, 1].tolist()

    data_check_df = pd.DataFrame(data_check_dict)
    data_check_df.to_csv('data_check_test.csv', index=False)

    test_loss = np.mean(losses)
    accuracy_mean = (100. * metric_dict['accuracy']) / len(test_loader.dataset)

    metric_dict['batch_losses'] = losses
    metric_dict['accuracy_mean'] = accuracy_mean
    metric_dict['precision'] = (metric_dict["true_pos"]) / (metric_dict["true_pos"] + metric_dict["false_pos"])
    metric_dict['recall'] = (metric_dict["true_pos"]) / (metric_dict["true_pos"] + metric_dict["false_neg"])
    metric_dict['f1_score'] = (2.0 * metric_dict['precision'] * metric_dict['recall']) / \
                              (metric_dict['precision'] + metric_dict['recall'])

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{}, ({:.4f})%\n'.format(
        test_loss, metric_dict['accuracy'], len(test_loader.dataset), accuracy_mean))

    return test_loss, accuracy_mean, metric_dict


    # needs three command line arguments
    # 1. root path of train data
    # 2. root path of test data
    # 3. path where saved models are saved
    # 4. Learning rate
    # 5. Number of epochs

In [9]:
train_path = "/home/arunav/Desktop/8th-semester/RE/lexical-stress-detection-master/dev-clean/train_data"
test_path = "/home/arunav/Desktop/8th-semester/RE/lexical-stress-detection-master/dev-clean/test_data"
model_path ="/home/arunav/Desktop/8th-semester/RE/lexical-stress-detection-master/models"
learning_rate =0.01
epochs = 10

In [10]:
def main(train_path, test_path, model_path, learning_rate, epochs):
    print('train path: {}'.format(train_path))
    print('test path: {}'.format(test_path))
    print('model path: {}'.format(model_path))

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.cuda.current_device()
    print('using device', device)

    import multiprocessing
    print('num cpus:', multiprocessing.cpu_count())

    kwargs = {'num_workers': multiprocessing.cpu_count(),
              'pin_memory': True} if use_cuda else {}

    train_dataset = CNNDataset(root=train_path)
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True, **kwargs)

    test_dataset = CNNDataset(root=test_path)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=True, **kwargs)

    print('Folder to Index: {}'.format(train_dataset.folder_to_index))

    model = CNNStressNet(reduction='mean').to(device)
    model = restore_model(model, model_path)
    last_epoch, max_accuracy, train_losses, test_losses, all_train_metrics, all_test_metrics = \
        restore_objects(model_path, (0, 0, [], [], [], []))

    start = last_epoch + 1 if max_accuracy > 0 else 0

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    test_loss, test_accuracy, test_metrics = test(model, device, test_loader)
    print('Before any training:, test loss is: {}, test_accuracy: {}'.format(test_loss, test_accuracy))

    for epoch in range(start, start + epochs):
        train_loss, train_accuracy, train_metrics = train(model, device, train_loader, optimizer, epoch, 250)
        test_loss, test_accuracy, test_metrics = test(model, device, test_loader)
        print('After epoch: {}, train_loss: {}, test loss is: {}, train_accuracy: {}, test_accuracy: {}'.format(
            epoch, train_loss, test_loss, train_accuracy, test_accuracy))

        train_losses.append(train_loss)
        test_losses.append(test_loss)
        all_train_metrics.append(train_metrics)
        all_test_metrics.append(test_metrics)

        if test_accuracy > max_accuracy:
            max_accuracy = test_accuracy
            save_model(model, epoch, model_path)
            save_objects((epoch, max_accuracy, train_losses, test_losses, all_train_metrics, all_test_metrics),
                         epoch, model_path)
            print('saved epoch: {} as checkpoint'.format(epoch))



In [11]:
main(train_path, test_path, model_path, learning_rate, epochs)

train path: /home/arunav/Desktop/8th-semester/RE/lexical-stress-detection-master/dev-clean/train_data
test path: /home/arunav/Desktop/8th-semester/RE/lexical-stress-detection-master/dev-clean/test_data
model path: /home/arunav/Desktop/8th-semester/RE/lexical-stress-detection-master/models


RuntimeError: cuda runtime error (100) : no CUDA-capable device is detected at /pytorch/aten/src/THC/THCGeneral.cpp:50