In [1]:
import numpy as np
from datetime import datetime
import time
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
import pandas as pd
from pandas import DataFrame as df

In [2]:
# plot example cough
train_path = 'Freesound_Audio_Train/'
file = '02d6b747.wav'
sample_rate, samples = wavfile.read(str(train_path) + file)
tr_labels = pd.read_csv('train_labels.csv')
ts_labels = pd.read_csv('test_labels.csv')

In [3]:
"""Log of spectrogram is much cleaner for plotting and is connected to how humans hear.
see https://www.kaggle.com/davids1992/speech-representation-and-data-exploration"""
def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    nperseg = int(round(window_size * sample_rate / 1e3))
    noverlap = int(round(step_size * sample_rate / 1e3))
    freqs, times, spec = signal.spectrogram(audio,
                                    fs=sample_rate,
                                    window='hann',
                                    nperseg=nperseg,
                                    noverlap=noverlap,
                                    detrend=False)
    return freqs, times, np.log(spec.T.astype(np.float32) + eps)

In [4]:
print(sample_rate)
print('\n ' + str(len(samples)))
print(sample_rate/len(samples))

44100

 96138
0.45871559633027525


# Dataset & Visualizing Classes

In [5]:
import torch
import sys
from torch.utils.data import Dataset, DataLoader
from torchvision import *
from datetime import datetime
from skimage import io, transform
from PIL import Image
#from torchaudio.transforms import MFCC
import librosa
import os
import time
import copy

In [6]:
# turn our freesound dataset into cough, 1, and non-cough, 0 labels
train_labels = pd.read_csv('train_labels.csv')
test_labels = pd.read_csv('test_labels.csv')

In [7]:
for idx,item in enumerate(train_labels.iloc[:,1].drop_duplicates()):
    if idx == 0:
        train_non_cough_idx = train_labels[train_labels.iloc[:,1] == item].index.values[:6]
    if item != 'Cough':
        train_non_cough_idx = np.append(train_non_cough_idx,train_labels[train_labels.iloc[:,1] == item].index.values[:6])
        
for idx,item in enumerate(test_labels.iloc[:,1].drop_duplicates()):
    if idx == 0:
        test_non_cough_idx = test_labels[test_labels.iloc[:,1] == item].index.values[:6]
    if item != 'Cough':
        test_non_cough_idx = np.append(test_non_cough_idx,test_labels[test_labels.iloc[:,1] == item].index.values[:6])

In [8]:
train_non_cough_idx.shape

(246,)

In [9]:
train_cough_idx = train_labels[train_labels.iloc[:,1] == 'Cough'].index.values
test_cough_idx = test_labels[test_labels.iloc[:,1] == 'Cough'].index.values

In [10]:
new_train = np.zeros((489,5)).astype('int')

In [11]:
train_non_idx = train_labels.copy().iloc[train_non_cough_idx,1][:246].index.values
train_non_cough_labels = train_labels.copy().iloc[train_non_idx,:]
train_non_cough_labels.iloc[:,1] = 0
train_cough_labels = train_labels.copy().iloc[train_cough_idx,:]
train_cough_labels.iloc[:,1] = 1

In [12]:
test_non_idx = test_labels.copy().iloc[test_non_cough_idx,1][:30].index.values
test_non_cough_labels = test_labels.copy().iloc[test_non_idx,:]
test_non_cough_labels.iloc[:,1] = 0
test_cough_labels = test_labels.copy().iloc[test_cough_idx,:]
test_cough_labels.iloc[:,1] = 1

In [13]:
new_train = pd.concat([train_non_cough_labels,train_cough_labels])
new_test = pd.concat([test_non_cough_labels,test_cough_labels])

In [14]:
new_train.to_csv('train_binary_labels.csv', index=False)
new_test.to_csv('test_binary_labels.csv', index=False)

In [15]:
new_train.shape

(489, 5)

In [75]:
class FreesoundDataset(Dataset):
    def __init__(self, csv_labels, root_dir, transform=None, test=False, evaluate=False):
        """
        Args:
            csv_labels: path to labels file
            root_dir: path to file directory
            transform: optional transforms
            test: training vs testing mode
        """
        self.labels = pd.read_csv(csv_labels)
        self.transform = transform
        self.test = test
        self.evaluate = evaluate
        
        # root path depends on training vs testing data files
        if self.test:
            self.root = os.path.join(root_dir,'Freesound_Audio_Test')
        else:
            self.root = os.path.join(root_dir,'Freesound_Audio_Train')

        if self.evaluate:
            self.root = root_dir

    def __len__(self):
        return self.labels.shape[0]
    
    def __getitem__(self, idx):
        # start_time = datetime.now()
        filename = self.labels.iloc[idx,0]
        wave_path = os.path.join(self.root,filename)
        sample_rate, sample = wavfile.read(wave_path)
        
        # clamp all samples to 500,000 frames
        if len(sample) > 500000:
            sample = sample[:500000]
        else:
            l = len(sample)
            temp = np.zeros(500000,dtype='int16')
            temp[0:l] = sample
            sample = temp
        
        if self.transform:
            sample = self.transform(sample)
        # reshape to (3,224,224) for 
        temp = torch.tensor(sample.numpy().reshape(1,1,128,977))
        downsample = torch.nn.functional.interpolate(temp, (224, 224), mode='bilinear')
        temp = np.repeat(downsample[...],3, axis=1)
        sample = temp.squeeze()
        
        normalize = transforms.Compose([transforms.Normalize(mean,std)])
        sample = normalize(sample)
        #print("Time Elapsed Processing WAV: {} seconds".format(
        #(datetime.now() - start_time).total_seconds()))
        
        label = self.labels.iloc[idx,1]
        return sample, label

In [17]:
# std = (0.229 + 0.224 + 0.225) / 3
# mean = (0.485 + 0.456 + 0.406) / 3
std = [0.229, 0.224, 0.225]
mean = [0.485, 0.456, 0.406]
print(mean)
"""Mel-frequency cepstral coefficients form gives us
    approximation of human auditory response.
    This also attempts to remove speaker dependent qualities.
    our sample rate is 44100 after reading the wav files,
    start with 20 mfccs then increase if needed"""
trns = transforms.Compose([
    lambda x: x.astype(np.float32) / np.max(x),
    lambda x: librosa.feature.mfcc(x, sr=44100, n_mfcc=128),
    transforms.ToTensor()
])

[0.485, 0.456, 0.406]


In [18]:
example = FreesoundDataset(csv_labels='train_binary_labels.csv',root_dir='.',transform=trns)

In [19]:
example[0]

  "See the documentation of nn.Upsample for details.".format(mode))


(tensor([[[ 2.6154e+02,  2.9200e+02,  1.2695e+02,  ..., -2.3144e+03,
           -2.3212e+03, -2.3359e+03],
          [ 1.9039e+02,  1.6011e+02,  5.2380e+01,  ..., -1.4520e+03,
           -1.4552e+03, -1.4679e+03],
          [ 7.6568e+01, -5.0923e+01, -6.6926e+01,  ..., -7.2118e+01,
           -6.9787e+01, -7.9184e+01],
          ...,
          [ 9.7699e+00,  4.0477e+00, -1.4634e+00,  ..., -3.9907e+00,
           -7.3203e+00, -8.2276e-02],
          [ 4.3810e+00, -3.6112e+00, -2.2273e-01,  ..., -2.3093e+00,
           -7.3517e+00, -2.2037e+00],
          [ 1.0130e+00, -8.3979e+00,  5.5268e-01,  ..., -1.2584e+00,
           -7.3714e+00, -3.5295e+00]],
 
         [[ 2.6750e+02,  2.9865e+02,  1.2991e+02,  ..., -2.3659e+03,
           -2.3728e+03, -2.3879e+03],
          [ 1.9477e+02,  1.6381e+02,  5.3679e+01,  ..., -1.4843e+03,
           -1.4876e+03, -1.5006e+03],
          [ 7.8406e+01, -5.1930e+01, -6.8290e+01,  ..., -7.3598e+01,
           -7.1215e+01, -8.0822e+01],
          ...,
    

In [20]:
# see what our samples look like
#plt.plot(example[0,:,:])

In [21]:
# TESTING CODE
# # reshape spectrograms to fit (3,224,224) size tensor
# print(example.shape)
# temp = torch.tensor(example.numpy().reshape(1,1,128,977))
# print(temp.shape)
# downsample = torch.nn.functional.interpolate(temp, (224, 224), mode='bilinear')
# print(downsample.shape)
# print("downsample \n" + str(downsample.numpy()[0]))
# temp = np.repeat(downsample[...],3, axis=1)
# print(temp.shape)
# print("reshaped \n" + str(temp[0]))
# final = temp.squeeze()
# print(final.shape)

In [22]:
dataloader = DataLoader(FreesoundDataset(csv_labels='train_binary_labels.csv',root_dir='.',transform=trns), batch_size=10, shuffle=True, num_workers=0)

In [23]:
for idx, (sample, label) in enumerate(dataloader):
    print(sample.numpy().shape)
    #plt.imshow(sample.numpy()[0,:,:])
    if idx == 2:
        break

(10, 3, 224, 224)
(10, 3, 224, 224)
(10, 3, 224, 224)


# Build net and preprocess inputs for ResNeXt

In [24]:
import torch.nn as nn
import pretrainedmodels
import torch.optim as optim
from torch.optim import lr_scheduler

In [25]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [26]:
print(pretrainedmodels.pretrained_settings['se_resnext50_32x4d'])

{'imagenet': {'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth', 'input_space': 'RGB', 'input_size': [3, 224, 224], 'input_range': [0, 1], 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'num_classes': 1000}}


In [27]:
# Use AlexNet
import torchvision.models as models
#from sampler import ImbalancedDatasetSampler
alexnet = models.alexnet(pretrained=True)

In [28]:
# train on GPU if we can
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

std = [0.229, 0.224, 0.225]
mean = [0.485, 0.456, 0.406]

transform = transforms.Compose([
    lambda x: x.astype(np.float32) / np.max(x),
    lambda x: librosa.feature.mfcc(x, sr=44100, n_mfcc=128),
    transforms.ToTensor() 
    ])
train_data = FreesoundDataset(csv_labels='train_binary_labels.csv',root_dir='.',transform=transform)
test_data = FreesoundDataset(csv_labels='test_binary_labels.csv',root_dir='.',transform=transform, test=True)
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True, drop_last=True) # drop last batch if not full
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=True, drop_last=True)

net = alexnet
for param in net.parameters():
    param.requires_grad = False

new_classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 2),
        )

net.classifier = new_classifier

net = net.to(device)
criterion = nn.CrossEntropyLoss()

# optimizer = optim.SGD(net.classifier.parameters(), lr=0.0001, momentum=0.9)
optimizer = optim.Adam(net.classifier.parameters(),lr=0.001)
#schedule = lr_scheduler.StepLR(optimizer, step_size=10,gamma=0.1)
epochs = 10
BATCH_SIZE = 64
use_cuda = False

In [29]:
# ReNeXt Attempt
# # train on GPU if we can
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# std = pretrainedmodels.pretrained_settings['se_resnext50_32x4d']["imagenet"]['std']
# mean = pretrainedmodels.pretrained_settings['se_resnext50_32x4d']["imagenet"]['mean']

# transform = transforms.Compose([
#     lambda x: x.astype(np.float32) / np.max(x),
#     lambda x: librosa.feature.mfcc(x, sr=44100, n_mfcc=128),
#     transforms.ToTensor() 
#     ])

# train_dataloader = DataLoader(FreesoundDataset(csv_labels='train_binary_labels.csv',root_dir='.',transform=transform), batch_size=64, shuffle=True, drop_last=True) # drop last batch if not full

# resnext = 'se_resnext50_32x4d'
# net = pretrainedmodels.__dict__[resnext](num_classes=1000,pretrained='imagenet')
# for param in net.parameters():
#     param.requires_grad = False
# num_ftrs = net.last_linear.in_features
# net.last_linear = nn.Linear(num_ftrs, 2)
# net = net.to(device)
# criterion = nn.CrossEntropyLoss()

# optimizer = optim.SGD(net.last_linear.parameters(), lr=0.001, momentum=0.9)
# schedule = lr_scheduler.StepLR(optimizer, step_size=10,gamma=0.1)
# epochs = 3 
# BATCH_SIZE = 64
# use_cuda = False

# Train

In [30]:
def model_train(net, inputs, labels, criterion, optimizer):
    """
        net        network used to train
        inputs     (torch Tensor) batch of input images
        labels     (torch Tensor) ground truth labels
        criterion   loss function
        optimizer  used in backward pass

    Returns:
        running_loss    (float) loss from this batch of images
        num_correct     (torch Tensor, size 1) number of inputs
                        in this batch predicted correctly
        total_images    (float or int) total number of images in this batch
    """
    optimizer.zero_grad()
    
    with torch.set_grad_enabled(True):
        outputs = net(inputs)
        loss = criterion(outputs, labels.squeeze())
    
    # back pass
    loss.backward()
    optimizer.step()

    _, index_out = outputs.max(1)
    running_loss = loss.item()
    num_correct = sum(index_out.squeeze() == labels.squeeze()).item()
    total_images = inputs.size(0)

    return running_loss, num_correct, total_images

In [31]:
# Check the params of our net
# !pip install torchsummary
# from torchsummary import summary
# summary(net, input_size=(2,64, 3, 224, 224))

In [32]:
def log_progress(curr_batch, batch_size, curr_epoch, total_epochs, dataloader):
    """
    Prints message logging progress through training.
    """
    progress = float(curr_batch + 1)/(float(len(dataloader.dataset)) / batch_size)
    log = "EPOCH [{}/{}].Progress: {} % ".format(
        curr_epoch + 1, total_epochs, round(progress * 100, 2))
    sys.stdout.write("\r" + log)
    sys.stdout.flush()

In [33]:
# # test dataloader
# for batch_num, (test_inputs, test_labels) in enumerate(test_dataloader):
#     print("batch_num = " + str(batch_num))
#     print("label = " + str(test_labels))
#     print(test_inputs)
#     break

In [34]:
train_loss_history = np.zeros(epochs)
train_acc_history  = np.zeros(epochs)
val_loss_history   = np.zeros(epochs)
val_acc_history    = np.zeros(epochs)

start_time = datetime.now()

for epoch in range(epochs):

    # TRAINING ###
    print("Training...")
    net.train()
    
    running_loss   = 0.0
    num_correct    = 0.0
    total_images   = 0.0

    for batch_num, (inputs, labels) in enumerate(train_dataloader):
        #print(labels)
        log_progress(batch_num, BATCH_SIZE, epoch, epochs, train_dataloader)

        if use_cuda and torch.cuda.is_available():
            inputs = inputs.cuda()
            labels = labels.cuda()

        assert inputs.shape == (64,3,224,224), inputs.shape
        curr_loss, curr_correct, curr_images = model_train(net, inputs, labels, criterion, optimizer)
        running_loss += curr_loss
        num_correct += curr_correct
        total_images += curr_images

    # Update statistics for epoch
    train_loss_history[epoch] = running_loss / total_images
    train_acc_history[epoch]  = float(num_correct)  / float(total_images)
    print("\n Train Avg. Loss: [{}] Acc: {} on {} images\n".format(
          round(train_loss_history[epoch],4), train_acc_history[epoch], total_images) )
    
    # VALIDATION ###

    print("Validating...")
    net.eval()
    
    running_loss   = 0.0
    num_correct    = 0.0
    total_images   = 0.0
    
    for batch_num, (test_inputs, test_labels) in enumerate(test_dataloader):
        
        if use_cuda and torch.cuda.is_available():
            test_inputs = test_inputs.cuda()
            test_labels = test_labels.cuda()

        # Forward pass
        outputs  = net(test_inputs)
                                                 
        loss     = criterion(outputs, test_labels.squeeze())
                                                 
        # Prediction is index with highest class score
        _, preds = torch.max(outputs, 1)
        
        # move data back for analysis
        if use_cuda and torch.cuda.is_available():
            test_labels = test_labels.cpu()
            preds = preds.cpu()

        running_loss  += loss.item()
        num_correct   += torch.sum(preds == test_labels.data.reshape(-1))

        total_images  += test_labels.data.numpy().size
        
    # Update stats for validation data
    val_loss_history[epoch] = running_loss / total_images
    val_acc_history[epoch]  = float(num_correct)  / float(total_images) 
    print("Val Avg. Loss: [{}] Acc: {} on {} images\n".format(
        round(val_loss_history[epoch],4), val_acc_history[epoch], total_images))
    
print("Time Elapsed: {} seconds".format(
    (datetime.now() - start_time).total_seconds()))

Training...
EPOCH [1/10].Progress: 91.62 % 
 Train Avg. Loss: [6.9245] Acc: 0.5379464285714286 on 448.0 images

Validating...
Val Avg. Loss: [0.6516] Acc: 0.59375 on 32.0 images

Training...
EPOCH [2/10].Progress: 91.62 % 
 Train Avg. Loss: [0.2631] Acc: 0.5089285714285714 on 448.0 images

Validating...
Val Avg. Loss: [0.1148] Acc: 0.375 on 32.0 images

Training...
EPOCH [3/10].Progress: 91.62 % 
 Train Avg. Loss: [0.0311] Acc: 0.5513392857142857 on 448.0 images

Validating...
Val Avg. Loss: [0.0252] Acc: 0.5625 on 32.0 images

Training...
EPOCH [4/10].Progress: 91.62 % 
 Train Avg. Loss: [0.0131] Acc: 0.5580357142857143 on 448.0 images

Validating...
Val Avg. Loss: [0.0209] Acc: 0.53125 on 32.0 images

Training...
EPOCH [5/10].Progress: 91.62 % 
 Train Avg. Loss: [0.0101] Acc: 0.6294642857142857 on 448.0 images

Validating...
Val Avg. Loss: [0.0182] Acc: 0.65625 on 32.0 images

Training...
EPOCH [6/10].Progress: 91.62 % 
 Train Avg. Loss: [0.01] Acc: 0.6138392857142857 on 448.0 images

In [108]:
cough_net = net
cough_net.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [102]:
train_non_cough_idx = train_labels[train_labels.iloc[:,1] != 'Cough'].index
train_cough_idx = train_labels[train_labels.iloc[:,1] == 'Cough'].index

In [103]:
train = FreesoundDataset(csv_labels='train_binary_labels.csv',root_dir='.',transform=transform)

In [111]:
cough_net.eval()

for batch_num, (test_inputs, test_labels) in enumerate(test_dataloader):
    if use_cuda and torch.cuda.is_available():
            test_inputs = test_inputs.cuda()
            test_labels = test_labels.cuda()

    # Forward pass
    outputs  = net(test_inputs)
                                                 
    loss     = criterion(outputs, test_labels.squeeze())
                                                 
    # Prediction is index with highest class score
    _, preds = torch.max(outputs, 1)
    print("predictions = " + str(preds))
    print("labels = " + str(test_labels))

predictions = tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0,
        1, 0, 0, 1, 0, 1, 1, 0])
labels = tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0,
        1, 0, 0, 1, 0, 0, 1, 0])


In [158]:
import sounddevice as sd
from scipy.io.wavfile import write

fs = 44100  # Sample rate
seconds = 3  # Duration of recording

myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)
sd.wait()  # Wait until recording is finished
write('output.wav', fs, myrecording)  # Save as WAV file 

In [159]:
sample_rate, sample = wavfile.read('output.wav')
sample

array([ 0.        ,  0.        ,  0.        , ..., -0.00117799,
       -0.00129872, -0.00133156], dtype=float32)

In [160]:
sample_rate, sample = wavfile.read('output.wav')
sample = sample + 1e-012
# start_time = datetime.now()
# clamp all samples to 500,000 frames
if len(sample) > 500000:
    sample = sample[:500000]
else:
    l = len(sample)
    temp = np.zeros(500000,dtype='int16')
    temp[0:l] = sample
    sample = temp
    
print(np.isnan(sample).any())
sample = transform(sample)

False


  


ParameterError: Audio buffer is not finite everywhere

In [151]:
sample_rate, sample = wavfile.read('output.wav')
sample = sample + 1e-012
# start_time = datetime.now()
# clamp all samples to 500,000 frames
if len(sample) > 500000:
    sample = sample[:500000]
else:
    l = len(sample)
    temp = np.zeros(500000,dtype='int16')
    temp[0:l] = sample
    sample = temp
    
sample = sample + 1e-012

sample = transform(sample)
# reshape to (3,224,224) for
temp = torch.tensor(sample.numpy().reshape(1,1,128,977))
downsample = torch.nn.functional.interpolate(temp, (224, 224), mode='bilinear')
temp = np.repeat(downsample[...],3, axis=1)
sample = temp.squeeze()
normalize = transforms.Compose([transforms.Normalize(mean,std)])
sample = normalize(sample)
#print("Time Elapsed Processing WAV: {} seconds".format(
#(datetime.now() - start_time).total_seconds()))
sample.shape
'''Not sure if anything from here on works'''
live_data = FreesoundDataset(csv_labels = 'live_binary_labels.csv', root_dir='.',transform=transform, test=True, evaluate=True)
live_dataloader = DataLoader(live_data, batch_size=1, shuffle=True, drop_last=True)
preds = []
labels = []
for batch_num, (input, label) in enumerate(live_dataloader):
    output = net(input.float())
    _, pred = torch.max(output, 1)
    preds.append(pred)
    labels.append(label)
    break
print("Cough was predicted as...")

if pred.numpy()[0]:
    print("Cough")
else:
    print("Not a Cough")

  


ParameterError: Audio buffer is not finite everywhere