In [1]:
import os
import pandas as pd
import numpy as np
import librosa
from sklearn.preprocessing import LabelEncoder, StandardScaler
import torch

In [2]:
# Display labels for the two model outputs; looked up by the argmax index of the
# model's output probabilities when the final result is printed.
# NOTE(review): the order presumably has to match the class order of the encoder
# stored in the checkpoint — confirm before changing it.
class_names = ['Covid', 'Non-Covid']

In [3]:
class CoughNet(torch.nn.Module):
    """Fully connected classifier for per-recording feature vectors.

    Layer widths: input_size -> 512 -> 256 -> 128 -> 64 -> 10 -> 2.
    A ReLU follows every linear layer except the last one, so ``forward``
    returns the raw two-value output of the final layer (no softmax here).

    The layers are stored as attributes ``l1`` .. ``l6``; those names are part
    of the state-dict keys and must stay as they are for checkpoints to load.

    Args:
        input_size: Number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        # Hidden stack, progressively narrowing.
        self.l1 = torch.nn.Linear(in_features=input_size, out_features=512)
        self.l2 = torch.nn.Linear(in_features=512, out_features=256)
        self.l3 = torch.nn.Linear(in_features=256, out_features=128)
        self.l4 = torch.nn.Linear(in_features=128, out_features=64)
        self.l5 = torch.nn.Linear(in_features=64, out_features=10)
        # Output layer: one value per class.
        self.l6 = torch.nn.Linear(in_features=10, out_features=2)

    def forward(self, x):
        """Run a batch ``x`` (last dimension = ``input_size``) through the network."""
        for hidden in (self.l1, self.l2, self.l3, self.l4, self.l5):
            x = hidden(x).relu()
        return self.l6(x)

def preproces(fn_wav):
    """Summarise an audio file as a flat dict of mean-valued features.

    Loads at most the first 5 seconds of ``fn_wav`` as mono audio, computes
    several librosa features, and reduces each one to its overall mean.
    Every row of the MFCC matrix is averaged separately and stored under
    ``mfcc1``, ``mfcc2``, ... in row order.

    Args:
        fn_wav: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        dict mapping feature name to its mean value, with keys
        ``chroma_stft``, ``rmse``, ``spectral_centroid``,
        ``spectral_bandwidth``, ``rolloff``, ``zero_crossing_rate``
        followed by one ``mfcc<k>`` entry per MFCC row.
    """
    signal, sample_rate = librosa.load(fn_wav, mono=True, duration=5)
    feats = librosa.feature

    # Whole-feature statistics: each matrix collapses to one scalar.
    raw_features = {
        'chroma_stft': feats.chroma_stft(y=signal, sr=sample_rate),
        'rmse': feats.rms(y=signal),
        'spectral_centroid': feats.spectral_centroid(y=signal, sr=sample_rate),
        'spectral_bandwidth': feats.spectral_bandwidth(y=signal, sr=sample_rate),
        'rolloff': feats.spectral_rolloff(y=signal, sr=sample_rate),
        'zero_crossing_rate': feats.zero_crossing_rate(signal),
    }
    feature_row = {name: np.mean(values) for name, values in raw_features.items()}

    # MFCCs are averaged per coefficient; keys are numbered from 1.
    mfcc_rows = feats.mfcc(y=signal, sr=sample_rate)
    feature_row.update(
        (f'mfcc{position}', np.mean(coefficients))
        for position, coefficients in enumerate(mfcc_rows, start=1)
    )

    return feature_row

In [4]:
# Audio file to classify. Note the path points at an .mp3 even though the
# variable name says "wav"; it is handed to preproces() as-is.
fn_wav_pos = '/content/pos-0421-086-cough-m-65-3.mp3'  # positive example (per the original author)

In [5]:
# Load the trained model and its preprocessing objects from the checkpoint.
#
# SECURITY: besides tensors, the checkpoint stores pickled Python objects (the
# scaler and encoder used below). Unpickling can execute arbitrary code, so only
# load checkpoint files that come from a trusted source.
#   * weights_only=False is required for those pickled objects; newer PyTorch
#     releases default to weights_only=True and would refuse to load them.
#   * map_location='cpu' lets a checkpoint that was saved on a GPU load on a
#     CPU-only runtime.
loaded_checkpoint = torch.load(
    '/content/checkpoint_91_79.pth',
    map_location='cpu',
    weights_only=False,
)

hparams = loaded_checkpoint['hparams']
scaler = loaded_checkpoint['scaler']
encoder = loaded_checkpoint['encoder']

# The network input width is the number of feature columns used in training.
model = CoughNet(len(hparams['features']))
model.load_state_dict(loaded_checkpoint['model_state'])
model.eval()  # inference mode

# Build a one-row feature table whose columns follow the training feature order.
# (DataFrame.append was removed in pandas 2.0; constructing the frame from a
# list of records gives the same one-row result on old and new pandas.)
df_features = pd.DataFrame([preproces(fn_wav_pos)], columns=hparams['features'])
X = np.array(df_features[hparams['features']], dtype=np.float32)
X = torch.Tensor(scaler.transform(X))

# No gradients are needed for inference; skipping them avoids building the
# autograd graph.
with torch.no_grad():
    outputs = torch.softmax(model(X), 1)
predictions = torch.argmax(outputs, 1)

# Report the result for the single sample in the batch. The class index is
# converted to a plain int before it is used to index the encoder's classes.
predicted_index = predictions[0].item()
print(f'model outputs {outputs[0].detach().numpy()} which predicts the class {encoder.classes_[predicted_index]}!')



model outputs [9.9987197e-01 1.2801368e-04] which predicts the class covid!


In [6]:
# Final report: pick the most probable class for the single sample and show its
# probability as the confidence.
probabilities = outputs[0].detach().numpy()  # convert once, reuse below
predicted = class_names[probabilities.argmax()]
confidence = probabilities.max()

print('Result: {}\nConfidence: {}'.format(predicted, confidence))

Result: Covid
Confidence: 0.9998719692230225
