In [1]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
from scipy.signal import butter,filtfilt,find_peaks,find_peaks_cwt,medfilt,savgol_filter
from utils import butter_lowpass_filter, butter_highpass_filter, smooth, positions2onehot, normalize
import torch
import torch.nn as nn
import pandas as pd
from test_metric_utils import *
from model_unet import Unet
from model_AAE import FCAE
from tqdm import tqdm_notebook
import os
import time
%matplotlib inline

ModuleNotFoundError: No module named 'utils'

In [None]:
def inference(model,Speech,n_frame = 192,window_step = 32):
    """Predict an EGG signal from speech by overlap-averaging sliding windows.

    The signal is cut into windows of ``n_frame`` samples taken every
    ``window_step`` samples. Each window goes through ``normalize`` and then
    ``model``; the per-window outputs are summed in place and each sample is
    finally divided by the number of windows that covered it.

    Args:
        model: torch module. It is called on a float32 tensor of shape
            ``(1, n_frame, 1)`` and element ``[0]`` of its output is added to
            an ``(n_frame, 1)`` slice of the accumulator.
        Speech: speech samples, assumed to be a 1-D floating-point array
            (the accumulator takes its dtype from it) -- TODO confirm.
        n_frame: window length in samples; must be a multiple of
            ``window_step``.
        window_step: hop between consecutive windows, in samples.

    Returns:
        Array of shape ``(len(Speech), 1)``. Samples that no full window
        covers (the tail, or everything when ``len(Speech) < n_frame``) are
        left at zero.

    Raises:
        AssertionError: if ``n_frame`` is not a multiple of ``window_step``.
    """
    assert n_frame%window_step ==0

    model.eval()

    # Feed the model on whatever device its weights live on instead of
    # unconditionally calling .cuda(), so the function also runs on CPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    Speech = np.expand_dims(Speech,axis=-1)
    EGG_pred = np.zeros_like(Speech)   # running sum of window predictions
    ratio = np.zeros_like(Speech)      # how many windows touched each sample

    # Inference only: no autograd graph is needed for any window.
    with torch.no_grad():
        for start in range(0, len(Speech) - n_frame + 1, window_step):
            stop = start + n_frame

            ## preprocessing
            window = np.asarray(normalize(Speech[start:stop]))
            batch = torch.tensor(window, dtype=torch.float32).unsqueeze(0).to(device)

            ## postprocessing
            result = model(batch).detach().cpu().numpy()[0]
            EGG_pred[start:stop] += result
            ratio[start:stop] += 1

    # Average overlapping predictions; uncovered samples keep their zero.
    covered = ratio != 0
    EGG_pred[covered] /= ratio[covered]

    # The original slice ``[:n_frame + frame*window_step]`` always spanned the
    # whole array (the loop only stops once that bound exceeds len(Speech)),
    # so the full-length array is returned unchanged.
    return EGG_pred

In [None]:
# Build the U-Net, wrap it in DataParallel *before* loading the weights
# (presumably the checkpoint was saved from a DataParallel model, so its keys
# carry the "module." prefix -- TODO confirm), then move it to the GPU.
# NOTE(review): the meaning of the constructor arguments (4, 10) is defined in
# model_unet.Unet and is not visible here.
unet_cos = nn.DataParallel(Unet(4, 10))
unet_cos.load_state_dict(
    torch.load("./models/Unet/best-cosloss.pth")
)
unet_cos.cuda()

In [None]:
def _crests_and_troughs(signal, widths):
    """Return ``(crest_idx, trough_idx)`` sample indices for one signal.

    Troughs are the peaks that ``find_peaks_cwt`` finds in the negated,
    positive-clipped signal; each crest is the argmax of ``signal`` between
    two consecutive troughs, so there is one crest fewer than troughs.
    """
    # Keep only the negative excursions, then flip them so troughs become peaks.
    negative_part = np.minimum(signal, 0)
    trough_idx = np.asarray(find_peaks_cwt(-negative_part, widths))

    crest_idx = np.array([
        lo + np.argmax(signal[lo:hi])
        for lo, hi in zip(trough_idx[:-1], trough_idx[1:])
    ])
    return crest_idx, trough_idx


def get_points(EGG, sr=16000, peak_range=(7,15)):
    """Locate crests and troughs of an EGG signal and of its derivative.

    The derivative (DEGG) is ``np.gradient`` of ``EGG`` followed by a
    3-sample median filter. For both DEGG and EGG, troughs are detected with
    ``find_peaks_cwt`` and the crest between each pair of neighbouring
    troughs is taken as the maximum of the signal in that span.

    Args:
        EGG: 1-D array-like of EGG samples (at least 3 samples, as required
            by ``np.gradient`` with ``edge_order=2``).
        sr: sampling rate used to convert sample indices to time; defaults
            to 16000, the value previously hard-coded.
        peak_range: ``(start, stop)`` passed to ``np.arange`` to build the
            wavelet widths for ``find_peaks_cwt``; defaults to ``(7, 15)``.

    Returns:
        Tuple ``(DEGG_high, DEGG_low, EGG_high, EGG_low)`` of arrays holding
        sample indices divided by ``sr`` (seconds when ``sr`` is in Hz).
        Each ``*_high`` array has one element fewer than its ``*_low``
        counterpart (or is empty when fewer than two troughs are found).
    """
    EGG = np.asarray(EGG)
    widths = np.arange(*peak_range)

    DEGG = medfilt(np.gradient(EGG,edge_order = 2), 3)

    DEGG_high, DEGG_low = _crests_and_troughs(DEGG, widths)
    EGG_high, EGG_low = _crests_and_troughs(EGG, widths)

    return DEGG_high/sr,DEGG_low/sr,EGG_high/sr,EGG_low/sr

In [None]:
# Run the U-Net on one recording and trim prediction and ground truth to a
# common length.
window_step = 64
directory = './datasets/'
file = 'asdf.wav'

# mono=False keeps the channels separate; the unpacking below requires a file
# with exactly two channels (first -> Speech, second -> EGG_true).
[Speech,EGG_true],sr = librosa.load(os.path.join(directory, file),sr=16000,mono=False)

# NOTE(review): arguments are presumably (signal, cutoff in Hz, sampling
# rate) -- confirm against utils.butter_lowpass_filter.
Speech = butter_lowpass_filter(Speech,2500,sr)

# (start, end) sample intervals of the non-silent parts of the speech channel.
itvs = librosa.effects.split(Speech,frame_length = int(192*0.75), hop_length = int(192*0.25),top_db = 10)

# Keep only the non-silent intervals, cutting the same spans out of both
# channels so they stay sample-aligned. Array slices are concatenated once
# instead of growing Python lists one sample at a time.
speech_segments = [Speech[st:ed] for st, ed in itvs]
egg_segments = [EGG_true[st:ed] for st, ed in itvs]

# np.concatenate rejects an empty list, so fall back to empty arrays when no
# interval was detected.
Speech = np.concatenate(speech_segments) if speech_segments else Speech[:0]
EGG_true = np.concatenate(egg_segments) if egg_segments else EGG_true[:0]

EGG_pred = inference(unet_cos,Speech,n_frame = 192,window_step = window_step)
EGG_pred = np.squeeze(EGG_pred,axis=-1)   # drop the trailing singleton axis
# NOTE(review): 49 is presumably the smoothing window length -- confirm
# against utils.smooth.
EGG_pred = smooth(EGG_pred, 49)

# Trim both signals to the shorter of the two lengths.
l = min(len(EGG_pred),len(EGG_true))
EGG_true =EGG_true[:l]
EGG_pred =EGG_pred[:l]
