In [None]:
import tensorflow as tf
import librosa as lr
import numpy as np
import matplotlib.pyplot as plt
import json
import random
import os
from IPython.display import display, Audio
from sklearn.model_selection import train_test_split

In [None]:
# Length of the classification window in milliseconds: get_clip_info adds it to a
# `start_ms` value and divides the result by 1000 before converting to samples.
# It is also embedded in the names of the '../util/false_*_<WINDOW_SIZE>.npy' files loaded below.
WINDOW_SIZE = 4000

In [None]:
def play_audio(clip, sr):
    """Show an inline audio player for `clip`.

    Args:
        clip: audio data handed unchanged to ``IPython.display.Audio``.
        sr: sample rate passed to the player as ``rate``.
    """
    player = Audio(clip, rate=sr)
    display(player)
    
def get_transcript_paths(audio_paths):
    """Map audio file paths to the paths of their conversation transcripts.

    Each audio path must look like ``../data/<folder>/audio/<id>/<anything>``;
    the matching transcript is ``../data/<folder>/conversations/<id>.json``.

    Args:
        audio_paths: iterable of audio path strings.

    Returns:
        list of transcript path strings, in the same order as ``audio_paths``.

    Raises:
        ValueError: if a path does not follow the layout described above.
    """
    data_root = '../data/'
    transcript_paths = []
    for audio_path in audio_paths:
        # Remove the prefix explicitly. str.strip('../data/') strips a *set of
        # characters* from both ends, which mangles folder names that start
        # with '.', '/', 'd', 'a' or 't' (e.g. 'train' became 'rain').
        if not audio_path.startswith(data_root):
            raise ValueError(
                "audio path does not start with '../data/': " + repr(audio_path))
        parts = audio_path[len(data_root):].split('/')
        # Expect <folder>/audio/<id>/<rest>. Without a component after <id> the
        # previous implementation silently dropped the last character of the id.
        if len(parts) < 4 or parts[1] != 'audio':
            raise ValueError(
                "audio path is not of the form "
                "'../data/<folder>/audio/<id>/...': " + repr(audio_path))
        folder_num = parts[0]
        conversation_id = parts[2]
        transcript_paths.append(
            data_root + folder_num + '/conversations/' + conversation_id + '.json')
    return transcript_paths

def get_failure_stats(clip_encodings):
    """Split clip encodings by the speaker role of the segment after each clip.

    Every encoding is an underscore-separated string whose first field is the
    audio path and whose second field is the index of a segment in that
    conversation's transcript. The transcript JSON is opened and the
    ``caller_role`` of the *following* segment (``seg_index + 1``) is read.

    Args:
        clip_encodings: iterable of clip-encoding strings.

    Returns:
        tuple ``(cl_cl_errors, cl_ag_errors)``: the encodings whose following
        segment has a ``caller_role`` other than ``'AG'``, and those whose
        following segment has ``caller_role == 'AG'``.

    Raises:
        IndexError: if a clip refers to the last segment of its transcript
            (there is no following segment to inspect).
    """
    cl_cl_errors = []
    cl_ag_errors = []
    for clip_encoding in clip_encodings:
        clip_info = clip_encoding.split('_')
        audio_path = clip_info[0]
        seg_index = int(clip_info[1])
        # The window offset (third field) is not needed to classify the clip.
        transcript_path = get_transcript_paths([audio_path])[0]

        with open(transcript_path, 'r') as conv_json:
            transcript = json.load(conv_json)

        next_role = transcript['segments'][seg_index + 1]['caller_role']
        if next_role == 'AG':
            cl_ag_errors.append(clip_encoding)
        else:
            cl_cl_errors.append(clip_encoding)
    return cl_cl_errors, cl_ag_errors
    
def _slice_ms(audio, start_ms, end_ms, sr):
    """Return the samples of `audio` (all channels) between two millisecond offsets."""
    # Clamp at 0: a negative sample index would make NumPy count from the END
    # of the recording and yield the wrong (usually empty) slice.
    start_sample = lr.core.time_to_samples(max(start_ms, 0) / 1000, sr=sr)
    end_sample = lr.core.time_to_samples(max(end_ms, 0) / 1000, sr=sr)
    return audio[:, start_sample:end_sample]


def get_clip_info(clip_encoding):
    """Play and print the context of one clip so it can be reviewed by ear.

    The encoding is an underscore-separated string: audio path, segment index,
    and an integer offset in milliseconds. ``offset - WINDOW_SIZE`` is the start
    of the classification window relative to the start of the segment
    (presumably the offset marks the window's end; confirm against the code
    that wrote the encodings).

    Three audio players are shown, with the transcripts of the first two
    printed: the whole segment, the segment that follows it, and the
    WINDOW_SIZE-millisecond window used for classification.

    Args:
        clip_encoding: clip-encoding string as described above.

    Raises:
        IndexError: if the clip refers to the last segment of its transcript.
    """
    sample_rate = 8000  # the audio is resampled to this rate on load

    clip_info = clip_encoding.split('_')
    audio_path = clip_info[0]
    seg_index = int(clip_info[1])
    window_start = int(clip_info[2]) - WINDOW_SIZE

    transcript_path = get_transcript_paths([audio_path])[0]

    with open(transcript_path, 'r') as conv_json:
        transcript = json.load(conv_json)
    with open(audio_path, 'rb') as conv_audio:
        # mono=False keeps the channels separate; the slices below index the
        # array as (channel, sample).
        audio, _ = lr.load(conv_audio, sr=sample_rate, mono=False)

    segment = transcript['segments'][seg_index]
    next_segment = transcript['segments'][seg_index + 1]

    # Window the classifier saw, positioned relative to the segment start.
    clip_start_ms = segment['start_ms'] + window_start
    clip_audio = _slice_ms(audio, clip_start_ms, clip_start_ms + WINDOW_SIZE, sample_rate)

    seg_audio = _slice_ms(
        audio,
        segment['start_ms'],
        segment['start_ms'] + segment['duration_ms'],
        sample_rate)
    seg_audio_next = _slice_ms(
        audio,
        next_segment['start_ms'],
        next_segment['start_ms'] + next_segment['duration_ms'],
        sample_rate)

    print("================")
    print('Entire segment:')
    play_audio(seg_audio, sample_rate)
    print("Transcript: ", segment['transcript'])
    print('Next segment:')
    play_audio(seg_audio_next, sample_rate)
    print("Transcript: ", next_segment['transcript'])
    print('Window used for classification:')
    play_audio(clip_audio, sample_rate)
        

In [None]:
# Load the DL and RF false-positive clip encodings saved for this window size.
false_pos_clips_dl = np.load('../util/false_positives_dl_' + str(WINDOW_SIZE) + '.npy')
false_pos_clips_rf = np.load('../util/false_positives_rf_' + str(WINDOW_SIZE) + '.npy')

# Split each set by the caller_role of the segment following the clip:
# first list = role other than 'AG', second list = role 'AG'.
cl_cl_dl, cl_ag_dl = get_failure_stats(false_pos_clips_dl)
cl_cl_rf, cl_ag_rf = get_failure_stats(false_pos_clips_rf)

In [None]:
# Load the DL and RF false-negative clip encodings saved for this window size.
false_neg_clips_dl = np.load('../util/false_negatives_dl_' + str(WINDOW_SIZE) + '.npy')
false_neg_clips_rf = np.load('../util/false_negatives_rf_' + str(WINDOW_SIZE) + '.npy')

# Split each set by the caller_role of the segment following the clip:
# first list = role other than 'AG', second list = role 'AG'.
cl_fn_dl, ag_fn_dl = get_failure_stats(false_neg_clips_dl)
cl_fn_rf, ag_fn_rf = get_failure_stats(false_neg_clips_rf)

In [None]:
## FALSE NEGATIVE STATS
# Count, per model, the false-negative clips whose following segment has caller_role 'AG'.
n_fn_ag_dl, n_fn_ag_rf = len(ag_fn_dl), len(ag_fn_rf)

print('DL number of false negatives:', n_fn_ag_dl)
print('RF number of false negatives:', n_fn_ag_rf)

In [None]:
# Review up to 5 randomly chosen DL false negatives whose following segment is
# not spoken by 'AG'. The sample size is capped at the number of clips
# available because np.random.choice(..., replace=False) raises ValueError when
# asked for more items than exist.
# NOTE(review): no random seed is set, so the selection differs between runs.
sample = np.random.choice(cl_fn_dl, min(5, len(cl_fn_dl)), replace=False)
for clip_encoding in sample:
    get_clip_info(clip_encoding)

In [None]:
##FALSE POSITIVE STATS:

In [None]:
# False-positive breakdown per model.
# "Pause" counts clips whose following segment does not have caller_role 'AG';
# "Too early" counts clips whose following segment has caller_role 'AG'.
# The rate is reported as nan instead of raising ZeroDivisionError when a model
# has no false positives at all.
n_cl_dl = len(cl_cl_dl)
n_ag_dl = len(cl_ag_dl)
n_total_dl = n_cl_dl + n_ag_dl
print('DL: Pause mistakes:',n_cl_dl)
print('DL: Too early mistakes',n_ag_dl)
print('DL: Total',n_total_dl)
print('DL Pause Failure Rate:',n_cl_dl/n_total_dl if n_total_dl else float('nan'))

n_cl_rf = len(cl_cl_rf)
n_ag_rf = len(cl_ag_rf)
n_total_rf = n_cl_rf + n_ag_rf
print('RF: Pause mistakes:',n_cl_rf)
print('RF: Too early mistakes',n_ag_rf)
print('RF: Total',n_total_rf)
print('RF: Pause Failure Rate:',n_cl_rf/n_total_rf if n_total_rf else float('nan'))

In [None]:
# Play back every DL false-positive clip whose following segment has
# caller_role 'AG' (the second list returned by get_failure_stats).
for clip_encoding in cl_ag_dl:
    get_clip_info(clip_encoding)

In [None]:
# NOTE(review): `false_pos_clips` / `false_neg_clips` were never defined in this
# notebook, so this cell raised NameError on a fresh kernel. The DL arrays are
# assumed here because the cells above inspect the DL results; point these two
# names at the `_rf` arrays to review the RF model instead.
false_pos_clips = false_pos_clips_dl
false_neg_clips = false_neg_clips_dl

# Draw up to 100 clips of each kind without replacement. The size is capped so
# np.random.choice does not raise when fewer than 100 clips are available.
false_pos_sample = np.random.choice(false_pos_clips,min(100,len(false_pos_clips)),replace=False)
false_neg_sample = np.random.choice(false_neg_clips,min(100,len(false_neg_clips)),replace=False)

In [None]:
# Play back and print the context of each sampled false-positive clip.
for clip_encoding in false_pos_sample:
    get_clip_info(clip_encoding)

In [None]:
# Play back and print the context of each sampled false-negative clip.
for clip_encoding in false_neg_sample:
    get_clip_info(clip_encoding)