In [1]:
# Import Packages

from packages import *
%matplotlib inline

In [2]:
############ function to calculate the entropy value of an input signal ################
# entropy values can be taken as a measure of complexity of the signal
def calculate_entropy(list_values):
    """Return the entropy of the empirical distribution of ``list_values``.

    Every distinct value is weighted by its relative frequency and the
    resulting probability vector is passed to ``entropy`` (presumably
    ``scipy.stats.entropy``, made available by the star import -- confirm).
    The result can be read as a measure of the signal's complexity.

    Parameters
    ----------
    list_values : sequence
        Signal samples; must support ``len()`` and hold hashable items.

    Returns
    -------
    scalar
        Whatever ``entropy`` returns for the observed frequency vector.
    """
    occurrences = Counter(list_values)
    # most_common() yields (value, count) pairs, most frequent first
    probabilities = [
        n_hits / len(list_values) for _value, n_hits in occurrences.most_common()
    ]
    return entropy(probabilities)

############## function to calculate some statistics ################
def calculate_statistics(list_values):
    """Summarise a signal with six NaN-ignoring descriptive statistics.

    Parameters
    ----------
    list_values : array-like
        Signal samples; NaN entries are ignored by every statistic.

    Returns
    -------
    list
        ``[25th percentile, median, 75th percentile, mean,
        standard deviation, variance]`` in exactly that order.
    """
    # 25th percentile, median (50th) and 75th percentile
    quartiles = [np.nanpercentile(list_values, pct) for pct in (25, 50, 75)]
    # mean, standard deviation, variance
    moments = [reducer(list_values) for reducer in (np.nanmean, np.nanstd, np.nanvar)]
    return quartiles + moments

############# function to calculate the ZCR and mean CR ############
def calculate_crossings(list_values):
    """Count how often the signal crosses zero and how often it crosses its mean.

    A crossing is registered between two consecutive samples when exactly one
    of them is strictly greater than the threshold.

    Parameters
    ----------
    list_values : array-like
        Signal samples.

    Returns
    -------
    list of int
        ``[no_zero_crossings, no_mean_crossings]``.
    """
    signal = np.array(list_values)

    def _count_flips(threshold):
        # np.diff on a boolean mask is True wherever neighbouring entries differ
        above = signal > threshold
        return int(np.count_nonzero(np.diff(above)))

    # threshold 0 -> zero crossings; threshold nanmean -> mean crossings
    return [_count_flips(0), _count_flips(np.nanmean(list_values))]

############# function to combine the results of these three functions above ###########
def get_features(list_values):
    """Assemble the feature vector for one array of values.

    The vector is the concatenation, in this order, of the outputs of
    ``calculate_entropy`` (one value), ``calculate_crossings`` and
    ``calculate_statistics``.

    Parameters
    ----------
    list_values : array-like
        Values to describe (e.g. one wavelet coefficient array).

    Returns
    -------
    list
        Flat list of feature values.
    """
    # Local names deliberately avoid shadowing `entropy` / `statistics`
    feature_vector = [calculate_entropy(list_values)]
    feature_vector.extend(calculate_crossings(list_values))
    feature_vector.extend(calculate_statistics(list_values))
    return feature_vector

In [3]:
def build_predictions(audio_dir, hop=1):
    """Classify every WAV file in a directory by majority vote over sliding windows.

    Each recording is cut into windows of ``int(rate / 10)`` samples. Every
    window is decomposed with a level-7 ``db4`` wavelet transform,
    ``get_features`` is applied to each coefficient array, and the resulting
    feature row is reduced to the columns in ``col``, scaled with ``sc`` and
    classified with ``model``. The most frequent window label becomes the
    label of the file.

    This function reads the notebook-level globals ``col``, ``sc`` and
    ``model``; they must be defined before it is called.

    Parameters
    ----------
    audio_dir : str
        Directory containing the recordings to classify. Sub-directories and
        entries whose name does not end in ``.wav`` are skipped.
    hop : int, optional
        Number of samples between the starts of consecutive windows. The
        default of 1 keeps the original sample-by-sample sliding window;
        larger values trade temporal resolution for speed.

    Returns
    -------
    dict
        Maps each processed file name to its majority-vote label, or to
        ``None`` when the recording is shorter than one analysis window.

    Raises
    ------
    ValueError
        If ``hop`` is smaller than 1.
    """
    if hop < 1:
        raise ValueError('hop must be a positive number of samples')

    fn_prob = {}

    # sorted() makes the processing order (and the dict order) deterministic
    for fn in sorted(os.listdir(audio_dir)):
        path = os.path.join(audio_dir, fn)

        # Skip sub-directories and non-audio entries instead of crashing in wavfile.read
        if not os.path.isfile(path) or not fn.lower().endswith('.wav'):
            continue

        print(fn)

        rate, wav = wavfile.read(path)
        # NOTE(review): wavedec below runs along its default axis; a file with
        # more than one channel would presumably give a 2-D `wav` -- confirm
        # that all inputs are mono.

        # window length: one tenth of a second worth of samples
        step = int(rate / 10)
        n_samples = wav.shape[0]

        y_pred = []

        # "+ 1" so the last full window (starting at n_samples - step) is included
        for start in range(0, n_samples - step + 1, hop):

            # crop duration
            sample = wav[start:start + step]

            # feature extraction
            list_coeff = pywt.wavedec(data=sample,
                                      wavelet='db4',    # filter
                                      mode='constant',  # duplication padding
                                      level=7           # no. of decomposition levels
                                      )

            # concatenate the features of every coefficient array into one row
            features = []
            for coeff in list_coeff:
                features += get_features(coeff)

            # single-row frame whose columns are named '0', '1', ... like the
            # header the selection list `col` was taken from
            x = pd.DataFrame([features], dtype=float)
            x.columns = [str(name) for name in x.columns]

            # selection
            x = x[col]

            # scaling
            x = sc.transform(x)

            # prediction
            y_pred.append(model.predict(x)[0])

        if y_pred:
            # majority vote over all window-level predictions
            fn_prob[fn] = statistics.mode(y_pred)
        else:
            # recording shorter than one window: nothing to vote on
            fn_prob[fn] = None
        print('*'*100)

    return fn_prob

In [4]:
# load models
# `sc` and `model` are read as globals by build_predictions. Judging by the
# file names they are presumably a fitted standard scaler and a KNN
# classifier -- confirm. `load` comes from the star import (presumably joblib).
sc = load('models/Wavelet/std_scaler_Wavelet.bin')

# NOTE: unpickling can execute arbitrary code -- only load model files from a
# trusted source. The `with` block guarantees the file handle is closed.
with open('models/Wavelet/KNN_Wavelet.sav', 'rb') as model_file:
    model = pickle.load(model_file)

In [5]:
# Feature selection: take the input column names from the training table's
# header. Every column except the last is kept (the last one is presumably
# the target label -- confirm).
get_columns = pd.read_csv('data/Wavelet_train_data.csv')
col = get_columns.columns[:-1].tolist()

In [7]:
# Classify every recording found in 'test_file' and display the resulting
# {file name: predicted label} mapping as the cell output.
fn_prob = build_predictions(audio_dir='test_file')
fn_prob

neg-0421-083-cough-m-53-0.wav
****************************************************************************************************
pos-0421-087-cough-f-40-5.wav
****************************************************************************************************


{'neg-0421-083-cough-m-53-0.wav': 0, 'pos-0421-087-cough-f-40-5.wav': 1}