In [5]:
import scipy.io as sio
import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd

In [29]:
# This function takes in the path of a single mat file that is generated by a wav file and returns the corresponding 
# fast fourier transforms for each syllable along with the time stamp that syllable was measured
# turn visualise = False, else visualise is defaulted to True so all the syllables can be viewed
def load_mat(path, visualise=True):
    mat_contents = sio.loadmat(path)
    num_syllables = mat_contents['syllable_data'][1].shape[0]
    
    data=[]
    start=[]
    end=[]
    
    for index in range(num_syllables):
        
        #get the corresponding data
        syllable_gt = mat_contents['syllable_data'][1][index]

        #analysis for a single sample

        syllable_duration = syllable_gt.shape[1]
        frame_shift_ms = 0.0004*4
        repertoire_unit_size_seconds=200

        patch_window = repertoire_unit_size_seconds/frame_shift_ms*1e-3
        syllable_patch_window = max(patch_window, np.ceil(syllable_duration/2)*2)
        syllable_patch_gt = np.zeros((syllable_gt.shape[0], int(syllable_patch_window)))

        syllable_patch_window_start = math.floor(syllable_patch_window/2)-math.floor(syllable_duration/2);
        syllable_patch_gt[:,syllable_patch_window_start:syllable_patch_window_start+syllable_duration] = syllable_gt

        syllable_fft = mat_contents['syllable_data'][2][index]
        syllable_fft_median = np.median(syllable_fft)
        syllable_fft_median = 2*syllable_fft_median;
        
        print(syllable_patch_window)
        
        syllable_patch_fft = syllable_fft_median*np.ones((syllable_fft.shape[0], int(syllable_patch_window)))
        syllable_duration = syllable_fft.shape[1]
        syllable_patch_fft[:, syllable_patch_window_start:syllable_patch_window_start + syllable_duration] = syllable_fft;


        syllable_patch_fft_dB = 10*np.log10(abs(syllable_patch_fft[::2,:]+1e-5))
        
       
        
        data.append(syllable_patch_fft_dB)
        start.append(float(mat_contents['syllable_stats'][7][index]))
        end.append(float(mat_contents['syllable_stats'][8][index]))

        if visualise:
            plt.imshow(syllable_patch_fft_dB, cmap="binary")
            plt.show()
    
    df = pd.DataFrame({
        'data': data,
        'start': start,
        'end': end
    })
    
    return df
        
    

In [30]:
# Load in a single mat file
path = "audio/wav/YW0000729"
df = load_mat(path, visualise=False)

125.0
125.0
125.0
125.0
172.0
142.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
125.0
