In [None]:
import torch
import torchaudio
import json
import pickle
import numpy as np
from numpy import ma

# Show the installed torch / torchaudio versions so the run can be reproduced.
print(torch.__version__)
print(torchaudio.__version__)

In [None]:
# How to generate the mask (one label per sample: 0 = silence, 1 = speech) for clean audio

In [None]:
def _merge_and_prune(mask_row, merge_dist, min_utterance_length):
    """Smooth a single 1-D 0/1 mask in place.

    First bridges short silent gaps between speech runs, then erases speech
    runs that are too short. Both thresholds are expressed in samples.
    """
    n_samples = mask_row.shape[0]

    # Pass 1 - merge: `last_i` is the last speech sample seen before the
    # current gap (-1 means "no open gap"). When speech resumes at `i` and the
    # distance back to `last_i` is within `merge_dist`, the gap is filled.
    last_i = -1
    for i in range(1, n_samples):
        if mask_row[i - 1] == 1 and mask_row[i] == 0:
            last_i = i - 1
        elif mask_row[i - 1] == 0 and mask_row[i] == 1 and last_i > -1 and i - last_i <= merge_dist:
            mask_row[last_i:i] = 1
            last_i = -1

    # Pass 2 - prune: `last_i` is the sample just before the current speech
    # run. When the run ends at `i` and `i - last_i` is below the minimum
    # length, the run is zeroed. A run still open at the end of the signal has
    # no falling edge and is therefore left untouched.
    last_i = -1
    for i in range(n_samples):
        if last_i == -1 or (mask_row[i - 1] == 0 and mask_row[i] == 1):
            last_i = i - 1
        elif mask_row[i - 1] == 1 and mask_row[i] == 0 and i - last_i < min_utterance_length:
            mask_row[last_i:i] = 0
            last_i = -1


def get_mask(data, sample_rate=16000, min_utterance_length_ms=25, merge_dist_ms=32,
             silence_value=0, silence_rtol=1e-1, silence_atol=1e-2):
    """Label every sample of an audio signal as speech (1) or silence (0).

    A sample is silence when it is approximately equal to ``silence_value``
    (as decided by ``numpy.ma.masked_values`` with ``silence_rtol`` /
    ``silence_atol``). The raw labels are then smoothed: silent gaps of about
    ``merge_dist_ms`` or less between two speech runs are filled, and speech
    runs shorter than about ``min_utterance_length_ms`` are removed.

    Parameters
    ----------
    data : array-like
        Either a 1-D signal ``(samples,)`` or a 2-D signal
        ``(channels, samples)``. Each channel of a 2-D input is smoothed
        independently along the sample axis.
    sample_rate : int
        Samples per second, used to convert the millisecond thresholds.
    min_utterance_length_ms : float
        Speech runs shorter than this are relabelled as silence.
    merge_dist_ms : float
        Silent gaps up to this long between speech runs are relabelled speech.
    silence_value, silence_rtol, silence_atol
        Reference value and tolerances defining silence.

    Returns
    -------
    numpy.ndarray
        ``int8`` array with the same shape as ``data``.

    Raises
    ------
    ValueError
        If ``data`` is neither 1-D nor 2-D.
    """
    min_utterance_length = min_utterance_length_ms * sample_rate / 1000.0
    merge_dist = merge_dist_ms * sample_rate / 1000.0

    if not isinstance(data, np.ndarray):
        data = np.array(data)
    if data.ndim not in (1, 2):
        raise ValueError(
            "data must be 1-D (samples,) or 2-D (channels, samples), got shape %r" % (data.shape,)
        )

    silent = ma.getmaskarray(
        ma.masked_values(data, silence_value, rtol=silence_rtol, atol=silence_atol)
    )
    masked = (~silent).astype(np.int8)

    # Smooth along the sample axis. A 2-D (channels, samples) array must be
    # walked row by row; looping over axis 0 directly would iterate over
    # channels and skip the smoothing entirely for mono (1, N) input.
    rows = masked if masked.ndim == 2 else masked[np.newaxis, :]
    for row in rows:
        # Each `row` is a view, so the in-place edits land in `masked`.
        _merge_and_prune(row, merge_dist, min_utterance_length)

    return masked

In [None]:
#FOR CLEAN AUDIO

import os
# assign directory to clean audio
directory = '/home/n2202857e/Documents/VAD/final_test_clean_mono'

# Every sub-directory of `directory` holds the audio files of one set; the
# masks of a set are collected in a list and pickled to '<set>_CLEAN.txt'
# in the current working directory.
for filename in sorted(os.listdir(directory)):
    f = os.path.join(directory, filename)
    if not os.path.isdir(f):
        # Stray files next to the set folders cannot be listed; skip them.
        continue
    print(f)
    saving_list = []

    for audiofilename in sorted(os.listdir(f)):
        f1 = os.path.join(f, audiofilename)
        print(f1)
        waveform, sample_rate = torchaudio.load(f1)
        # torchaudio returns (channels, samples). get_mask smooths along the
        # first axis, so give it the 1-D first channel; passing the 2-D array
        # would leave the merge / minimum-length passes without any effect.
        np_arr = waveform[0].cpu().detach().numpy()
        # Use the file's real sample rate so the millisecond thresholds are
        # converted correctly even when the audio is not 16 kHz.
        masked_array = get_mask(np_arr, sample_rate=sample_rate)
        saving_list.append(masked_array)

    #Pickling (binary pickle despite the .txt extension)
    with open(filename+'_CLEAN.txt', "wb") as fp:
        pickle.dump(saving_list, fp)

In [None]:
# How to generate the mask (one label per sample: 0 = silence, 1 = speech) for noisy audio

In [None]:
import numpy
import torch
import torchaudio
import json
import pickle
import os
from pyannote.audio import Pipeline
import wave
import contextlib
from sklearn.metrics import f1_score

In [None]:
# Load the pretrained pyannote voice-activity-detection pipeline; it is run on every noisy file below.
# NOTE(review): this model may be gated on Hugging Face and need an access token — confirm for this environment.
pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection")

In [None]:
# assign directory to noisy audio
directory = '/home/n2202857e/Documents/VAD/final_test_noisy'

# Every sub-directory of `directory` holds the audio files of one set; the
# label arrays of a set are collected in a list and pickled to
# '<set>_NOISY.txt' in the current working directory.
for filename in sorted(os.listdir(directory)):
    f = os.path.join(directory, filename)
    if not os.path.isdir(f):
        # Stray files next to the set folders cannot be listed; skip them.
        continue
    print(f)
    saving_list = []

    for audiofilename in sorted(os.listdir(f)):
        f1 = os.path.join(f, audiofilename)
        print(f1)

        # Run voice-activity detection on the file.
        output = pipeline(f1)

        # Read the frame count and sample rate from the WAV header so the
        # label array has exactly one entry per audio frame.
        with contextlib.closing(wave.open(f1,'r')) as f3:
            frames = f3.getnframes()
            rate = f3.getframerate()
            duration = frames / float(rate)
        np_arr = numpy.zeros(frames,int)

        for speech in output.get_timeline().support():
            # Active speech between speech.start and speech.end (seconds).
            # Convert with the file's own sample rate: the array is sized in
            # file frames, so a fixed 16000 would misalign the labels for any
            # file recorded at a different rate.
            np_arr[int(speech.start*rate):int(speech.end*rate)] = 1

        saving_list.append(np_arr)

    #Pickling (binary pickle despite the .txt extension)
    with open(filename+'_NOISY.txt', "wb") as fp:
        pickle.dump(saving_list, fp)