In [34]:
import numpy as np
import soundfile as sf
import librosa 
import math
from glob import glob
import argparse
import os
import sys

In [35]:
class RawClip3(object):
    """Loads audio clips from disk, applies a rolling window, and
    extracts features from each sample."""
    featureFuncs = ['tonnetz', 'spectral_rolloff', 'spectral_contrast',
                    'spectral_bandwidth', 'spectral_flatness', 'mfcc',
                    'chroma_cqt', 'chroma_cens', 'melspectrogram']

    def __init__(self, sourcefile, Y_class=None):
        self.y, self.sr = sf.read(sourcefile)
        self.laughs = None
        self.Y_class = Y_class

    def resample(self, rate, channel):
        return librosa.resample(self.y.T[channel], self.sr, rate)

    def amp(self, rate=22050, n_fft=2048, channel=0):
        D = librosa.amplitude_to_db(librosa.magphase(librosa.stft(
            self.resample(rate, channel), n_fft=n_fft))[0], ref=np.max)
        return D

    def _extract_feature(self, func):
        method = getattr(librosa.feature, func)

        # Construct params for each 'class' of features
        params = {'y': self.raw}
        if 'mfcc' in func:
            params['sr'] = self.sr
            params['n_mfcc'] = 128
        if 'chroma' in func:
            params['sr'] = self.sr

        feature = method(**params)

        return feature

    def _split_features_into_windows(self, data, duration):
        # Apply a moving window
        windows = []

        # Pad the rightmost edge by repeating frames, simplifies stretching
        # the model predictions to the original audio later on.
        data = np.pad(data, [[0, duration], [0, 0]], mode='edge')
        for i in range(data.shape[0] - duration):
            windows.append(data[i:i + duration])

        return np.array(windows)

    def build_features(self, duration=30, milSamplesPerChunk=10):
        # Extract features, one chunk at a time (to reduce memory required)
        # Tip: about 65 million samples for a normal-length episode
        # 10 million samples results in around 1.5GB to 2GB memory use
        features = []

        chunkLen = milSamplesPerChunk * 1000000
        numChunks = math.ceil(self.y.shape[0] / chunkLen)

        for i in range(numChunks):
            # Set raw to the current chunk, for _extract_feature
            self.raw = self.y.T[0][i * chunkLen:(i + 1) * chunkLen]

            # For this chunk, run all of our feature extraction functions
            # Each returned array is in the shape (features, steps)
            # Use concatenate to combine (allfeatures, steps)
            chunkFeatures = np.concatenate(
                list(
                    map(self._extract_feature, self.featureFuncs)
                )
            )
            features.append(chunkFeatures)

        # Transform to be consistent with our LSTM expected input
        features = np.concatenate(features, axis=1).T
        # Combine our chunks along the time-step axis.
        features = self._split_features_into_windows(features, duration)

        return features

In [36]:
class LaughRemover(object):
    """Contains the logic to apply predictions as audio transformations"""
    def __init__(self, kerasModel=None, kerasModelFile=None):
        import keras
        assert kerasModel or kerasModelFile
        if kerasModel:
            self.model = kerasModel
        elif kerasModelFile:
            self.model = keras.models.load_model(filepath=kerasModelFile)

    def remove_dialogues(self, infile, outfile):
        """Remove laughs from a single sound file"""
        rc = RawClip3(infile)
        rc.laughs = self.model.predict(rc.build_features())
        self._apply_laughs_array(rc.y, rc.sr, outfile, rc.laughs[:, 0])
        return rc
    
    def remove_laughs(self, infile, outfile):
        """Remove laughs from a single sound file"""
        rc = RawClip3(infile)
        rc.laughs = self.model.predict(rc.build_features())
        self._apply_laughs_array(rc.y, rc.sr, outfile, rc.laughs[:, 1])
        return rc

    def batch_remove_laughs(self, indir : str, outdir: str, batch_size: int=32):
        """Remove laughs from all files in a directory"""
        # If indir == outdir, processes files in-place 
        batch_of_features = []
        for filename in os.listdir(indir):
            rc = RawClip3(os.path.join(indir, filename))
            features = rc.build_features()
            # Need to add some form of padding to each file so that it can be batched for keras.
            # Then need to unpad so that original file duration is restored.
            # Right now, it just loads the model once, and runs all the files through it one-by-one. 
            rc.laughs = self.model.predict(features)
            self._apply_laughs_array(rc.y, rc.sr, os.path.join(outdir, filename), rc.laughs[:, 1])

    def batch_remove_dialogues(self, indir : str, outdir: str, batch_size: int=32):
        """Remove laughs from all files in a directory"""
        # If indir == outdir, processes files in-place 
        batch_of_features = []
        for filename in os.listdir(indir):
            rc = RawClip3(os.path.join(indir, filename))
            features = rc.build_features()
            # Need to add some form of padding to each file so that it can be batched for keras.
            # Then need to unpad so that original file duration is restored.
            # Right now, it just loads the model once, and runs all the files through it one-by-one. 
            rc.laughs = self.model.predict(features)
            self._apply_laughs_array(rc.y, rc.sr, os.path.join(outdir, filename), rc.laughs[:, 0])


    def _apply_laughs_array(self, y, sr, outfile, laughs):
        y.T[0] = self._apply_frames_to_samples(frames=laughs, samples=y.T[0])

        y.T[1] = self._apply_frames_to_samples(frames=laughs, samples=y.T[1])

        sf.write(outfile, y, sr) 

    def _apply_frames_to_samples(self, frames, samples, exp=1, period=15):
        # Apply a rolling average to smooth the laugh/notlaugh sections
        frames = np.convolve(frames, np.ones((period,)) / period, mode='same')
        # Each frame = default 512 samples, so expand over that period
        frames = np.repeat(frames, librosa.core.frames_to_samples(1))
        # Trim excess padding off the rightmost end
        frames = frames[:len(samples)]
        # Finally, apply audio volume change
        return samples * (frames ** exp)

In [37]:
from keras.models import load_model
Model = load_model('model.h5')


In [38]:

def do_dialogues(sourceFile, outFile, model):
    params = {}
    if type(model) == str:
        params['kerasModelFile'] = model
    else:
        params['kerasModel'] = model

    laughr = LaughRemover(**params)

    arr=laughr.remove_laughs(sourceFile, outFile)

    return arr  


In [39]:
def do_laughs(sourceFile, outFile, model):
    params = {}
    if type(model) == str:
        params['kerasModelFile'] = model
    else:
        params['kerasModel'] = model

    laughr = LaughRemover(**params)

    arr=laughr.remove_dialogues(sourceFile, outFile)

    return arr  


In [42]:
def batch_laughs(indir, outdir, model):
    params = {}
    if type(model) == str:
        params['kerasModelFile'] = model
    else:
        params['kerasModel'] = model

    laughr = LaughRemover(**params)

    laughr.batch_remove_dialogues(indir, outdir)


In [44]:
import glob, os
from os import path

path = "Dataset\Funny_Data\Audio_Files\Funny_Audio_Files" #path to folder containing mp3 files of audio
dst = "Dataset\Funny_Data\Audio_Files\Labelling_Laugh_Audio"

batch_laughs(path, dst, Model)

KeyboardInterrupt: 

### Run the below cell to get mute laughs from one audio clip

In [27]:
path = 'Sorabh_Pant.wav'
root_ext = os.path.splitext(path)
print(root_ext[0])
new_name = root_ext[0]+str('_funny.wav')
renew_name = root_ext[0]+str('_dialogue.wav')
print(new_name)
print(renew_name)


Sorabh_Pant
Sorabh_Pant_funny.wav
Sorabh_Pant_dialogue.wav


In [28]:
#do_laughs(sourceFile=path,                outFile=new_name, model=Model)

In [29]:
do_dialogues(sourceFile=path,
                outFile=renew_name,
                model=Model)



<__main__.RawClip3 at 0x16fe41ced70>

In [30]:
import glob, os
from os import path

path = "Dataset\Funny_Data\Audio_Files\Funny_Audio_Files" #path to folder containing mp3 files of audio
dst = "Dataset\Funny_Data\Audio_Files\Labelling_Laugh_Audio"

filenames = glob.glob(os.path.join(path, '*.wav'))
filenames = sorted(filenames)
for filename in filenames:
    print(filename)
    name = filename.split('\\')[-1]
    name = name.split('.')[0]
    name = dst + '\\name' + '.wav'
    print(name)
    do_dialogues(sourceFile=filename,
                outFile=name,
                model=Model)

Dataset\Funny_Data\Audio_Files\Funny_Audio_Files\Kanan_Gill.wav
Dataset\Funny_Data\Audio_Files\Labelling_Laugh_Audio\name.wav
Dataset\Funny_Data\Audio_Files\Funny_Audio_Files\Mark_Normand.wav
Dataset\Funny_Data\Audio_Files\Labelling_Laugh_Audio\name.wav
Dataset\Funny_Data\Audio_Files\Funny_Audio_Files\Sorabh_Pant.wav
Dataset\Funny_Data\Audio_Files\Labelling_Laugh_Audio\name.wav
156/912 [====>.........................] - ETA: 29s

KeyboardInterrupt: 