<a href="https://colab.research.google.com/github/Hitansh159/match-filter/blob/main/Data_Genrator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install kaggle
# ! cp kaggle.json ~/.kaggle/
# ! chmod 600 ~/.kaggle/kaggle.json



In [None]:
# !kaggle datasets download -d chrisfilo/urbansound8k

Downloading urbansound8k.zip to /content
100% 5.60G/5.61G [01:53<00:00, 93.4MB/s]
100% 5.61G/5.61G [01:53<00:00, 52.9MB/s]


In [None]:
# !unzip *.zip


In [None]:
!cp -r ./drive/MyDrive/fold9 ./

In [None]:
import librosa
import os
import random
import soundfile as sf
import pandas as pd
import numpy as np

In [None]:

# TODO:: typos in docstring
class DataGenrator():
    """
        Generates training examples by overlaying short speaker clips on
        background-noise recordings.

        Attributes:
            dataset: DataFrame with the ground truth produced by the last
                generate_dataset call (empty on a new instance, None on the class)
            sample_rate: sample rate of the most recently loaded background clip, or None
            BACKGROUND_SECONDS: length every generated example is padded/trimmed to
            CLIP_SECONDS: length of each inserted speaker clip
    """
    BACKGROUND_SECONDS = 10
    CLIP_SECONDS = 2

    dataset = None
    sample_rate = None


    def __init__(self, background_folder_path, speakers_folder_path):
        """
            Remembers the input folders and discovers the available speakers.

            Arguments:
            background_folder_path: Path to the folder containing background audio files
            speakers_folder_path: Path to the folder containing one sub-folder per speaker,
                each holding that speaker's audio clips (at least 2 sec long)

            Return:
            Void
        """
        self._background_folder_path = background_folder_path
        self._speakers_folder_path = speakers_folder_path
        # Per-instance frame: a class-level DataFrame would be shared by every generator.
        self.dataset = pd.DataFrame()
        # Hidden folders such as '.ipynb_checkpoints' are not speakers.
        self._speakers = [
            entry for entry in os.listdir(self._speakers_folder_path)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(self._speakers_folder_path, entry))
        ]
        assert len(self._speakers) != 0, "no speaker folders found in speakers_folder_path"

    @staticmethod
    def _list_audio_files(folder):
        """Returns the sorted names of the regular, non-hidden files in folder."""
        return sorted(
            name for name in os.listdir(folder)
            if not name.startswith('.') and os.path.isfile(os.path.join(folder, name))
        )

    @staticmethod
    def _fit_background(background, target_len):
        """Loops background until it has at least target_len samples, then trims it to exactly target_len."""
        if len(background) == 0:
            raise ValueError("background audio is empty")
        if len(background) < target_len:
            # Integer ceiling division avoids the rounding drift of float arithmetic.
            repeats = -(-target_len // len(background))
            background = np.tile(background, repeats)
        return background[:target_len]

    def random_time_segment(self):
        """
            Generates a random position where a speaker clip will be imposed

            Arguments:
                None
            Return:
                time_segment: tuple of start and end sample index (start, end); the
                    segment is CLIP_SECONDS long and fits inside BACKGROUND_SECONDS
            Raises:
                ValueError: if sample_rate has not been set yet
        """
        if self.sample_rate is None:
            raise ValueError("sample_rate is not set; load a background clip first")
        clip_len = self.CLIP_SECONDS * self.sample_rate
        latest_start = self.BACKGROUND_SECONDS * self.sample_rate - clip_len
        start = random.randint(0, latest_start)
        return (start, start + clip_len)

    def insert_clip(self, background, speaker, time_segment):
        """
            Adds the speaker clip onto the background inside the given time segment

            Arguments:
                background: background audio as a numpy array (modified in place)
                speaker: speaker audio as a numpy array
                time_segment: tuple of start and end sample index (start, end)
            Return:
                background: background clip with the speaker sound mixed in
        """
        start, end = time_segment
        end = min(end, len(background))
        # A clip shorter (or longer) than the segment is mixed in as far as it fits
        # instead of failing with a broadcasting error.
        usable = min(len(speaker), max(end - start, 0))
        if usable > 0:
            background[start:start + usable] += speaker[:usable]
        return background

    def insert_ones(self, y: list, time_segment: tuple, error_tolurance: int):
        """
            Marks with 1 the samples where the speaker is present, plus an error tolerance

            Arguments:
                y: ground truth list (modified in place)
                time_segment: tuple of start and end sample index (start, end)
                error_tolurance: number of 1's to add after the speaker clip ends
            Returns:
                y: ground truth list with 1's added in the time segment
        """
        start, end = time_segment
        # Never write past the end of the ground truth.
        stop = min(end + error_tolurance, len(y))
        if stop > start:
            y[start:stop] = [1] * (stop - start)
        return y

    def generate_example(self, background, speaker, others):
        """
            Generates one example for a given background and speaker

            Arguments:
                background(str): name of the background file
                speaker(str): name of the speaker (sub-folder of the speakers folder)
                others(list): names of other speakers (not used yet)
            Return:
                audio_clip: numpy array with the mixed audio
                y: ground truth list, one entry per sample
            Raises:
                ValueError: if the speaker folder has no clips or the background is empty
        """
        # Load the background first: its sample rate drives every length computed below.
        audio, self.sample_rate = librosa.load(
            os.path.join(self._background_folder_path, background))
        target_len = self.BACKGROUND_SECONDS * self.sample_rate
        clip_len = self.CLIP_SECONDS * self.sample_rate
        # Scaling by 0.9 also yields a fresh array, so insert_clip never touches the loaded data.
        audio = self._fit_background(audio, target_len) * 0.9

        speaker_folder = os.path.join(self._speakers_folder_path, speaker)
        available_clips = self._list_audio_files(speaker_folder)
        if not available_clips:
            raise ValueError(f"no audio clips found for speaker {speaker!r}")
        #TODO:: to make code others clip appendable in background

        # TODO:: add more sample to raw_data
        # random.sample raises when asked for more items than exist, so cap the count.
        number_of_speaker_clips = min(random.randint(1, 3), len(available_clips))
        chosen_clips = random.sample(available_clips, number_of_speaker_clips)

        y = [0] * target_len
        for clip_name in chosen_clips:
            clip, _ = librosa.load(os.path.join(speaker_folder, clip_name), sr=self.sample_rate)
            # NOTE: segments are drawn independently, so speaker clips may overlap.
            time_segment = self.random_time_segment()
            audio = self.insert_clip(audio, clip[:clip_len], time_segment)
            y = self.insert_ones(y, time_segment, 150)

        return audio, y

    # TODO:: implement other_speaker support
    def generate_dataset(self, speaker :str, number:int, other_speaker :bool = False):
        """
            Generates a dataset for a given speaker, writes the audio files to
            ./dataset/<speaker>/<idx>.wav and the ground truth to ./dataset/target.csv,
            and stores the ground truth in the dataset attribute

            Arguments:
                speaker(str): name of speaker
                number(int): number of examples to generate; background files are reused
                    in order when number exceeds how many are available, and None means
                    one example per background file
                other_speaker(bool): default False
            Return:
                void
            Raises:
                ValueError: if the background folder has no audio files
        """

        if other_speaker:
          print("please implement")
          return

        background_sounds = self._list_audio_files(self._background_folder_path)
        if not background_sounds:
            raise ValueError("no background audio files found")
        total = len(background_sounds) if number is None else number

        output_folder = os.path.join('./', 'dataset', speaker)
        os.makedirs(output_folder, exist_ok=True)

        Y = []
        for idx in range(total):
            background_sound = background_sounds[idx % len(background_sounds)]
            audio, y = self.generate_example(background_sound, speaker, [])
            Y.append(y)
            # Row idx of target.csv always describes <idx>.wav.
            sf.write(os.path.join(output_folder, f'{idx}.wav'), audio, self.sample_rate)
        # Fresh frame per call so an earlier run cannot clash with the new length.
        self.dataset = pd.DataFrame({'y': Y})
        self.dataset.to_csv(os.path.join('./', 'dataset', 'target.csv'))



        
    
    

SyntaxError: ignored

In [None]:
# Point the generator at the background-noise folder and at the folder that holds
# one sub-folder of clips per speaker.
dataGenrator = DataGenrator('./raw_data/background/', './raw_data/speaker')

In [None]:
# librosa.load returns (audio, sample_rate); the audio (`temp`) is not used afterwards,
# only the rate is stored on the generator, which random_time_segment reads.
temp, dataGenrator.sample_rate = librosa.load('./raw_data/background/101729-0-0-1.wav')

In [None]:
# Build the mixed clips for one speaker; generate_dataset writes them under
# ./dataset/<speaker>/ and saves the ground truth to ./dataset/target.csv.
dataGenrator.generate_dataset('narendra modi', 100, False)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
4.0 6.0 2 44100.0 88200
220500
start: 5.389931972789116 end: 7.389931972789116
speaker_clips: 44100, id: 0
220500 44100 44100
start: 2.640136054421769 end: 4.6401360544217685
speaker_clips: 44100, id: 1
220500 44100 44100
start: 5.897097505668934 end: 7.897097505668934
speaker_clips: 44100, id: 2
220500 44100 44100
['34.wav', '172.wav']
4.0 6.0 2 44100.0 88200
220500
start: 6.817777777777778 end: 8.817777777777778
speaker_clips: 44100, id: 0
220500 44100 44100
start: 4.948163265306122 end: 6.948163265306122
speaker_clips: 44100, id: 1
220500 44100 44100
['163.wav']
4.0 6.0 2 44100.0 88200
220500
start: 0.46249433106575966 end: 2.4624943310657597
speaker_clips: 44100, id: 0
220500 44100 44100
['155.wav']
4.0 6.0 2 44100.0 88200
220500
start: 1.1372335600907029 end: 3.137233560090703
speaker_clips: 44100, id: 0
220500 44100 44100
['20.wav', '67.wav', '155.wav']
4.0 6.0 2 44100.0 88200
220500
start: 0.8329251700680272 end: 2

In [None]:
# Smoke test: generate a single example and write it to disk for listening.
dataGenrator = DataGenrator('./raw_data/background', './raw_data/speaker')
# Store the sample rate on the generator; the loaded audio (`temp`) is not used.
temp, dataGenrator.sample_rate = librosa.load('./raw_data/background/101729-0-0-1.wav')
# The third argument (`others`) is documented as a list but is not read by generate_example.
audio, y=  dataGenrator.generate_example('101729-0-0-1.wav', 'narendra modi', '')
# Both lengths are printed so they can be compared by eye.
print(len(audio))
print(len(y))

sf.write('./audio.wav', audio, dataGenrator.sample_rate)



In [None]:
# Duration in seconds of an 88200-sample clip at sample rate `sr`.
# NOTE(review): `sr` is first assigned in a later cell (librosa.load('1.wav')), so this
# cell only works after out-of-order execution - confirm and reorder.
l = 88200 / sr
l

4.0

In [None]:
88200*2

176400

In [None]:
# Load the long source recording; `track` holds the samples and `sr` the sample rate.
# NOTE(review): the origin of '1.wav' is not shown in this notebook - document it.
track, sr = librosa.load('1.wav')

In [None]:
10*sr

220500

In [None]:
len(track)

10051072

In [None]:
# Slice the long recording into consecutive 2-second speaker clips.
clip_len = 2 * sr
speaker_dir = './raw_data/speaker/narendra modi'
os.makedirs(speaker_dir, exist_ok=True)
# Stop before the trailing remainder: a clip shorter than 2 s cannot be added onto the
# fixed 2-second slice that DataGenrator.insert_clip writes into.
for clip_id, start in enumerate(range(0, len(track) - clip_len + 1, clip_len)):
  sf.write(f'{speaker_dir}/{clip_id}.wav', track[start:start + clip_len], sr)

In [None]:
l = np.array([1, 2, 5,354, 4])

np.append(l, l[:2])

array([  1,   2,   5, 354,   4,   1,   2])

In [None]:
(9966600 - 10010700) == (10010700 - 10054800)

True

In [None]:
(9966600 - 10010700)

-44100

In [None]:
l[:5]*0.05

array([ 0.05,  0.1 ,  0.25, 17.7 ,  0.2 ])

In [None]:
!rm -r ./raw_data/speaker/'narendra modi'/

In [None]:
d = pd.DataFrame()

In [None]:
d['y'] = []

In [None]:
d.append([[1, 2, 4,3], [1,2,3,4,56]], 'y')

Unnamed: 0,y,0,1
0,,"[1, 2, 4, 3]","[1, 2, 3, 4, 56]"


In [None]:
d

Unnamed: 0,y


In [None]:
!rm -r /content/raw_data/speaker/'narendra modi'/.ip*

In [None]:
os.listdir('/content/raw_data/speaker/narendra modi')

['173.wav',
 '90.wav',
 '128.wav',
 '168.wav',
 '220.wav',
 '22.wav',
 '38.wav',
 '21.wav',
 '37.wav',
 '93.wav',
 '174.wav',
 '214.wav',
 '120.wav',
 '10.wav',
 '105.wav',
 '74.wav',
 '60.wav',
 '224.wav',
 '41.wav',
 '141.wav',
 '197.wav',
 '100.wav',
 '46.wav',
 '70.wav',
 '218.wav',
 '163.wav',
 '19.wav',
 '199.wav',
 '79.wav',
 '73.wav',
 '212.wav',
 '82.wav',
 '103.wav',
 '185.wav',
 '201.wav',
 '15.wav',
 '148.wav',
 '167.wav',
 '0.wav',
 '213.wav',
 '8.wav',
 '33.wav',
 '191.wav',
 '29.wav',
 '51.wav',
 '145.wav',
 '176.wav',
 '140.wav',
 '170.wav',
 '67.wav',
 '32.wav',
 '87.wav',
 '.ipynb_checkpoints',
 '57.wav',
 '187.wav',
 '6.wav',
 '142.wav',
 '155.wav',
 '5.wav',
 '132.wav',
 '76.wav',
 '119.wav',
 '225.wav',
 '23.wav',
 '129.wav',
 '42.wav',
 '50.wav',
 '203.wav',
 '164.wav',
 '7.wav',
 '180.wav',
 '49.wav',
 '77.wav',
 '24.wav',
 '193.wav',
 '66.wav',
 '3.wav',
 '58.wav',
 '184.wav',
 '71.wav',
 '183.wav',
 '39.wav',
 '48.wav',
 '123.wav',
 '186.wav',
 '102.wav',
 '114

In [None]:
!zip -r /content/dataset.zip /content/dataset

  adding: content/dataset/ (stored 0%)
  adding: content/dataset/.ipynb_checkpoints/ (stored 0%)
  adding: content/dataset/target.csv (deflated 100%)
  adding: content/dataset/narendra modi/ (stored 0%)
  adding: content/dataset/narendra modi/173.wav (deflated 42%)
  adding: content/dataset/narendra modi/777.wav (deflated 45%)
  adding: content/dataset/narendra modi/416.wav (deflated 56%)
  adding: content/dataset/narendra modi/555.wav (deflated 61%)
  adding: content/dataset/narendra modi/466.wav (deflated 34%)
  adding: content/dataset/narendra modi/90.wav (deflated 41%)
  adding: content/dataset/narendra modi/381.wav (deflated 33%)
  adding: content/dataset/narendra modi/549.wav (deflated 37%)
  adding: content/dataset/narendra modi/253.wav (deflated 72%)
  adding: content/dataset/narendra modi/356.wav (deflated 45%)
  adding: content/dataset/narendra modi/128.wav (deflated 44%)
  adding: content/dataset/narendra modi/397.wav (deflated 39%)
  adding: content/dataset/narendra modi/43

In [None]:
from google.colab import files
files.download("/content/dataset.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!ls -all -h

total 245M
drwxr-xr-x 1 root root 4.0K Sep 26 11:30 .
drwxr-xr-x 1 root root 4.0K Sep 26 06:33 ..
-rw-r--r-- 1 root root  77M Sep 26 06:51 1.wav
-rw-r--r-- 1 root root 431K Sep 26 08:23 audio.wav
drwxr-xr-x 4 root root 4.0K Sep 16 13:39 .config
drwxr-xr-x 4 root root 4.0K Sep 26 10:13 dataset
-rw-r--r-- 1 root root 168M Sep 26 11:30 dataset.zip
drwx------ 5 root root 4.0K Sep 26 06:35 drive
drwxr-xr-x 2 root root 4.0K Sep 26 07:02 .ipynb_checkpoints
drwxr-xr-x 5 root root 4.0K Sep 26 07:19 raw_data
drwxr-xr-x 1 root root 4.0K Sep 16 13:40 sample_data


In [None]:
!cp dataset.zip ./drive/MyDrive/

In [None]:
!ls ./drive/MyDrive/ | grep *.zip

dataset.zip
