In [1]:
import pandas as pd
import os
import webrtcvad
from pydub import AudioSegment
from pydub.silence import split_on_silence
from pydub.effects import low_pass_filter,high_pass_filter
import contextlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle

In [6]:
#Constants
# Frame rate every loaded audio file is resampled to (passed to set_frame_rate).
TARGET_SAMPLE_RATE = 16000
# NOTE(review): not referenced in the visible cells - presumably a soundfile-style
# subtype for WAV export; confirm before relying on it.
WAVE_TYPE = "PCM_16"

#IO
# READ_PATH_VOICE is walked recursively to collect the input recordings and
# WRITE_PATH_VOICE_CLEAN receives the exported samples.
# NOTE(review): READ_PATH_NOISE and WRITE_PATH_NO_VOICE are not used in the
# visible cells - presumably consumed by a later noise-processing step.
READ_PATH_VOICE           = "./SAMPLES UNPROCESSED/VOICE/"
READ_PATH_NOISE           = "./SAMPLES UNPROCESSED/NOISES/"
WRITE_PATH_VOICE_CLEAN    = "./SAMPLES PROCESSED/VOICE CLEAN/"
WRITE_PATH_NO_VOICE       = "./SAMPLES PROCESSED/NO VOICE/"

In [3]:
#Splits a DataFrame into consecutive row chunks
def chunker(frame:pd.DataFrame,chunk_size=1000):
    """Yield consecutive, non-overlapping row slices of ``frame``.

    Parameters
    ----------
    frame : pd.DataFrame
        Frame to split. Rows are taken by position, so the index labels
        (integer, float, string, duplicated, ...) do not matter.
    chunk_size : int, default 1000
        Maximum number of rows per chunk. The last chunk is shorter when
        ``len(frame)`` is not a multiple of ``chunk_size``.

    Yields
    ------
    pd.DataFrame
        Slices of ``frame`` in original row order. Nothing is yielded for an
        empty frame.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive (raised on first iteration, since
        this is a generator).
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    for idx_lower in range(0, len(frame), chunk_size):
        # .iloc is always positional and clamps the upper bound itself, so no
        # manual end-of-frame handling is needed.
        yield frame.iloc[idx_lower:idx_lower + chunk_size]

In [None]:
#Constant
# Length of every exported sample; also the minimum length a voiced part must
# exceed to be used at all.
SAMPLE_LENGTH = 30 #ms

#Removal of the quiet parts
# Minimum silence duration passed to split_on_silence as min_silence_len.
SILENCE_LENGTH = 10 #ms
# Offset added to each file's max_dBFS to get its silence threshold, i.e. the
# threshold is relative to the file's peak level, not absolute.
SILENCE_TRESHHOLD = -15 #dBfS
# Amount of silence kept around each part (passed as keep_silence).
KEEP_SILENCE = SILENCE_LENGTH

#Sample generation
# Number of random samples drawn per second of voiced audio (rounded up per part).
CREATE_SAMPLES_PER_SECOND = 50

#FX
# LOW_CUT is the exclusive upper bound for the random high-pass cutoff and
# HIGH_CUT the lower bound for the random low-pass cutoff (presumably Hz, as
# they are handed to pydub's filter functions as the cutoff argument).
LOW_CUT  = 300
HIGH_CUT = 5000

In [4]:
#Collects all voice recordings below READ_PATH_VOICE, cuts them into
#SAMPLE_LENGTH ms snippets and exports the snippets as WAV files.
samples = []
counter_chunks  = 0   # number of input files processed so far
counter_samples = 0   # number of snippets exported so far

for subdir, dirs, files in os.walk(READ_PATH_VOICE):
    for file in files:
        filepath = os.path.join(subdir, file)
        #Append a new record
        samples.append({
            "filename"                : file,
            "fileending"              : file.split(".")[-1].lower(),
            "filepath"                : filepath,
        })

#To DataFrame; explicit columns keep the frame well-formed when no file was found
samples = pd.DataFrame(samples, columns=["filename", "fileending", "filepath"])

#Shuffle (skipped for an empty frame, where there is nothing to shuffle)
if not samples.empty:
    samples = shuffle(samples)

#Sound files only. Exact match on the extension: a substring match would also
#accept extension-less files whose name merely contains e.g. "flac".
samples = samples[samples["fileending"].isin(["wav", "mp3", "ogg", "flac"])]

#Make sure the export target exists before the first write
os.makedirs(WRITE_PATH_VOICE_CLEAN, exist_ok=True)

#loading
__chunk_size= 200
for chunk in chunker(samples,chunk_size=__chunk_size):
    parts = chunk.copy().reset_index(drop=True)

    #Count the files actually contained in this chunk (the last chunk may be shorter)
    counter_chunks += len(chunk)

    #Load the audio file, downmix to mono and resample
    parts["audioSegment"] = parts.apply(lambda row: AudioSegment.from_file(file=row["filepath"],format=row["fileending"]),axis=1)
    parts["audioSegment"] = parts["audioSegment"].apply(lambda audioSegment: audioSegment.set_channels(1))
    parts["audioSegment"] = parts["audioSegment"].apply(lambda audioSegment: audioSegment.set_frame_rate(TARGET_SAMPLE_RATE))
    print("LOADING DONE")

    #Silence threshold relative to each file's peak level
    parts["maxdBFS"]          = parts["audioSegment"].apply(lambda audioSegment: audioSegment.max_dBFS)
    parts["silence_treshold"] = (parts["maxdBFS"] + SILENCE_TRESHHOLD).round(1)

    #Split the audio at silent passages; one row per non-silent part afterwards
    parts["audioSegment"] = parts.apply(lambda row: split_on_silence(row["audioSegment"], min_silence_len=SILENCE_LENGTH, silence_thresh=row["silence_treshold"], keep_silence=KEEP_SILENCE),axis=1)
    parts = parts.explode("audioSegment")
    print("SPLITTING DONE")

    #Drop problematic rows (files without any non-silent part explode to NaN)
    parts = parts[parts["audioSegment"].apply(lambda audioSegment: isinstance(audioSegment, AudioSegment))]

    #Reset the index so that every part gets its own id; copy to detach from the filtered view
    parts = parts.reset_index(drop=True).copy()

    #Determine the length
    parts["length_in_seconds"] = parts["audioSegment"].apply(lambda audioSegment: audioSegment.duration_seconds)
    parts["length_in_ms"]      = parts["length_in_seconds"] * 1000

    #Remove parts that are too short to hold a full sample
    parts = parts[parts["length_in_ms"] > SAMPLE_LENGTH].copy()

    #Nothing usable in this chunk: row-wise apply on an empty frame would break below
    if parts.empty:
        print(f"chunk: {counter_chunks}| samples: {counter_samples}")
        continue

    #Draw random sample windows from every part
    parts["Number_of_Samples"] = (parts["length_in_seconds"] * CREATE_SAMPLES_PER_SECOND).apply(np.ceil).astype("int")
    #Last start position (whole ms) at which a full SAMPLE_LENGTH window still fits
    parts["_max_start_point"]  = np.floor(parts["length_in_ms"] - SAMPLE_LENGTH).astype("int")
    #randint's upper bound is exclusive, hence +1; this also keeps high > low for
    #parts that are only a fraction of a millisecond longer than SAMPLE_LENGTH
    parts["sample_start"]      = parts.apply(func=lambda part: np.random.randint(low=0,high=part["_max_start_point"] + 1,size=part["Number_of_Samples"]), axis=1)
    parts = parts.explode("sample_start")
    parts["sample_start"] = parts["sample_start"].astype("int")
    parts["sample_end"]   = parts["sample_start"] + SAMPLE_LENGTH
    parts["audioSegment"] = parts.apply(func=lambda part: part["audioSegment"][part["sample_start"]:part["sample_end"]], axis=1)

    #Number the samples within each part (the index still holds the part id after explode)
    parts["sample_number"] = parts.groupby(level=0).cumcount()
    parts = parts.reset_index(drop=True)

    #some cleanup
    parts = parts.drop(["filename", "fileending", "Number_of_Samples","_max_start_point", "maxdBFS","length_in_seconds", "length_in_ms"],axis=1)

    #FX: each filter is applied to roughly 20 % of the samples with a random cutoff
    #NOTE(review): high_cut_freq can exceed TARGET_SAMPLE_RATE / 2 - confirm this is intended
    parts["low_cut"]  = np.random.choice(2,len(parts),p=[0.8,0.2])
    parts["high_cut"] = np.random.choice(2,len(parts),p=[0.8,0.2])
    parts["low_cut_freq"]  = np.random.randint(size = len(parts), low=1,       high=LOW_CUT)
    parts["high_cut_freq"] = np.random.randint(size = len(parts), low=HIGH_CUT,high=20000)
    parts["audioSegment"] = parts.apply(func = lambda row: high_pass_filter (row["audioSegment"], row["low_cut_freq"])  if row["low_cut"]  else row["audioSegment"], axis=1)
    parts["audioSegment"] = parts.apply(func = lambda row: low_pass_filter  (row["audioSegment"], row["high_cut_freq"]) if row["high_cut"] else row["audioSegment"], axis=1)

    #Flatten the path relative to READ_PATH_VOICE into a file-name-safe token.
    #All replacements are literal: as a regex, "." would match every character
    #and wipe out the whole name.
    flat_source_name = (
        parts["filepath"]
            .str.replace(READ_PATH_VOICE, "", regex=False)
            .str.replace("/",             "_", regex=False)
            .str.replace("\\",            "_", regex=False)
            .str.replace(".",             "_", regex=False)
    )
    #NOTE(review): start/end are relative to the part, so two parts of the same
    #file can in principle produce the same name - verify whether that matters
    parts["outFilename"] = (
        parts["sample_number"].astype(str)
        + "_start_" + parts["sample_start"].astype(str)
        + "_end_"   + parts["sample_end"].astype(str)
        + "_"       + flat_source_name
    )

    #export
    for segment, out_name in zip(parts["audioSegment"], parts["outFilename"]):
        segment.export(os.path.join(WRITE_PATH_VOICE_CLEAN, out_name + ".wav"), format="wav").close()

    counter_samples += len(parts)
    print(f"chunk: {counter_chunks}| samples: {counter_samples}")
