In [10]:
import os
import pandas as pd
import numpy as np
import librosa
import glob 
import matplotlib.pyplot as plt
import seaborn as sns
import re

import librosa                     # for working with audio in python
import librosa.display             # for waveplots, spectograms, etc
import soundfile as sf             # for accessing file information
from IPython.display import Audio
import soundfile as sf
from pathlib import Path
import warnings
import shutil
import tensorflow as tf

import tqdm
from tqdm import tqdm
from pydub import AudioSegment
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

# Apply seaborn's default plotting theme to all matplotlib figures.
sns.set()
# tqdm().pandas()
# NOTE(review): this installs a global "ignore" filter, so every warning
# (including deprecation notices) is hidden for the rest of the session.
warnings.filterwarnings('ignore')

# Constants
SAMPLE_RATE = 16000        # Hz; covers the frequency range of the human voice
DT = 0.02                  # NOTE(review): presumably a time step in seconds - confirm

# Hyper-parameters shared by the feature extraction and the model.
params = {
    'max_audio_length': 5,                                             # T_MAX: maximum duration of an audio file
    'alphabet': ' !"&\',-.01234:;\\abcdefghijklmnopqrstuvwxyz',
    'causal_convolutions': False,
    'stack_dilation_rates': [1, 3, 9, 27],
    'stacks': 6,
    'stack_kernel_size': 7,
    'stack_filters': 3*128,
    'sampling_rate': SAMPLE_RATE,
    # Spectrogram / mel filter-bank settings
    'n_fft': 160*8,
    'frame_step': 160*4,
    'lower_edge_hertz': 0,
    'upper_edge_hertz': 8000,
    'num_mel_bins': 160
}


# Dataset size and train/test split.
# The full TSV has shape (16467, 10); only the first DATASET_MAX_ROW rows are used.
# Set DATASET_MAX_ROW = 1000 for a quick experiment (gives an 800 / 200 split).
DATASET_MAX_ROW = 16460
TRAIN_SPLIT_PERCENT = 80

# Derived with integer arithmetic so the two parts always add up to
# DATASET_MAX_ROW and never drift out of sync when the row count is changed.
DATASET_MAX_TRAIN = DATASET_MAX_ROW * TRAIN_SPLIT_PERCENT // 100   # 80%
DATASET_MAX_TEST = DATASET_MAX_ROW - DATASET_MAX_TRAIN             # 20%

In [11]:
# Load the clip metadata (tab-separated) and keep only the first DATASET_MAX_ROW rows.
df = pd.read_csv('datasets/cv-sw-valid.tsv', sep="\t").head(DATASET_MAX_ROW)
# Tag the frame with its source file AFTER slicing: head() returns a new
# DataFrame, and a plain instance attribute set beforehand is not carried over.
df.dataframeName = 'datasets/cv-sw-valid.tsv'
df

Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accent,locale,segment
0,05a87054181791477a299a08fc35a6ff0c53250cae313e...,common_voice_fr_22108074.mp3,zéro,2,0,,,,fr,Singleword Benchmark
1,06c9c9e703dfa759edf4836936b42a07afd1021cedb06c...,common_voice_fr_22098482.mp3,trois,2,0,,,,fr,Singleword Benchmark
2,07a7db773acd156dd0b7fdc32f6b5eda9b32ffa1b3aee7...,common_voice_fr_21955578.mp3,quatre,6,3,,,,fr,Singleword Benchmark
3,0dde0df66454b490f5b9514acb4bb48dc91bbe740254b0...,common_voice_fr_22379668.mp3,neuf,2,0,fourties,male,france,fr,Singleword Benchmark
4,0eb85c7dcb9b7ca2caec05a0dbbf6ee983cfab19164dac...,common_voice_fr_22157149.mp3,Firefox,3,1,,,,fr,Singleword Benchmark
...,...,...,...,...,...,...,...,...,...,...
16455,ffd847388e93bcd91855b2a4de02c87c29ed9df053da9c...,common_voice_fr_21954594.mp3,six,2,0,,,,fr,Singleword Benchmark
16456,ffd847388e93bcd91855b2a4de02c87c29ed9df053da9c...,common_voice_fr_21954595.mp3,cinq,3,0,,,,fr,Singleword Benchmark
16457,ffd847388e93bcd91855b2a4de02c87c29ed9df053da9c...,common_voice_fr_21954597.mp3,neuf,3,0,,,,fr,Singleword Benchmark
16458,ffd847388e93bcd91855b2a4de02c87c29ed9df053da9c...,common_voice_fr_21954610.mp3,Firefox,3,1,,,,fr,Singleword Benchmark


In [14]:
# Target loudness (dBFS) and output directory for the normalized clips.
# NOTE: NormalizeAudioFiles defines its own local target_dbfs with the same value.
target_dbfs = -13.5
EDITED_STORE = 'datasets/cv-sw-normalized'

# The import must sit INSIDE the catch_warnings() block: the filter installed
# by simplefilter() is only active within the `with` body and is undone on exit.
# NOTE(review): the warning is presumably emitted when pydub is first imported
# by pynormalize - confirm.
with warnings.catch_warnings():
    # Silence RuntimeWarning about absence of ffmpeg
    warnings.simplefilter("ignore")
    from pynormalize.pynormalize import process_files

def NormalizeAudioFiles(size_ = None):
    """Loudness-normalize the audio clips listed in the validated TSV.

    Reads 'datasets/cv-sw-valid.tsv', and for each of the first ``size_`` rows
    passes the clip found under 'datasets/cv-sw/' to ``process_files`` with a
    target of -13.5 dBFS, writing into ``EDITED_STORE``. Clips that are missing
    on disk are reported with a printed message and skipped.

    Parameters
    ----------
    size_ : int or None
        Number of leading rows to process. ``None`` processes every row.
        Values larger than the number of rows are clamped to it; values
        below zero process nothing.

    Returns
    -------
    list of str
        The source paths ('datasets/cv-sw/<file>') of the clips that existed
        and were handed to ``process_files``. These are the input paths, not
        the paths of the normalized outputs.
    """
    target_dbfs = -13.5
    df = pd.read_csv('datasets/cv-sw-valid.tsv', sep="\t")

    if size_ is None:
        size_ = len(df)
    else:
        # Clamp so an oversized request cannot index past the end of the frame.
        size_ = max(0, min(size_, len(df)))

    all_path_normalized = []
    for audio in tqdm(df['path'].iloc[:size_], total=size_):
        audio_path = "datasets/cv-sw/" + audio
        if Path(audio_path).exists():
            # process_files is called one file at a time so the progress bar
            # advances per clip.
            process_files(Files=[audio_path], target_dbfs=target_dbfs, directory=EDITED_STORE)
            all_path_normalized.append(audio_path)
        else:
            print("Err fichier non présent: ", audio_path)

    return all_path_normalized

# Normalize every clip listed in the TSV (size_ defaults to all rows) and keep
# the list of source paths that were processed.
# NOTE(review): the recorded run shows roughly 10 clips/s over 16467 rows and
# ends in a KeyboardInterrupt - consider passing a small size_ while iterating.
all_path_normalized = NormalizeAudioFiles()

  0%|          | 0/16467 [00:00<?, ?it/s](1 of 1) Processing file : "common_voice_fr_22108074.mp3"
  0%|          | 2/16467 [00:00<16:53, 16.24it/s](1 of 1) Processing file : "common_voice_fr_22098482.mp3"
  0%|          | 3/16467 [00:00<20:07, 13.64it/s](1 of 1) Processing file : "common_voice_fr_21955578.mp3"
(1 of 1) Processing file : "common_voice_fr_22379668.mp3"
  0%|          | 5/16467 [00:00<21:41, 12.65it/s](1 of 1) Processing file : "common_voice_fr_22157149.mp3"
  0%|          | 6/16467 [00:00<26:46, 10.25it/s](1 of 1) Processing file : "common_voice_fr_21967748.mp3"
(1 of 1) Processing file : "common_voice_fr_22219035.mp3"
  0%|          | 8/16467 [00:00<27:04, 10.13it/s](1 of 1) Processing file : "common_voice_fr_22064544.mp3"
(1 of 1) Processing file : "common_voice_fr_22042510.mp3"
  0%|          | 10/16467 [00:00<26:13, 10.46it/s](1 of 1) Processing file : "common_voice_fr_22386260.mp3"
(1 of 1) Processing file : "common_voice_fr_22140697.mp3"
  0%|          | 12/16467 

KeyboardInterrupt: 