In [1]:
import pandas as pd
import noisereduce as nr
import pandas as pd
import os
from pydub import AudioSegment
from pathlib import Path
from scipy.io.wavfile import read, write

In [2]:
data_dud = pd.read_pickle('YuryDud_TTS_date.pkl')

In [3]:
data_dud.head()

Unnamed: 0,0,1
0,YD_short/Akunin/short_103.wav,1.36
1,YD_short/Akunin/short_575.wav,3.764989
2,YD_short/Akunin/short_801.wav,1.7
3,YD_short/Akunin/short_546.wav,1.875011
4,YD_short/Akunin/short_844.wav,1.624989


In [4]:
data_dud.shape

(2660, 2)

In [5]:
def denoise(path_to_wav: str, dir_to_save: str, coef_denosie: float):
    '''
    example:
        path to wav -> /path/to/wav/file.wav
        dir_to_save -> /path/to/dir/save
        save file -> /path/to/dir/save/file.wav
    '''
    key = path_to_wav.split('/')[-2]
    if not os.path.exists( Path(dir_to_save,key) ):
        os.makedirs( Path(dir_to_save, key) )
    try:
        sr, data = read(path_to_wav)
        reduced_noise = nr.reduce_noise(y=data, sr=sr, n_std_thresh_stationary=coef_denosie, stationary=True)
        write(Path(dir_to_save, key, path_to_wav.split('/')[-1]), sr, reduced_noise)
    except Exception as e:
        print(f'File - {path_to_wav} - not denoising')
        print(e)

In [6]:
### Change coef
coef_denosie = 0.0
data_dud[0].apply(lambda x: denoise(x, 'YD_denoise_short_wavs/', coef_denosie = coef_denosie))

0       None
1       None
2       None
3       None
4       None
        ... 
2655    None
2656    None
2657    None
2658    None
2659    None
Name: 0, Length: 2660, dtype: object

In [7]:
### create new meta
data_dud[2] = data_dud[1]
data_dud[1] = data_dud[0].apply(lambda x: 'YD_denoise_short_wavs/' + '/'.join(x.split('/')[-2:]))

In [8]:
data_dud.rename(columns={0:'path_to_original_short',1:'path_to_denoise_short',2:'duration'}, inplace=True)

In [9]:
data_dud.head()

Unnamed: 0,path_to_original_short,path_to_denoise_short,duration
0,YD_short/Akunin/short_103.wav,YD_denoise_short_wavs/Akunin/short_103.wav,1.36
1,YD_short/Akunin/short_575.wav,YD_denoise_short_wavs/Akunin/short_575.wav,3.764989
2,YD_short/Akunin/short_801.wav,YD_denoise_short_wavs/Akunin/short_801.wav,1.7
3,YD_short/Akunin/short_546.wav,YD_denoise_short_wavs/Akunin/short_546.wav,1.875011
4,YD_short/Akunin/short_844.wav,YD_denoise_short_wavs/Akunin/short_844.wav,1.624989


In [10]:
### ASR init
import nemo.collections.asr as nemo_asr
ASR = nemo_asr.models.EncDecCTCModel.restore_from("checkpoints/QuartzNet15x5_golos.nemo", map_location='cuda')

NOTE! Installing ujson may make loading annotations faster.


################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################

[NeMo W 2022-04-04 08:37:23 modelPT:148] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: train/golos_and_mcv.jsonl
    sample_rate: 16000
    labels:
    - ' '
    - а
    - б
    - в
    - г
    - д
    - е
    - ж
    - з
    - и
    - й
    - к
    - л
    - м
    - н
    - о
    - п
    - р
    - с
    - т
    - у
    - ф
    - х
    - ц
    - ч
    - ш
    - щ
    - ъ
    - ы
    - ь
    - э
    - ю
    - я
    batch_size: 64
    trim_silence: false
    max_duration: 20.0
    min_duration: 0.1
    num_workers

[NeMo I 2022-04-04 08:37:23 features:255] PADDING: 16
[NeMo I 2022-04-04 08:37:23 features:272] STFT using torch
[NeMo I 2022-04-04 08:37:25 save_restore_connector:157] Model EncDecCTCModel was successfully restored from /workspace/byebye/pet_pr/checkpoints/QuartzNet15x5_golos.nemo.


In [11]:
transcript = ASR.transcribe(data_dud['path_to_denoise_short'], batch_size=32)

Transcribing:   0%|          | 0/84 [00:00<?, ?it/s]

In [13]:
data_dud['transcript'] = transcript
data_dud.head(10)

Unnamed: 0,path_to_original_short,path_to_denoise_short,duration,transcript
0,YD_short/Akunin/short_103.wav,YD_denoise_short_wavs/Akunin/short_103.wav,1.36,
1,YD_short/Akunin/short_575.wav,YD_denoise_short_wavs/Akunin/short_575.wav,3.764989,да на тиковил его мытеваться в том что я попро...
2,YD_short/Akunin/short_801.wav,YD_denoise_short_wavs/Akunin/short_801.wav,1.7,
3,YD_short/Akunin/short_546.wav,YD_denoise_short_wavs/Akunin/short_546.wav,1.875011,то что этот скилон то же присутствует
4,YD_short/Akunin/short_844.wav,YD_denoise_short_wavs/Akunin/short_844.wav,1.624989,но там не было жиреноское писко
5,YD_short/Akunin/short_101.wav,YD_denoise_short_wavs/Akunin/short_101.wav,2.54,для начала какой вообще относится к тому что г...
6,YD_short/Akunin/short_687.wav,YD_denoise_short_wavs/Akunin/short_687.wav,3.124989,а запад е не доценил а о чей час что мог сделать
7,YD_short/Akunin/short_233.wav,YD_denoise_short_wavs/Akunin/short_233.wav,2.02,ну и в россии вообще сейчас
8,YD_short/Akunin/short_350.wav,YD_denoise_short_wavs/Akunin/short_350.wav,7.875011,безусловное могу быть наименно что все таке ко...
9,YD_short/Akunin/short_102.wav,YD_denoise_short_wavs/Akunin/short_102.wav,2.02,кучино сравнивает с николаем первым


In [18]:
data_dud.shape

(2457, 4)

In [23]:
data_dud = data_dud[data_dud['transcript'] != ' '].reset_index(drop=True)
data_dud = data_dud[data_dud['transcript'] != ''].reset_index(drop=True)

In [25]:
data_dud.to_pickle('YuryDud_TTS_date.pkl')