In [1]:
import numpy as np
import pandas as pd
import sounddevice as sd
import librosa
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Noise metadata is keyed by file name. The clean list is read with explicit
# column names, so every row of that file is treated as data.
noise_df = pd.read_csv('noise_meta.csv', index_col='fileName')
clean_df = pd.read_csv('clean_meta.csv', names=["fileName"])

# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() appended a stray "None" after each report.
# noise_df['label'].value_counts()  # uncomment to inspect the label distribution
noise_df.info()
print("\n")
clean_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4680 entries, b020_90_100.wav to b081_120_130.wav
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   label        4680 non-null   object
 1   fileStarter  4680 non-null   object
dtypes: object(2)
memory usage: 109.7+ KB
None


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23075 entries, 0 to 23074
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   fileName  23075 non-null  object
dtypes: object(1)
memory usage: 180.4+ KB
None


In [3]:
def adjustLengths(clean_org, noise_org):
    """Bring two 1-D signals to a common length by looping the shorter one.

    The shorter signal is repeated cyclically (whole copies followed by a
    leading partial copy) until it is as long as the longer one. The longer
    signal is returned unchanged. Both results are new arrays, so the inputs
    are never modified.

    Parameters
    ----------
    clean_org : array-like, 1-D
        Clean signal samples.
    noise_org : array-like, 1-D
        Noise samples.

    Returns
    -------
    clean, noise : numpy.ndarray
        Two arrays of length ``max(len(clean_org), len(noise_org))`` that keep
        the dtype of their respective input.

    Raises
    ------
    ValueError
        If either input is not 1-D, or is empty (an empty signal cannot be
        repeated to fill a non-zero length).
    """
    clean_src = np.asarray(clean_org)
    noise_src = np.asarray(noise_org)

    if clean_src.ndim != 1 or noise_src.ndim != 1:
        raise ValueError("adjustLengths expects 1-D signals")
    if clean_src.size == 0 or noise_src.size == 0:
        raise ValueError("adjustLengths cannot extend an empty signal")

    target_len = max(clean_src.size, noise_src.size)

    # np.resize fills the requested length with repeated copies of its input,
    # which is exactly the tile + partial-copy scheme; for the signal that is
    # already long enough it simply returns a copy.
    clean = np.resize(clean_src, target_len)
    noise = np.resize(noise_src, target_len)
    return clean, noise

In [4]:
def SNRmixer(clean_org, noise_org, snr_dB):
    """Mix a clean signal with noise at a requested signal-to-noise ratio.

    Both signals are first brought to a common length with ``adjustLengths``
    and normalised to an RMS level of -25 dB FS. The noise is then rescaled so
    that ``20 * log10(rms(clean) / rms(noise))`` equals ``snr_dB`` and is added
    to the clean signal.

    Parameters
    ----------
    clean_org : array-like
        Clean signal samples.
    noise_org : array-like
        Noise samples.
    snr_dB : float
        Target signal-to-noise ratio in decibels.

    Returns
    -------
    clean : numpy.ndarray
        The length-adjusted clean signal normalised to -25 dB FS.
    noisenewlevel : numpy.ndarray
        The length-adjusted noise scaled to sit ``snr_dB`` below ``clean``.
    noisyspeech : numpy.ndarray
        ``clean + noisenewlevel``.
    """
    clean, noise = adjustLengths(clean_org, noise_org)

    target_rms = 10 ** (-25 / 20)
    # Floor for RMS divisors: an all-zero signal would otherwise divide by zero
    # and fill the outputs with NaN/inf. Non-silent signals are unaffected.
    eps = np.finfo(float).eps

    # Normalizing to -25 dB FS
    rmsclean = (clean**2).mean()**0.5
    clean = clean * (target_rms / max(rmsclean, eps))
    rmsclean = (clean**2).mean()**0.5

    rmsnoise = (noise**2).mean()**0.5
    noise = noise * (target_rms / max(rmsnoise, eps))
    rmsnoise = (noise**2).mean()**0.5

    # Set the noise level for a given SNR. The gain is an amplitude ratio, so
    # it is 10**(-snr_dB/20) relative to the clean RMS; taking a square root of
    # it (as the previous version did) would deliver only half the requested
    # SNR in dB.
    noisescalar = rmsclean / (10 ** (snr_dB / 20)) / max(rmsnoise, eps)
    noisenewlevel = noise * noisescalar
    noisyspeech = clean + noisenewlevel
    return clean, noisenewlevel, noisyspeech

In [6]:
# Add a varying number of noise recordings to the clean speech.
from sklearn.utils import shuffle

# Fixed seed so that Restart & Run All selects the same noise subset each time.
NOISE_SHUFFLE_SEED = 42

shuffledNoise = shuffle(noise_df, random_state=NOISE_SHUFFLE_SEED)
simpleNoise = shuffledNoise.iloc[:20]
   
def noisySpeechGenerator(clean_df,noise_df,numNoisySpeech, numAddedNoises, snr):
    """Mix randomly chosen clean recordings with a sum of noise recordings.

    ``numNoisySpeech`` rows are taken from a shuffled ``clean_df``. For each of
    them, the first ``numAddedNoises`` files listed in ``simpleNoise.csv`` are
    loaded at 8 kHz, summed, and mixed with the clean recording through
    ``SNRmixer`` at ``snr``.

    Parameters
    ----------
    clean_df : pandas.DataFrame
        Must contain a ``fileName`` column; files are read from ``clean_train/``.
    noise_df : pandas.DataFrame
        Currently unused: the noise file list is read from ``simpleNoise.csv``
        and the files themselves from ``noise_train/``.
    numNoisySpeech : int
        Number of clean recordings to process.
    numAddedNoises : int
        Number of noise recordings summed into each mixture (negative values
        are treated as 0).
    snr : float
        Target SNR forwarded to ``SNRmixer``.

    Returns
    -------
    None
        NOTE(review): the mixtures are computed but neither stored nor
        returned; decide how they should be persisted.
    """
    sample_rate = 8000  # Hz; librosa resamples every file to this rate on load

    # Build a fresh frame instead of calling set_index(inplace=True) on a slice.
    simpleClean = shuffle(clean_df).iloc[:numNoisySpeech].set_index('fileName')

    simpleNoise = pd.read_csv('simpleNoise.csv', index_col='fileName')
    # Whole-column assignment; `.at` is the scalar accessor and takes no slice.
    simpleNoise['length'] = 10.000125

    # Loop-invariant: accumulator size and the list of noise files to sum.
    # TODO: derive the size from the loaded noises so it fits any number of
    # added noises. For now every noise file is assumed to decode to exactly
    # `noise_samples` samples; otherwise the += below raises.
    noise_samples = sample_rate * int(simpleNoise['length'].mean()) + 1
    # Exactly numAddedNoises files (the previous `i > numAddedNoises` check
    # summed one file too many).
    noise_files = list(simpleNoise.index[:max(int(numAddedNoises), 0)])

    for c in simpleClean.index:
        # Start from zeros: np.empty returns uninitialised memory, which the
        # in-place sum below would otherwise mix into the noise.
        totalNoise = np.zeros(noise_samples)

        clean, rate = librosa.load('clean_train/' + c, sr=sample_rate)
        simpleClean.at[c, 'length'] = clean.shape[0] / rate  # duration in seconds

        for n in noise_files:
            noise, _ = librosa.load('noise_train/' + n, sr=sample_rate)
            totalNoise += noise

        clean, noiseNewLevel, noisySpeech = SNRmixer(clean, totalNoise, snr)
        
# Smoke run of the generator with the tuning arguments spelled out by name.
noisySpeechGenerator(
    clean_df,
    noise_df,
    numNoisySpeech=10,
    numAddedNoises=1,
    snr=5,
)

noisySpeechGenerator clean=  (20848,)
noisySpeechGenerator totalNoise=  (80001,)
noisySpeechGenerator clean=  (18821,)
noisySpeechGenerator totalNoise=  (80001,)
noisySpeechGenerator clean=  (26088,)
noisySpeechGenerator totalNoise=  (80001,)
noisySpeechGenerator clean=  (24000,)
noisySpeechGenerator totalNoise=  (80001,)
noisySpeechGenerator clean=  (22640,)
noisySpeechGenerator totalNoise=  (80001,)
noisySpeechGenerator clean=  (24851,)
noisySpeechGenerator totalNoise=  (80001,)
noisySpeechGenerator clean=  (20831,)
noisySpeechGenerator totalNoise=  (80001,)
noisySpeechGenerator clean=  (25600,)
noisySpeechGenerator totalNoise=  (80001,)
noisySpeechGenerator clean=  (15794,)
noisySpeechGenerator totalNoise=  (80001,)
noisySpeechGenerator clean=  (29920,)
noisySpeechGenerator totalNoise=  (80001,)
