In [12]:
import os

import librosa
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import soundfile as sf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [62]:
# --- Input folders: one word folder and one sentence folder per speaker ---
DATA_PATH = "BazaCut/"
FILIP_PATH = f"{DATA_PATH}wordsFilip/"
SEB_PATH = f"{DATA_PATH}wordsSebastian/"
MARCIN_PATH = f"{DATA_PATH}wordsMarcin/"
PROF_PATH = f"{DATA_PATH}wordsProfessor/"

FILIP_PATH_SENT = f"{DATA_PATH}sentencesFilip/"
SEB_PATH_SENT = f"{DATA_PATH}sentencesSebastian/"
MARCIN_PATH_SENT = f"{DATA_PATH}sentencesMarcin/"
PROF_PATH_SENT = f"{DATA_PATH}sentencesProfessor/"

# --- Dataset size and naming ---
no_of_words = 30
no_of_sentences = 45
# One-letter speaker tags; the same letters appear in the recording file names.
prefixes = ["F", "S", "M", "L"]
# NOTE(review): presumably the expected sampling rate in Hz; it is not
# referenced by any of the visible cells - confirm before relying on it.
Fs = 48000

# Placeholder lists (filled with their own indices) that the loading cells
# overwrite slot by slot with the decoded audio arrays.
words_filip = list(range(no_of_words))
words_seb = list(range(no_of_words))
words_marcin = list(range(no_of_words))
words_prof = list(range(no_of_words))

sentences_filip = list(range(no_of_sentences))
sentences_seb = list(range(no_of_sentences))
sentences_marcin = list(range(no_of_sentences))
sentences_prof = list(range(no_of_sentences))

# Per-sentence label letters: 15 neutral, then 15 happy, then 15 anger.
# L[i-1] is the letter embedded in the file name of sentence i.
neutral = ["N"] * 15
happy = ["H"] * 15
anger = ["A"] * 15
L = [*neutral, *happy, *anger]


# --- Recorded background noise that is later mixed into the speech ---
NOISE_PATH = "BazaNoise/"
noise_file = f"{NOISE_PATH}noise.wav"
# sr=None keeps the file's native sampling rate (no resampling).
y, sr = librosa.load(noise_file, sr=None)
NOISE = y

### Wczytywanie słów

In [None]:
# Load every speaker's word recordings into the pre-allocated lists.
# The four speakers differ only in folder, file-name tag and target list, so a
# single data-driven loop replaces four copy-pasted ones. Files are read in
# the same order as before (Filip, Sebastian, Marcin, Professor).
for speaker_words, speaker_dir, speaker_tag in (
    (words_filip, FILIP_PATH, "F"),
    (words_seb, SEB_PATH, "S"),
    (words_marcin, MARCIN_PATH, "M"),
    (words_prof, PROF_PATH, "L"),
):
    # Files are numbered from 1 on disk, list slots from 0.
    for i in range(1, no_of_words + 1):
        file_name = f"word{i}_{speaker_tag}.wav"
        # sr=None keeps the file's native sampling rate (no resampling).
        # `sr` stays a module-level name because later cells read it when
        # writing audio back to disk.
        y, sr = librosa.load(speaker_dir + file_name, sr=None)
        speaker_words[i - 1] = y

### Wczytywanie zdań

In [63]:
# Load every speaker's sentence recordings into the pre-allocated lists.
# The four speakers differ only in folder, file-name tag and target list, so a
# single data-driven loop replaces four copy-pasted ones. Files are read in
# the same order as before (Filip, Sebastian, Marcin, Professor).
for speaker_sentences, speaker_dir, speaker_tag in (
    (sentences_filip, FILIP_PATH_SENT, "F"),
    (sentences_seb, SEB_PATH_SENT, "S"),
    (sentences_marcin, MARCIN_PATH_SENT, "M"),
    (sentences_prof, PROF_PATH_SENT, "L"),
):
    # Files are numbered from 1 on disk, list slots from 0.
    for i in range(1, no_of_sentences + 1):
        # The label letter L[i-1] sits between the sentence number and the
        # speaker tag in the file name.
        file_name = f"sentence{i}{L[i - 1]}_{speaker_tag}.wav"
        # sr=None keeps the file's native sampling rate (no resampling).
        # `sr` stays a module-level name because later cells read it when
        # writing audio back to disk.
        y, sr = librosa.load(speaker_dir + file_name, sr=None)
        speaker_sentences[i - 1] = y

### Dodawanie nagranego hałasu do słów
### x\*mowa+y\*hałas
### x,y w (0,1) i x+y = 1

In [24]:
# Quick manual check: blend one word 50/50 with the recorded noise and save
# the result so it can be listened to.
output_file = 'output_audio.wav'

curr_word = words_filip[2]
length = len(curr_word)
# Use only as many noise samples as the word has.
noise_to_add = NOISE[:length]

added_noise = 0.5*curr_word + 0.5*noise_to_add
# `sr` is the module-level sampling rate left behind by the last librosa.load.
sf.write(output_file, added_noise, sr)

print(f"Audio saved to {output_file}")
# Works

Audio saved to output_audio.wav


In [45]:
def add_and_save_added_noise(data, noise, folder_path, which, labels=None, sample_rate=None):
    """Mix every clip with noise at nine ratios and save each mix as a WAV file.

    For each noise proportion p in 0.1, 0.2, ..., 0.9 and each clip, the saved
    signal is ``clip * (1 - p) + noise * p``.

    Parameters
    ----------
    data : sequence of 1-D sample arrays
        The speech clips to degrade.
    noise : 1-D array-like
        Noise samples. The first ``len(clip)`` samples are used; when the
        noise is shorter than a clip it is repeated cyclically to cover it.
    folder_path : str
        Output folder. It is concatenated directly with the file name, so it
        must end with a path separator. Created if it does not exist.
    which : str
        Speaker tag inserted into the file name.
    labels : sequence of str, optional
        When omitted, files are named ``word{i}_{which}_N{pct}.wav`` with a
        0-based ``i`` (kept as-is so existing files keep their names; note the
        source recordings are numbered from 1). When given, files are named
        ``sentence{i+1}{labels[i]}_{which}_N{pct}.wav``.
    sample_rate : int, optional
        Sampling rate written to the WAV header. When omitted, the
        module-level ``sr`` is used, as before; pass it explicitly to avoid
        depending on whichever file was loaded last.

    Raises
    ------
    ValueError
        If ``noise`` is empty while a non-empty clip has to be mixed.
    """
    if sample_rate is None:
        # Backward-compatible fallback to the notebook-global sampling rate.
        sample_rate = sr

    noise = np.asarray(noise)
    if folder_path:
        # Writing fails if the target folder is missing, so create it first.
        os.makedirs(folder_path, exist_ok=True)

    for noise_proportion in np.arange(0.1, 1, 0.1):
        speech_proportion = 1 - noise_proportion
        # round() before int(): the product is a float and plain truncation
        # would turn e.g. 28.999999999999996 into 28.
        suffix = f"_N{int(round(noise_proportion * 100))}"

        for i, clip in enumerate(data):
            length = len(clip)
            if len(noise) >= length:
                noise_to_add = noise[:length]
            elif len(noise) == 0:
                raise ValueError("noise must contain at least one sample")
            else:
                # Noise is shorter than the clip: loop it to the clip length.
                noise_to_add = np.resize(noise, length)

            mixed = clip * speech_proportion + noise_to_add * noise_proportion

            if labels is None:
                file_name = f"word{i}_{which}{suffix}.wav"
            else:
                file_name = f"sentence{i + 1}{labels[i]}_{which}{suffix}.wav"

            sf.write(folder_path + file_name, mixed, sample_rate)

### Dodawanie szumu do słów

In [46]:
# Output folders for the noisy word recordings.
# NOTE: this rebinds the path names that earlier pointed at the clean input
# folders, so re-running the loading cells after this one reads from here.
DATA_PATH = "BazaNoise/"
FILIP_PATH = f"{DATA_PATH}wordsFilipNoise/"
SEB_PATH = f"{DATA_PATH}wordsSebastianNoise/"
MARCIN_PATH = f"{DATA_PATH}wordsMarcinNoise/"
PROF_PATH = f"{DATA_PATH}wordsProfessorNoise/"

# Not passed to the helper (it builds its own grid); kept as a global.
noise_proportions = np.arange(0.1, 1, 0.1)

# Write the noisy variants for each speaker, in the original order.
for speaker_words, out_dir, speaker_tag in (
    (words_filip, FILIP_PATH, "F"),
    (words_seb, SEB_PATH, "S"),
    (words_marcin, MARCIN_PATH, "M"),
    (words_prof, PROF_PATH, "L"),
):
    add_and_save_added_noise(speaker_words, NOISE, out_dir, speaker_tag)


### Dodawanie szumu do zdań

In [64]:
def add_and_save_added_noise(data, noise, folder_path, which, labels=None, sample_rate=None):
    """Mix every clip with noise at nine ratios and save each mix as a WAV file.

    This redefines the earlier word-only helper of the same name. ``labels``
    is optional so that word-style calls (without labels) keep working after
    this cell has run.

    For each noise proportion p in 0.1, 0.2, ..., 0.9 and each clip, the saved
    signal is ``clip * (1 - p) + noise * p``.

    Parameters
    ----------
    data : sequence of 1-D sample arrays
        The speech clips to degrade.
    noise : 1-D array-like
        Noise samples. The first ``len(clip)`` samples are used; when the
        noise is shorter than a clip it is repeated cyclically to cover it.
    folder_path : str
        Output folder. It is concatenated directly with the file name, so it
        must end with a path separator. Created if it does not exist.
    which : str
        Speaker tag inserted into the file name.
    labels : sequence of str, optional
        When given, files are named
        ``sentence{i+1}{labels[i]}_{which}_N{pct}.wav``. When omitted, files
        are named ``word{i}_{which}_N{pct}.wav`` with a 0-based ``i``, matching
        the word-only helper.
    sample_rate : int, optional
        Sampling rate written to the WAV header. When omitted, the
        module-level ``sr`` is used, as before; pass it explicitly to avoid
        depending on whichever file was loaded last.

    Raises
    ------
    ValueError
        If ``noise`` is empty while a non-empty clip has to be mixed.
    """
    if sample_rate is None:
        # Backward-compatible fallback to the notebook-global sampling rate.
        sample_rate = sr

    noise = np.asarray(noise)
    if folder_path:
        # Writing fails if the target folder is missing, so create it first.
        os.makedirs(folder_path, exist_ok=True)

    for noise_proportion in np.arange(0.1, 1, 0.1):
        speech_proportion = 1 - noise_proportion
        # round() before int(): the product is a float and plain truncation
        # would turn e.g. 28.999999999999996 into 28.
        suffix = f"_N{int(round(noise_proportion * 100))}"

        for i, clip in enumerate(data):
            length = len(clip)
            if len(noise) >= length:
                noise_to_add = noise[:length]
            elif len(noise) == 0:
                raise ValueError("noise must contain at least one sample")
            else:
                # Noise is shorter than the clip: loop it to the clip length.
                noise_to_add = np.resize(noise, length)

            mixed = clip * speech_proportion + noise_to_add * noise_proportion

            if labels is None:
                file_name = f"word{i}_{which}{suffix}.wav"
            else:
                file_name = f"sentence{i + 1}{labels[i]}_{which}{suffix}.wav"

            sf.write(folder_path + file_name, mixed, sample_rate)

In [None]:
# Output folders for the noisy sentence recordings.
# NOTE: this rebinds the path names again, so they no longer point at the
# word folders used by the earlier cells.
DATA_PATH = "BazaNoise/"
FILIP_PATH = f"{DATA_PATH}sentencesFilipNoise/"
SEB_PATH = f"{DATA_PATH}sentencesSebastianNoise/"
MARCIN_PATH = f"{DATA_PATH}sentencesMarcinNoise/"
PROF_PATH = f"{DATA_PATH}sentencesProfessorNoise/"


# Write the noisy variants for each speaker, in the original order; L supplies
# the per-sentence label letter used in the file names.
for speaker_sentences, out_dir, speaker_tag in (
    (sentences_filip, FILIP_PATH, "F"),
    (sentences_seb, SEB_PATH, "S"),
    (sentences_marcin, MARCIN_PATH, "M"),
    (sentences_prof, PROF_PATH, "L"),
):
    add_and_save_added_noise(speaker_sentences, NOISE, out_dir, speaker_tag, L)

# Spot check that the source samples are still in memory.
print(sentences_prof[30])

[ 0.00038791 -0.00017631 -0.00066841 ...  0.00053966  0.00057769
  0.00061023]
