Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed_errors_in_ModelsTrainer #3

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
Binary file added Code/.DS_Store
Binary file not shown.
4 changes: 2 additions & 2 deletions Code/ModelsTrainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pickle
import warnings
import numpy as np
from sklearn.mixture import GMM
from sklearn.mixture import GaussianMixture as GMM
from FeaturesExtractor import FeaturesExtractor
from SilenceEliminator import SilenceEliminator

Expand Down Expand Up @@ -52,7 +52,7 @@
print("ValueError: Shape mismatch")

# adapt gmm
gmm = GMM(n_components = 16, n_iter = 200, covariance_type='diag', n_init = 3)
gmm = GMM(n_components = 16, covariance_type='diag', n_init = 3)
gmm.fit(features)

# dumping the trained gaussian model
Expand Down
13 changes: 6 additions & 7 deletions Code/SilenceEliminator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import subprocess
import numpy as np
from subprocess import Popen, PIPE
import scipy.io.wavfile


class SilenceEliminator:

Expand All @@ -26,8 +28,8 @@ def ffmpeg_silence_eliminator(self, input_path, output_path):
representing the certainty of the decision.
"""
# filter silence in mp3 file
filter_command = ["ffmpeg", "-i", input_path, "-af", "silenceremove=1:0:0.05:-1:1:-36dB", "-ac", "1", "-ss", "0","-t","90", output_path, "-y"]
out = subprocess.Popen(filter_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
filter_command = "ffmpeg -i "+ input_path +" -af silenceremove=1:0:-36dB "+"-ac"+" 1"+" -ss"+" 0"+" -t"+" 90 " + output_path + " -y"
out = subprocess.Popen(filter_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
out.wait()

with_silence_duration = os.popen("ffprobe -i '" + input_path + "' -show_format -v quiet | sed -n 's/duration=//p'").read()
Expand All @@ -41,11 +43,8 @@ def ffmpeg_silence_eliminator(self, input_path, output_path):
print("Cannot convert float to string")

# convert file to wave and read array
load_command = ["ffmpeg", "-i", output_path, "-f", "wav", "-" ]
p = Popen(load_command, stdin=PIPE, stdout=PIPE, stderr=PIPE)
data = p.communicate()[0]
audio_np = np.frombuffer(data[data.find(b'\x00data')+ 9:], np.int16)
sample_rate, signal = scipy.io.wavfile.read(output_path)

# delete temp silence free file, as we only need the array
os.remove(output_path)
return audio_np, no_silence_duration
return signal, no_silence_duration
8 changes: 3 additions & 5 deletions Code/SpeakerIdentifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import pickle
import warnings
import numpy as np
from SilenceEliminator import SilenceEliminator
from FeaturesExtractor import FeaturesExtractor
import scipy.io.wavfile

warnings.filterwarnings("ignore")

Expand Down Expand Up @@ -32,11 +32,9 @@
for path in file_paths[:]:
if os.path.basename(path).split('_')[0] in db.keys():
features_extractor = FeaturesExtractor()
silence_eliminator = SilenceEliminator()

silence_eliminated_wave_file_path ="temp-" + os.path.basename(path).split('.')[0] + ".wav"
audio, duration_string = silence_eliminator.ffmpeg_silence_eliminator(path, silence_eliminated_wave_file_path)
vector = features_extractor.accelerated_get_features_vector(path, audio, 8000)
sample_rate, signal = scipy.io.wavfile.read(path)
vector = features_extractor.accelerated_get_features_vector(path, signal, 8000)

if vector.shape != (0,):
print(vector.shape)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ This script requires the following modules/libraries:

## Results and discussion
- The code can be further optimized using multi-threading, acceleration libs and multi-processing.
- The accuracy can be further improved using GMM normalization aka a UBM-GMM system.
- The accuracy can be further improved using GMM normalization aka a UBM-GMM system.
33 changes: 17 additions & 16 deletions Run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,20 @@


if __name__== "__main__":
# download dataset
print("# Download dataset zip file")
zip_url = "http://www.openslr.org/resources/45/ST-AEDS-20180100_1-OS.tgz"
urllib.request.urlretrieve(zip_url, 'SLR45.tgz')

# extract and manage dataset files
print("# Mange and organize files")
os.system('python3 Code/DataManager.py')

# train speakers gmm models
print("# Train gender models")
os.system('python3 Code/ModelsTrainer.py')

# test system and recognise/identify speakers
print(" # Identify genders")
os.system('python3 Code/SpeakerIdentifier.py')

# download dataset
print("# Download dataset zip file")
zip_url = "http://www.openslr.org/resources/45/ST-AEDS-20180100_1-OS.tgz"
urllib.request.urlretrieve(zip_url, 'SLR45.tgz')

# extract and manage dataset files
print("# Mange and organize files")
os.system('python3 Code/DataManager.py')

# train speakers gmm models
print("# Train gender models")
os.system('python3 Code/ModelsTrainer.py')

# test system and recognise/identify speakers
print(" # Identify genders")
os.system('python3 Code/SpeakerIdentifier.py')