SuperKogito · manitbaser · Oct 11, 2020 · Nov 1, 2020 · Dec 13, 2020
diff --git a/.DS_Store b/.DS_Store
diff --git a/Code/.DS_Store b/Code/.DS_Store
diff --git a/Code/ModelsTrainer.py b/Code/ModelsTrainer.py
@@ -2,7 +2,7 @@
 import pickle
 import warnings
 import numpy as np
-from sklearn.mixture import GMM
+from sklearn.mixture import GaussianMixture as GMM
 from FeaturesExtractor import FeaturesExtractor
 from SilenceEliminator import SilenceEliminator
 
@@ -52,7 +52,7 @@
                 print("ValueError: Shape mismatch")
 
     # adapt gmm
-    gmm = GMM(n_components = 16, n_iter = 200, covariance_type='diag', n_init = 3)
+    gmm = GMM(n_components = 16, max_iter = 200, covariance_type='diag', n_init = 3)  # max_iter is GaussianMixture's name for the old n_iter; keeps the 200-iteration EM cap
     gmm.fit(features)
 
     # dumping the trained gaussian model

diff --git a/Code/SilenceEliminator.py b/Code/SilenceEliminator.py
@@ -7,6 +7,8 @@
 import subprocess
 import numpy as np
 from subprocess import Popen, PIPE
+import scipy.io.wavfile
+
 
 class SilenceEliminator:
 
@@ -26,8 +28,8 @@ def ffmpeg_silence_eliminator(self, input_path, output_path):
                       representing the certainty of the decision.
         """
         # filter silence in mp3 file
-        filter_command = ["ffmpeg", "-i", input_path, "-af", "silenceremove=1:0:0.05:-1:1:-36dB", "-ac", "1", "-ss", "0","-t","90", output_path, "-y"]
-        out = subprocess.Popen(filter_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        filter_command = ["ffmpeg", "-i", input_path, "-af", "silenceremove=1:0:-36dB", "-ac", "1", "-ss", "0", "-t", "90", output_path, "-y"]  # argument list, no shell: paths containing spaces or shell metacharacters are passed to ffmpeg verbatim
+        out = subprocess.Popen(filter_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
         out.wait()
 
         with_silence_duration = os.popen("ffprobe -i '" + input_path + "' -show_format -v quiet | sed -n 's/duration=//p'").read()
@@ -41,11 +43,8 @@ def ffmpeg_silence_eliminator(self, input_path, output_path):
             print("Cannot convert float to string")
 
         # convert file to wave and read array
-        load_command = ["ffmpeg", "-i", output_path, "-f", "wav", "-" ]
-        p            = Popen(load_command, stdin=PIPE, stdout=PIPE, stderr=PIPE)
-        data         = p.communicate()[0]
-        audio_np     = np.frombuffer(data[data.find(b'\x00data')+ 9:], np.int16)
+        sample_rate, signal = scipy.io.wavfile.read(output_path)
 
         # delete temp silence free file, as we only need the array
         os.remove(output_path)
-        return audio_np, no_silence_duration
+        return signal, no_silence_duration
diff --git a/Code/SpeakerIdentifier.py b/Code/SpeakerIdentifier.py
@@ -2,8 +2,8 @@
 import pickle
 import warnings
 import numpy as np
-from SilenceEliminator import SilenceEliminator
 from FeaturesExtractor import FeaturesExtractor
+import scipy.io.wavfile
 
 warnings.filterwarnings("ignore")
 
@@ -32,11 +32,9 @@
 for path in file_paths[:]:
     if os.path.basename(path).split('_')[0] in db.keys():
         features_extractor = FeaturesExtractor()
-        silence_eliminator = SilenceEliminator()
 
-        silence_eliminated_wave_file_path ="temp-" + os.path.basename(path).split('.')[0] + ".wav"
-        audio, duration_string = silence_eliminator.ffmpeg_silence_eliminator(path, silence_eliminated_wave_file_path)
-        vector                 = features_extractor.accelerated_get_features_vector(path, audio, 8000)
+        sample_rate, signal = scipy.io.wavfile.read(path)
+        vector                 = features_extractor.accelerated_get_features_vector(path, signal, 8000)
 
         if vector.shape != (0,):
             print(vector.shape)

diff --git a/README.md b/README.md
@@ -41,4 +41,4 @@ This script require the follwing modules/libraries:
 
 ## Results and disscussion
 - The code can be further optimized using multi-threading, acceleration libs and multi-processing.
-- The accuracy can be further improved using GMM normalization aka a UBM-GMM system.
+- The accuracy can be further improved using GMM normalization aka a UBM-GMM system.
diff --git a/Run.py b/Run.py
@@ -4,19 +4,20 @@
 
 
 if __name__== "__main__":
-   # download dataset
-   print("# Download dataset zip file")
-   zip_url  = "http://www.openslr.org/resources/45/ST-AEDS-20180100_1-OS.tgz"
-   urllib.request.urlretrieve(zip_url, 'SLR45.tgz')
-
-   # extract and manage dataset files
-   print("# Mange and organize files")
-   os.system('python3 Code/DataManager.py')
-
-   # train speakers gmm models
-   print("# Train gender models")
-   os.system('python3 Code/ModelsTrainer.py')
-
-    # test system and recognise/identify speakers
-    print(" # Identify genders")
-    os.system('python3 Code/SpeakerIdentifier.py')
+
+    # download the dataset archive into the working directory
+    print("# Download dataset zip file")
+    zip_url  = "http://www.openslr.org/resources/45/ST-AEDS-20180100_1-OS.tgz"
+    urllib.request.urlretrieve(zip_url, 'SLR45.tgz')
+
+    # extract and manage dataset files (runs as a separate python3 process)
+    print("# Mange and organize files")
+    os.system('python3 Code/DataManager.py')
+
+    # train speakers gmm models (runs as a separate python3 process)
+    print("# Train gender models")
+    os.system('python3 Code/ModelsTrainer.py')
+
+    # test system and recognise/identify speakers (runs as a separate python3 process)
+    print(" # Identify genders")
+    os.system('python3 Code/SpeakerIdentifier.py')