Update all references to 44.1kHz with 16kHz (#275)

* Update all references to 44.1kHz with 16kHz
* also set high-freq

Co-authored-by: ben foley <ben@cbmm.io>
CoEDL · Dec 8, 2021 · 53c3a1e · 53c3a1e
1 parent 82bca05
commit 53c3a1e
Show file tree

Hide file tree

Showing 7 changed files with 9 additions and 9 deletions.
diff --git a/docs/wiki/elpis-workshop.md b/docs/wiki/elpis-workshop.md
@@ -302,7 +302,7 @@ You will typically get better results with few hours of files by using recording
 
 For Elpis, the file format requirements are:
 
-a) WAV audio, preferably 44.1kHz mono but the system can convert stereo files and resample from different sample rates.
+a) WAV audio, preferably 16kHz mono but the system can convert stereo files and resample from different sample rates.
 
 b) Orthographic transcription of the audio. For today’s workshop, the interface uses Elan transcriptions; soon we will also be able to use text files.
 

diff --git a/docs/wiki/preparing-files.md b/docs/wiki/preparing-files.md
@@ -29,9 +29,9 @@ Identify which Elan tier the transcriptions are on that you want the system to l
 
 ## Audio
 
-Elpis trains using 16 bit, 44.1kHz, mono WAV format audio. It will convert WAV files to the required specification, however converting your audio to these specifications beforehand will reduce the training time.
+Elpis trains using 16 bit, 16kHz, mono WAV format audio. It will convert WAV files to the required specification; however, converting your audio to these specifications beforehand will reduce the training time.
 - 16 bit is the bit depth, the number of bits used to store each audio sample.
-- 44.1kHz is the sample rate, also known as the sample frequency.
+- 16kHz is the sample rate, also known as the sample frequency.
 - Mono refers to the audio having only a single channel, rather than stereo being two channels.
 
 Ensure the audio is in WAV format. MP3 is not suitable because the MP3 compression removes much of the information in the audio signal. Converting from MP3 to WAV doesn't work either, as the information lost in the compression is not recovered in conversion. 

diff --git a/elpis/engines/common/input/resample.py b/elpis/engines/common/input/resample.py
@@ -6,6 +6,6 @@
 def resample(src_path: Path, dst_path: Path):
     src_path = Path(src_path)
     dst_path = Path(dst_path)
-    sox_arguments = [SOX_PATH, f'{src_path}', "-b", "16", "-c", "1", "-r", "44.1k", "-t", "wav",
+    sox_arguments = [SOX_PATH, f'{src_path}', "-b", "16", "-c", "1", "-r", "16k", "-t", "wav",
                      f'{dst_path}']
     subprocess.call(sox_arguments)
diff --git a/elpis/engines/common/input/resample_audio.py b/elpis/engines/common/input/resample_audio.py
@@ -42,7 +42,7 @@ def process_item(sox_arguments: Tuple[int, str, threading.Lock, Set[str], str])
     temporary_file_name = join_norm(output_directory, "%s.%s" % (base_directory, "wav"))
 
     if not os.path.exists(temporary_file_name):
-        sox_arguments = [SOX_PATH, input_name, "-b", "16", "-c", "1", "-r", "44.1k", "-t", "wav",
+        sox_arguments = [SOX_PATH, input_name, "-b", "16", "-c", "1", "-r", "16k", "-t", "wav",
                          temporary_file_name]
         subprocess.call(sox_arguments)
     return temporary_file_name

diff --git a/elpis/engines/kaldi/objects/model.py b/elpis/engines/kaldi/objects/model.py
@@ -144,10 +144,10 @@ def prepare_for_training():
                 with mfcc_resource.open() as fin:
                     content = Template(fin.read()).render(
                         {
-                            'MFCC_SAMPLE_FREQUENCY': '44100',
+                            'MFCC_SAMPLE_FREQUENCY': '16000',
                             'MFCC_FRAME_LENGTH': '25',
                             'MFCC_LOW_FREQ': '20',
-                            'MFCC_HIGH_FREQ': '22050',
+                            'MFCC_HIGH_FREQ': '7800',
                             'MFCC_NUM_CEPS': '7',
                         }
                     )

diff --git a/elpis/gui/src/translations/en-GB/common.json b/elpis/gui/src/translations/en-GB/common.json
@@ -63,7 +63,7 @@
         },
         "files": {
             "title": "Add files",
-            "description": "Upload your language recording and transcription files here. \n Please use 44.1kHz WAV format audio, and .eaf Elan transcription files. \n Audio files and transcription files need to have matching filenames. \n You can also upload text files like wordlists or stories that don't have audio.",
+            "description": "Upload your language recording and transcription files here. \n Please use 16kHz WAV format audio, and .eaf Elan transcription files. \n Audio files and transcription files need to have matching filenames. \n You can also upload text files like wordlists or stories that don't have audio.",
             "filesHeader": "Uploaded files",
             "audioFilesHeader": "Audio files",
             "transcriptionFilesHeader": "Transcription files",

diff --git a/elpis/gui/src/translations/hin/common.json b/elpis/gui/src/translations/hin/common.json
@@ -63,7 +63,7 @@
         },
         "files": {
             "title": "फाइलें जोड़ें",
-            "description": "अपनी भाषा की रिकॉर्डिंग और प्रतिलेखन की फ़ाइलें यहां अपलोड करें। \n कृपया 44.1kHz WAV प्रारूप ऑडियो, और .eaf Elan प्रतिलेखन फ़ाइलों का उपयोग करें। \n ऑडियो फाइलों और प्रतिलेखन फाइलों के मेल खाने वाले नाम होने चाहिए। \n आप लिखित फ़ाइलें जैसे शब्द सूची या ऐसी कहानियां भी अपलोड कर सकते हैं जिनमें ऑडियो नहीं हो।",
+            "description": "अपनी भाषा की रिकॉर्डिंग और प्रतिलेखन की फ़ाइलें यहां अपलोड करें। \n कृपया 16kHz WAV प्रारूप ऑडियो, और .eaf Elan प्रतिलेखन फ़ाइलों का उपयोग करें। \n ऑडियो फाइलों और प्रतिलेखन फाइलों के मेल खाने वाले नाम होने चाहिए। \n आप लिखित फ़ाइलें जैसे शब्द सूची या ऐसी कहानियां भी अपलोड कर सकते हैं जिनमें ऑडियो नहीं हो।",
             "filesHeader": "अपलोड की गई फ़ाइलें",
             "audioFilesHeader": "ऑडियो फ़ाइलें",
             "transcriptionFilesHeader": "प्रतिलेखन फ़ाइलें",