Fix line-length errors: apply Black formatter to engines/common/input (#288)

* Initial formatting of the engines/common/input folder
* Formatting according to the 100-character line length limit
CoEDL · Jun 16, 2022 · 34629a5 · 34629a5
1 parent 2a0e1a0
commit 34629a5
Show file tree

Hide file tree

Showing 6 changed files with 18 additions and 53 deletions.
diff --git a/elpis/engines/common/input/clean_json.py b/elpis/engines/common/input/clean_json.py
@@ -102,11 +102,7 @@ def are_words_valid(
         return False
 
     # Exclude utterance if > 10% english
-    if (
-        remove_english
-        and len(clean_words) > 0
-        and english_word_count / len(clean_words) > 0.1
-    ):
+    if remove_english and len(clean_words) > 0 and english_word_count / len(clean_words) > 0.1:
         # logger.debug(round(english_word_count / len(clean_words)), trans, file=sys.stderr)
         return False
 
@@ -305,9 +301,7 @@ def main() -> None:
         action="store_true",
     )
     # TODO add defaults
-    parser.add_argument(
-        "-c", "--punctuation_to_collapse_by", type=str, help="Chars to strip"
-    )
+    parser.add_argument("-c", "--punctuation_to_collapse_by", type=str, help="Chars to strip")
     parser.add_argument(
         "-e",
         "--punctuation_to_explode_by",

diff --git a/elpis/engines/common/input/elan_to_json.py b/elpis/engines/common/input/elan_to_json.py
@@ -71,9 +71,7 @@ def process_eaf(
     :return: a list of dictionaries, where each dictionary is an annotation
     """
 
-    logger.info(
-        f"processing eaf {input_elan_file} using {tier_order} {tier_type} {tier_name}"
-    )
+    logger.info(f"processing eaf {input_elan_file} using {tier_order} {tier_type} {tier_name}")
 
     # Get paths to files
     input_directory, full_file_name = os.path.split(input_elan_file)
@@ -178,22 +176,16 @@ def main():
     parser.add_argument(
         "-o", "--output_dir", help="Output directory", default="../input/output/tmp/"
     )
-    parser.add_argument(
-        "-t", "--tier", help="Target language tier name", default="Phrase"
-    )
+    parser.add_argument("-t", "--tier", help="Target language tier name", default="Phrase")
     parser.add_argument("-j", "--output_json", help="File path to output json")
     arguments: argparse.Namespace = parser.parse_args()
 
     # Build output directory if needed
     if not os.path.exists(arguments.output_dir):
         os.makedirs(arguments.output_dir)
 
-    all_files_in_directory = set(
-        glob.glob(os.path.join(arguments.input_dir, "**"), recursive=True)
-    )
-    input_elan_files = [
-        file_ for file_ in all_files_in_directory if file_.endswith(".eaf")
-    ]
+    all_files_in_directory = set(glob.glob(os.path.join(arguments.input_dir, "**"), recursive=True))
+    input_elan_files = [file_ for file_ in all_files_in_directory if file_.endswith(".eaf")]
 
     annotations_data = []
 

diff --git a/elpis/engines/common/input/make_prn_dict.py b/elpis/engines/common/input/make_prn_dict.py
@@ -118,16 +118,13 @@ def generate_pronunciation_dictionary(
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("-i", "--infile", type=str, required=True, help="")
-    parser.add_argument(
-        "-o", "--outfile", type=str, required=True, help="name of the output file"
-    )
+    parser.add_argument("-o", "--outfile", type=str, required=True, help="name of the output file")
     parser.add_argument(
         "-c",
         "--config",
         type=str,
         required=True,
-        help="configuration file with one letter/symbol "
-        "-> sound mapping in each line",
+        help="configuration file with one letter/symbol " "-> sound mapping in each line",
     )
     arguments = parser.parse_args()
 

diff --git a/elpis/engines/common/input/resample_audio.py b/elpis/engines/common/input/resample_audio.py
@@ -67,8 +67,7 @@ def process_item(sox_arguments: Tuple[int, str, threading.Lock, Set[str], str])
 
 def main() -> None:
     parser = argparse.ArgumentParser(
-        description="This script will silence a wave file based on "
-        "annotations in an Elan tier "
+        description="This script will silence a wave file based on " "annotations in an Elan tier "
     )
     parser.add_argument(
         "-c",

diff --git a/elpis/engines/common/input/trs_to_json.py b/elpis/engines/common/input/trs_to_json.py
@@ -32,18 +32,14 @@ def conditional_log(condition: bool, text: str) -> None:
     if condition:
         if platform.system() == "Windows":
             sys.stderr.write(
-                text.encode("cp850", errors="backslashreplace").decode(
-                    sys.stdout.encoding
-                )
+                text.encode("cp850", errors="backslashreplace").decode(sys.stdout.encoding)
             )
         else:
             sys.stderr.write(text)
         sys.stderr.flush()
 
 
-def process_trs(
-    file_name: str, verbose_output: bool
-) -> List[Dict[str, Union[str, float]]]:
+def process_trs(file_name: str, verbose_output: bool) -> List[Dict[str, Union[str, float]]]:
 
     """
     Method to process the trs files and return a list of utterances.
@@ -91,17 +87,14 @@ def process_turn(
     turn_end: float = float(turn_node.attrib["endTime"])
     speaker_id: str = turn_node.get("speaker", "")
 
-    speaker_name_node: ElementTree.Element = tree.find(
-        ".//Speaker[@id='%s']" % speaker_id
-    )
+    speaker_name_node: ElementTree.Element = tree.find(".//Speaker[@id='%s']" % speaker_id)
     if speaker_name_node is not None:
         speaker_name: str = speaker_name_node.attrib["name"]
     else:
         speaker_name: str = str(uuid.uuid4())
 
     items: List[Tuple[str, str]] = [
-        (element.attrib["time"], element.tail.strip())
-        for element in turn_node.findall("./Sync")
+        (element.attrib["time"], element.tail.strip()) for element in turn_node.findall("./Sync")
     ]
     wave_file_name = os.path.join(".", wave_name)
 

diff --git a/elpis/engines/common/input/vad.py b/elpis/engines/common/input/vad.py
@@ -6,9 +6,7 @@
 from typing import Any, Dict, List, Tuple
 
 
-def get_chunks(
-    audio_path: str, method: str, parameter: float
-) -> List[Tuple[float, float]]:
+def get_chunks(audio_path: str, method: str, parameter: float) -> List[Tuple[float, float]]:
     """
     Chunk voice sections from audio data extracted from an audio path with the chosen method (with its parameter).
 
@@ -40,9 +38,7 @@ def read_audio_path(audio_path: str) -> Dict[str, Any]:
     return {"signal": audio_signal, "rate": sampling_rate, "top db": top_db}
 
 
-def find_best_threshold(
-    audio_data: Dict[str, Any], method: str, parameter: str
-) -> float:
+def find_best_threshold(audio_data: Dict[str, Any], method: str, parameter: str) -> float:
     """
     Find the best threshold of audio data for the chosen method and parameter. For all methods, if the result is higher than top db, it is lowered to the latter.
 
@@ -71,9 +67,7 @@ def find_best_threshold(
             else audio_data["top db"]
         )
     elif method == "threshold":
-        threshold = (
-            parameter if parameter < audio_data["top db"] else audio_data["top db"]
-        )
+        threshold = parameter if parameter < audio_data["top db"] else audio_data["top db"]
     return threshold
 
 
@@ -92,9 +86,7 @@ def get_continuum(
     for index, threshold in enumerate(thresholds):
         timestamps = get_voice_sections(audio_data, threshold)
         durations = [end - begin for begin, end in timestamps]
-        limited_durations = [
-            duration for duration in durations if duration <= max_duration
-        ]
+        limited_durations = [duration for duration in durations if duration <= max_duration]
         values.append(
             {
                 "timestamps": list(timestamps),
@@ -107,9 +99,7 @@ def get_continuum(
     return values
 
 
-def get_voice_sections(
-    audio_data: Dict[str, Any], threshold: float
-) -> List[Tuple[float, float]]:
+def get_voice_sections(audio_data: Dict[str, Any], threshold: float) -> List[Tuple[float, float]]:
     """
     Find the voice sections (in seconds) of an audio data according to a threshold.