Skip to content

Commit

Permalink
Fixing line length error: Black formatter engines/common/input (#288)
Browse files: browse the repository at this point in the history
* Initial formatting of engines/common/input folder

* Formatting according to the 100-character line length limit
  • Loading branch information
aviraljain99 committed Jun 16, 2022
1 parent 2a0e1a0 commit 34629a5
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 53 deletions.
10 changes: 2 additions & 8 deletions elpis/engines/common/input/clean_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,7 @@ def are_words_valid(
return False

# Exclude utterance if > 10% english
if (
remove_english
and len(clean_words) > 0
and english_word_count / len(clean_words) > 0.1
):
if remove_english and len(clean_words) > 0 and english_word_count / len(clean_words) > 0.1:
# logger.debug(round(english_word_count / len(clean_words)), trans, file=sys.stderr)
return False

Expand Down Expand Up @@ -305,9 +301,7 @@ def main() -> None:
action="store_true",
)
# TODO add defaults
parser.add_argument(
"-c", "--punctuation_to_collapse_by", type=str, help="Chars to strip"
)
parser.add_argument("-c", "--punctuation_to_collapse_by", type=str, help="Chars to strip")
parser.add_argument(
"-e",
"--punctuation_to_explode_by",
Expand Down
16 changes: 4 additions & 12 deletions elpis/engines/common/input/elan_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,7 @@ def process_eaf(
:return: a list of dictionaries, where each dictionary is an annotation
"""

logger.info(
f"processing eaf {input_elan_file} using {tier_order} {tier_type} {tier_name}"
)
logger.info(f"processing eaf {input_elan_file} using {tier_order} {tier_type} {tier_name}")

# Get paths to files
input_directory, full_file_name = os.path.split(input_elan_file)
Expand Down Expand Up @@ -178,22 +176,16 @@ def main():
parser.add_argument(
"-o", "--output_dir", help="Output directory", default="../input/output/tmp/"
)
parser.add_argument(
"-t", "--tier", help="Target language tier name", default="Phrase"
)
parser.add_argument("-t", "--tier", help="Target language tier name", default="Phrase")
parser.add_argument("-j", "--output_json", help="File path to output json")
arguments: argparse.Namespace = parser.parse_args()

# Build output directory if needed
if not os.path.exists(arguments.output_dir):
os.makedirs(arguments.output_dir)

all_files_in_directory = set(
glob.glob(os.path.join(arguments.input_dir, "**"), recursive=True)
)
input_elan_files = [
file_ for file_ in all_files_in_directory if file_.endswith(".eaf")
]
all_files_in_directory = set(glob.glob(os.path.join(arguments.input_dir, "**"), recursive=True))
input_elan_files = [file_ for file_ in all_files_in_directory if file_.endswith(".eaf")]

annotations_data = []

Expand Down
7 changes: 2 additions & 5 deletions elpis/engines/common/input/make_prn_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,13 @@ def generate_pronunciation_dictionary(
def main():
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--infile", type=str, required=True, help="")
parser.add_argument(
"-o", "--outfile", type=str, required=True, help="name of the output file"
)
parser.add_argument("-o", "--outfile", type=str, required=True, help="name of the output file")
parser.add_argument(
"-c",
"--config",
type=str,
required=True,
help="configuration file with one letter/symbol "
"-> sound mapping in each line",
help="configuration file with one letter/symbol " "-> sound mapping in each line",
)
arguments = parser.parse_args()

Expand Down
3 changes: 1 addition & 2 deletions elpis/engines/common/input/resample_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ def process_item(sox_arguments: Tuple[int, str, threading.Lock, Set[str], str])

def main() -> None:
parser = argparse.ArgumentParser(
description="This script will silence a wave file based on "
"annotations in an Elan tier "
description="This script will silence a wave file based on " "annotations in an Elan tier "
)
parser.add_argument(
"-c",
Expand Down
15 changes: 4 additions & 11 deletions elpis/engines/common/input/trs_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,14 @@ def conditional_log(condition: bool, text: str) -> None:
if condition:
if platform.system() == "Windows":
sys.stderr.write(
text.encode("cp850", errors="backslashreplace").decode(
sys.stdout.encoding
)
text.encode("cp850", errors="backslashreplace").decode(sys.stdout.encoding)
)
else:
sys.stderr.write(text)
sys.stderr.flush()


def process_trs(
file_name: str, verbose_output: bool
) -> List[Dict[str, Union[str, float]]]:
def process_trs(file_name: str, verbose_output: bool) -> List[Dict[str, Union[str, float]]]:

"""
Method to process the trs files and return a list of utterances.
Expand Down Expand Up @@ -91,17 +87,14 @@ def process_turn(
turn_end: float = float(turn_node.attrib["endTime"])
speaker_id: str = turn_node.get("speaker", "")

speaker_name_node: ElementTree.Element = tree.find(
".//Speaker[@id='%s']" % speaker_id
)
speaker_name_node: ElementTree.Element = tree.find(".//Speaker[@id='%s']" % speaker_id)
if speaker_name_node is not None:
speaker_name: str = speaker_name_node.attrib["name"]
else:
speaker_name: str = str(uuid.uuid4())

items: List[Tuple[str, str]] = [
(element.attrib["time"], element.tail.strip())
for element in turn_node.findall("./Sync")
(element.attrib["time"], element.tail.strip()) for element in turn_node.findall("./Sync")
]
wave_file_name = os.path.join(".", wave_name)

Expand Down
20 changes: 5 additions & 15 deletions elpis/engines/common/input/vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
from typing import Any, Dict, List, Tuple


def get_chunks(
audio_path: str, method: str, parameter: float
) -> List[Tuple[float, float]]:
def get_chunks(audio_path: str, method: str, parameter: float) -> List[Tuple[float, float]]:
"""
Chunk voice sections from audio data extracted from an audio path with the chosen method (with its parameter).
Expand Down Expand Up @@ -40,9 +38,7 @@ def read_audio_path(audio_path: str) -> Dict[str, Any]:
return {"signal": audio_signal, "rate": sampling_rate, "top db": top_db}


def find_best_threshold(
audio_data: Dict[str, Any], method: str, parameter: str
) -> float:
def find_best_threshold(audio_data: Dict[str, Any], method: str, parameter: str) -> float:
"""
Find the best threshold of audio data for the chosen method and parameter. For all methods, if the result is higher than top db, it is lowered to the latter.
Expand Down Expand Up @@ -71,9 +67,7 @@ def find_best_threshold(
else audio_data["top db"]
)
elif method == "threshold":
threshold = (
parameter if parameter < audio_data["top db"] else audio_data["top db"]
)
threshold = parameter if parameter < audio_data["top db"] else audio_data["top db"]
return threshold


Expand All @@ -92,9 +86,7 @@ def get_continuum(
for index, threshold in enumerate(thresholds):
timestamps = get_voice_sections(audio_data, threshold)
durations = [end - begin for begin, end in timestamps]
limited_durations = [
duration for duration in durations if duration <= max_duration
]
limited_durations = [duration for duration in durations if duration <= max_duration]
values.append(
{
"timestamps": list(timestamps),
Expand All @@ -107,9 +99,7 @@ def get_continuum(
return values


def get_voice_sections(
audio_data: Dict[str, Any], threshold: float
) -> List[Tuple[float, float]]:
def get_voice_sections(audio_data: Dict[str, Any], threshold: float) -> List[Tuple[float, float]]:
"""
Find the voice sections (in seconds) of an audio data according to a threshold.
Expand Down

0 comments on commit 34629a5

Please sign in to comment.