In [None]:
# Resolve Google Application Default Credentials. `default()` yields a pair
# that is unpacked into the credentials object and the associated project.
from google.auth import default
credentials, project = default()

In [None]:
# Display the project resolved by `google.auth.default()` as a sanity check.
project

In [None]:
%load_ext autoreload
%autoreload 2
import os
import sys
from pathlib import Path
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from src.utils import load_json, save_text_file, load_text_file
from src.nlp import get_vocab_dictionary_from_phrases
from src.utils import get_longman_verb_vocab_dict, save_text_file, load_text_file
from src.phrase import generate_phrases_from_vocab_dict, generate_scenario_phrases, generate_scenario_vocab_building_phrases
from src.config_loader import config
from src.gcs_storage import get_phrase_index_path, get_phrase_audio_path, read_from_gcs, get_phrase_path, upload_to_gcs, get_translated_phrases_path
COLLECTION = "COMMUNITY_GARDEN"
config.TARGET_LANGUAGE_NAME

In [None]:
# Phrases to process for this collection (one literal per line for clean diffs).
phrases = [
    "How can we improve this?",
    "Can you carry this bag?",
]

In [None]:

# Store the phrase list in the private bucket at the collection's phrase path.
upload_to_gcs(
    obj=phrases,
    bucket_name=config.GCS_PRIVATE_BUCKET,
    file_name=get_phrase_path(collection=COLLECTION),
)

In [None]:
# Re-read the phrase list from the private bucket, replacing the in-memory
# `phrases` with whatever is stored at the collection's phrase path.
phrases = read_from_gcs(
    config.GCS_PRIVATE_BUCKET,
    file_path=get_phrase_path(collection=COLLECTION),
)

In [None]:
# Generate source-language audio (English, per the original cell note) for
# every phrase at two speeds, then upload both clips to the private bucket.
from audio_generation import clean_tts_text, slow_text_to_speech, text_to_speech
from convert import clean_filename

for phrase in phrases:
    cleaned_phrase = clean_tts_text(phrase)
    # Storage key derived from the raw (uncleaned) phrase; computed once and
    # shared by both audio paths.
    phrase_key = clean_filename(phrase)

    # Slow rendition with word breaks.
    # NOTE: the `target_` prefix is historical -- both calls request the
    # *source* language via config_language="source".
    target_slow = slow_text_to_speech(
        text=cleaned_phrase,
        config_language="source",
        gender="FEMALE",
        speaking_rate=config.SPEAKING_RATE_SLOW,
        word_break_ms=config.WORD_BREAK_MS,
        voice_setting="phrases",
    )

    # Normal-speed rendition of the same cleaned text.
    target_normal = text_to_speech(
        text=cleaned_phrase,
        config_language="source",
        gender="FEMALE",
        speaking_rate=1.0,
        voice_setting="phrases",
    )

    audio_normal_path = get_phrase_audio_path(
        phrase_key=phrase_key,
        speed="normal",
        language=config.SOURCE_LANGUAGE_NAME,
    )
    audio_slow_path = get_phrase_audio_path(
        phrase_key=phrase_key,
        speed="slow",
        language=config.SOURCE_LANGUAGE_NAME,
    )

    upload_to_gcs(obj=target_normal, bucket_name=config.GCS_PRIVATE_BUCKET, file_name=audio_normal_path)
    upload_to_gcs(obj=target_slow, bucket_name=config.GCS_PRIVATE_BUCKET, file_name=audio_slow_path)

In [None]:
# Translate the phrases, run the batch review over the translations, and
# upload the reviewed result to the private bucket.

# `clean_filename` is imported here as well so this cell does not depend on
# the audio-generation cell having been executed first.
from convert import clean_filename
from src.translation import review_translated_phrases_batch, translate_phrases

language_name_lower = config.TARGET_LANGUAGE_NAME.lower()
translated_phrases_path = get_translated_phrases_path(
    collection=COLLECTION,
    language=language_name_lower,
)

# Build {phrase_key: {"english": ..., <target language>: ...}}.
# The output of translate_phrases is consumed as (phrase, translation) pairs.
results = dict()
translated_phrases = translate_phrases(phrases)
for phrase, translation in translated_phrases:
    phrase_key = clean_filename(phrase)
    results[phrase_key] = {
        "english": phrase,
        language_name_lower: translation,
    }

improved_translations = review_translated_phrases_batch(results)

upload_to_gcs(
    improved_translations,
    bucket_name=config.GCS_PRIVATE_BUCKET,
    file_name=translated_phrases_path,
)

In [None]:
# Add Wiktionary links to the reviewed translations.
from gcs_storage import get_wiktionary_cache_path

from src.wiktionary import add_wiktionary_links

# Begin with an empty link cache. The disabled line below is the alternative
# that would load a previously stored cache from GCS instead:
#word_link_cache= read_from_gcs(config.GCS_PRIVATE_BUCKET, file_path = get_wiktionary_cache_path(language_name="English"))
word_link_cache = {}

# The call returns the enriched translations together with the updated cache.
phrase_translations, word_link_cache = add_wiktionary_links(
    improved_translations,
    word_link_cache,
    overwrite=True,
    language="source",
)



In [None]:
# Write the link-enriched translations to the same translated-phrases path
# used for the reviewed translations.
upload_to_gcs(
    phrase_translations,
    bucket_name=config.GCS_PRIVATE_BUCKET,
    file_name=translated_phrases_path,
)

In [None]:
# Build the Anki export data from GCS. Images are assumed to have been
# produced already by a separate notebook.
from phrase import build_phrase_dict_from_gcs

anki_data = build_phrase_dict_from_gcs(
    collection=COLLECTION,
    bucket_name=config.GCS_PRIVATE_BUCKET,
    text_language=config.TARGET_LANGUAGE_NAME,
    audio_language="English",
)
# Next step: generate the Anki decks from `anki_data`.

In [None]:
# Inspect the assembled phrase data before exporting it.
anki_data

In [None]:
# Export the assembled phrase data to an Anki deck.
from anki_tools import export_phrases_to_anki_learning_english

export_phrases_to_anki_learning_english(
    anki_data,
    collection=COLLECTION,
    deck_name="Learning English Test",
)