    Copyright 2021 Google LLC

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        https://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.


# Dialogflow CX Bot Language Translation (Translation Notebook)

Contains helper functions that are reused by the Main Notebook:

1. Functions for translating text
2. Functions for CX component-specific translations

[Public Doc Link: Python Client for Cloud Translate](https://cloud.google.com/translate/docs/reference/libraries/v3/python)

# Translation

## This Notebook Env:

In [None]:
# !python3 -V
# !python3 -m pip list | wc -l
# !python3 -m pip list | grep google

In [None]:
# Widen the notebook's ".container" element to 80% of the page by injecting a CSS rule.
# `display` is imported from the public IPython.display module; importing it
# from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

## Google Translate & Vars

In [None]:
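# Google Cloud Translate client setup (kept commented out in this notebook).
# NOTE: translate() below uses TranslateTextRequest, translate_client and
# Cloud_Translate_Parent, so they must be defined before it is called
# (presumably by the notebook that runs this one).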

# from google.cloud.translate_v3beta1.services.translation_service import TranslationServiceClient
# from google.cloud.translate_v3beta1.types.translation_service import TranslateTextRequest, TranslateTextResponse, Translation
# Cloud_Translate_Parent = ''
# translate_client = TranslationServiceClient()

## Functions: Helpers & Core Translation

In [None]:
####################################
def set_no_translate_markings(text):
    """Return ``text`` wrapped in a ``<span translate="no">...</span>`` pair.

    Args:
        text: The string to wrap as a whole.

    Returns:
        The wrapped string.
    """
    pieces = ('<span translate="no">', text, '</span>')
    return ''.join(pieces)

#####################################
def eval_no_translate_markings(text):
    """Wrap every ``$``-prefixed fragment of ``text`` in a no-translate span.

    Within each space-separated word that contains a ``$``, everything from
    the first ``$`` up to the end of that word (the next space character or
    the end of the string) is enclosed in ``<span translate="no">`` and
    ``</span>``. Only the space character delimits words; tabs and newlines
    do not.

    Args:
        text: The string to scan.

    Returns:
        ``text`` unchanged when it contains no ``$``, otherwise the string
        with the spans inserted.
    """
    open_tag = '<span translate="no">'
    close_tag = '</span>'

    # Nothing to protect: hand the input back untouched.
    if text.find('$') == -1:
        return text

    marked_words = []
    for word in text.split(' '):
        dollar_at = word.find('$')
        if dollar_at == -1:
            marked_words.append(word)
        else:
            # Keep any prefix before the '$' translatable; protect the rest of the word.
            marked_words.append(
                word[:dollar_at] + open_tag + word[dollar_at:] + close_tag
            )
    return ' '.join(marked_words)
    
####################################
def remove_translate_markings(text):
    """Strip the no-translate span markup from ``text``.

    Every occurrence of ``<span translate="no">`` and every occurrence of
    ``</span>`` is deleted, including closing tags that belong to other spans.

    Args:
        text: The string to clean.

    Returns:
        The string with both markers removed.
    """
    for marker in ('<span translate="no">', '</span>'):
        text = text.replace(marker, '')
    return text

##########################################################
def _split_into_batches(texts, max_items=1023, max_chars=30000):
    """Group ``texts`` into consecutive batches that each respect both limits.

    A batch is closed as soon as adding the next text would push it past
    ``max_items`` entries or ``max_chars`` total characters. A single text
    longer than ``max_chars`` is placed in a batch of its own (it is not
    split), and no empty batch is ever produced.
    """
    batches = []
    batch = []
    batch_chars = 0
    for text in texts:
        text_chars = len(text)
        would_overflow = (
            len(batch) + 1 > max_items or batch_chars + text_chars > max_chars
        )
        # Only flush a non-empty batch; otherwise an oversized first text
        # would emit an empty batch and trigger a request with no contents.
        if batch and would_overflow:
            batches.append(batch)
            batch = []
            batch_chars = 0
        batch.append(text)
        batch_chars += text_chars
    if batch:
        batches.append(batch)
    return batches


def translate(texts, source_lang, target_lang, mime_type):
    """Translate ``texts`` with Cloud Translate, batching to stay within API limits.

    According to
    https://cloud.google.com/translate/docs/reference/rest/v3beta1/projects/translateText#request-body
    a request is limited to fewer than 1024 content entries and 30k
    characters/codepoints in total, so the input is sent in as many requests
    as needed.

    Relies on ``TranslateTextRequest``, ``translate_client`` and
    ``Cloud_Translate_Parent`` being defined at module level.

    Args:
        texts: Sequence of strings to translate.
        source_lang: Source language code placed on the request.
        target_lang: Target language code placed on the request.
        mime_type: Mime type placed on the request. For ``'text/html'`` the
            no-translate span markup is stripped from each result and HTML
            entities are unescaped.

    Returns:
        A list with one single-element list per translation, in the order
        the translations were returned (``[[t1], [t2], ...]``); an empty
        list when ``texts`` is empty.
    """
    # Imported locally so the function does not depend on another cell or
    # notebook having imported ``html``.
    import html

    if len(texts) == 0:
        return []

    translated_list = []
    for batch in _split_into_batches(texts):
        req = TranslateTextRequest()
        req.contents = batch
        req.mime_type = mime_type
        req.source_language_code = source_lang
        req.target_language_code = target_lang
        req.parent = Cloud_Translate_Parent
        translations = translate_client.translate_text(req).translations
        for translation in translations:
            translated = translation.translated_text
            if mime_type == 'text/html':
                translated = html.unescape(remove_translate_markings(translated))
            # Each result is wrapped in its own list (one row per text).
            translated_list.append([translated])
    return translated_list
    

## Translate CX Functions

In [None]:
##################################################
def get_source_texts_for_translate(component, df):
    """Collect the source-language texts of a sheet, marked up for translation.

    Rows 0 and 1 of ``df`` are skipped; every row from index 2 onwards
    contributes the cell in the component's source-language column:

    * Training_Phrases: the cell as-is.
    * Entities: the cell as-is when column 2 of that row equals ``'1'``,
      otherwise the whole cell wrapped via ``set_no_translate_markings``.
      (NOTE(review): the meaning of the ``'1'`` flag is not visible here.)
    * Anything else (Flows, Pages, Route_Groups): the cell passed through
      ``eval_no_translate_markings`` so ``$``-prefixed fragments are protected.

    A sheet with two rows or fewer yields an empty list instead of raising.

    Args:
        component: A ``SheetsName`` member identifying the sheet.
        df: DataFrame holding the sheet contents.

    Returns:
        List of texts, one per data row, in row order.
    """
    if component == SheetsName.Training_Phrases:
        source_lang_col = 2
    elif component == SheetsName.Pages:
        source_lang_col = 6
    else:  # Entities, Flows & Route_Groups
        source_lang_col = 4

    print(f'translate_cx({component.name}): get_source_texts_for_translate({component.name},df) - Reading {len(df)} rows from Sheets')

    input_texts = []
    for row in range(2, len(df)):
        cell = df.iloc[row, source_lang_col]
        if component == SheetsName.Training_Phrases:
            input_texts.append(cell)
        elif component == SheetsName.Entities:
            if df.iloc[row, 2] == '1':
                input_texts.append(cell)
            else:
                input_texts.append(set_no_translate_markings(cell))
        else:  # Flows, Pages & Route_Groups
            input_texts.append(eval_no_translate_markings(cell))

    return input_texts

################################################################################
def translate_and_write_to_sheets(component, df, input_texts, target_lang, is_one_lang_only):
    """Translate ``input_texts`` into one or more target languages and write the results.

    The source language code is read from row 1 of the component's
    source-language column, and each target language code from row 1 of the
    target column being processed. For every target column a
    ``'translating...'`` placeholder is written first, then the translated
    texts are written to the same location via ``write_result``.

    Args:
        component: A ``SheetsName`` member; its ``name`` is passed to
            ``write_result``.
        df: DataFrame holding the sheet contents.
        input_texts: Texts to translate, as produced by
            ``get_source_texts_for_translate``.
        target_lang: Column label of the first target column to process, or
            ``None`` to start at the column right after the source-language
            column.
        is_one_lang_only: When true, only the starting column is processed;
            otherwise every column from the starting one to the last column
            of ``df`` is processed.
    """
    if component == SheetsName.Training_Phrases:
        source_lang_col = 2
    elif component == SheetsName.Pages:
        source_lang_col = 6
    else:  # Entities, Flows & Route_Groups
        source_lang_col = 4
    # For every component the first target column directly follows the source column.
    target_lang_starting_col = source_lang_col + 1
    mime_type = 'text/html'

    source_lang = df.iloc[1, source_lang_col]  # for translate

    if target_lang is not None:
        first_col = df.columns.get_loc(target_lang)
    else:
        first_col = target_lang_starting_col

    if is_one_lang_only:
        target_cols = [first_col]
    else:
        target_cols = range(first_col, len(df.columns))

    for col in target_cols:
        lang_code = df.iloc[1, col]  # for translate
        # NOTE(review): write_result presumably takes a 1-based column (hence
        # col + 1) and a starting row of 4 - confirm against its definition.
        write_result(component.name, col + 1, 4, [['translating...']])
        print(f'translate_cx({component.name}): translation target language = [{lang_code}]')
        translated_texts = translate(input_texts, source_lang, lang_code, mime_type)
        write_result(component.name, col + 1, 4, translated_texts)

    
############################
def translate_cx(component):
    """Translate a component's sheet into every target-language column.

    Reads the sheet named ``component.name``, collects its source texts and,
    when there are any, translates them starting at the default first target
    column through the last column. Start, row count and elapsed time are
    printed.

    Args:
        component: A ``SheetsName`` member identifying the sheet.
    """
    started_at = timer()
    print(f"START:\t\ttranslate_cx({component})")

    sheet_df = read_sheet(component.name)
    print(f'\ntranslate_cx({component.name}): num rows in sheets: {len(sheet_df)}')

    source_texts = get_source_texts_for_translate(component, sheet_df)
    if len(source_texts) > 0:  # something to translate
        translate_and_write_to_sheets(component, sheet_df, source_texts, None, False)

    elapsed = timer() - started_at
    print(f"\nCOMPLETED:\ttranslate_cx({component}) in {elapsed}s\n")
    
##########################################
def translate_cx_by_lang(component, lang):
    """Translate a component's sheet into a single target language.

    Reads the sheet named ``component.name``, collects its source texts and,
    when there are any, translates them into the column labelled ``lang``
    only. Start, row count and elapsed time are printed.

    Args:
        component: A ``SheetsName`` member identifying the sheet.
        lang: Column label of the target language to translate into.
    """
    started_at = timer()
    print(f"START:\t\ttranslate_cx_by_lang({component}, {lang})")

    sheet_df = read_sheet(component.name)
    print(f'\ntranslate_cx_by_lang({component.name},{lang}): num rows in sheets: {len(sheet_df)}')

    source_texts = get_source_texts_for_translate(component, sheet_df)
    if len(source_texts) > 0:  # something to translate
        translate_and_write_to_sheets(component, sheet_df, source_texts, lang, True)

    elapsed = timer() - started_at
    print(f"\nCOMPLETED:\ttranslate_cx_by_lang({component}, {lang}) in {elapsed}s\n")
    
############################################
def translate_cx_from_lang(component, lang):
    """Translate a component's sheet starting at a given target language.

    Reads the sheet named ``component.name``, collects its source texts and,
    when there are any, translates them into the column labelled ``lang``
    and every column after it. Start, row count and elapsed time are printed.

    Args:
        component: A ``SheetsName`` member identifying the sheet.
        lang: Column label of the first target language to translate into.
    """
    started_at = timer()
    print(f"START:\t\ttranslate_cx_from_lang({component}, {lang})")

    sheet_df = read_sheet(component.name)
    print(f'\ntranslate_cx_from_lang({component.name},{lang}): num rows in sheets: {len(sheet_df)}')

    source_texts = get_source_texts_for_translate(component, sheet_df)
    if len(source_texts) > 0:  # something to translate
        translate_and_write_to_sheets(component, sheet_df, source_texts, lang, False)

    elapsed = timer() - started_at
    print(f"\nCOMPLETED:\ttranslate_cx_from_lang({component}, {lang}) in {elapsed}s\n")

# END

In [None]:
# Printed when this final cell runs; presumably serves as a completion marker
# when the notebook is executed top to bottom.
print('Translation Notebook: RAN successfully to desired point')