## 0. Preliminary

In [14]:
import pickle
import os
import pandas as pd
import sys
import urllib.request
import requests

### Augmentation Functions
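- augment: Back-translation. The input text is translated from the source language (src_lang) through each target language in tgt_lang, in order, and finally back into the source language. A translation callable, invoked as module(text, src_lang, tgt_lang), is required.
- augment_combination: Runs augment once for each target-language combination in tgt_lang_comb and returns the list of augmented texts.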

In [7]:
def augment(text: str, src_lang: str, tgt_lang: "list | str", module) -> str:
    """Back-translate ``text`` through one or more pivot languages.

    The text is translated from ``src_lang`` into each language of
    ``tgt_lang`` in order, and finally back into ``src_lang``.

    Args:
        text: The sentence to augment.
        src_lang: Language ID of ``text``; the final hop translates back to it.
        tgt_lang: A single pivot language ID, or a sequence of IDs that are
            visited in order.
        module: Translation callable invoked as
            ``module(text, source_id, target_id)``.

    Returns:
        The value returned by the last ``module`` call (the hop back into
        ``src_lang``).

    Raises:
        ValueError: If ``tgt_lang`` is an empty sequence. Without this check
            the function would fail later on an unassigned local variable.
    """
    # A bare string is shorthand for a one-language chain.
    if isinstance(tgt_lang, str):
        tgt_lang = [tgt_lang]
    if len(tgt_lang) == 0:
        raise ValueError("tgt_lang must contain at least one language ID")

    # Full route of languages: source -> pivots... -> source. Each pair of
    # consecutive entries is one translation hop.
    route = [src_lang, *tgt_lang, src_lang]
    current = text
    for hop_src, hop_tgt in zip(route, route[1:]):
        current = module(current, hop_src, hop_tgt)
    return current

def augment_combination(text: str, src_lang: str, tgt_lang_comb: list, module) -> list:
    """Augment ``text`` once per entry of ``tgt_lang_comb``.

    Each entry of ``tgt_lang_comb`` is forwarded to ``augment`` as its
    ``tgt_lang`` argument, together with the same ``text``, ``src_lang`` and
    ``module``. The results are returned in the order of ``tgt_lang_comb``.
    """
    augmented = []
    for pivot_chain in tgt_lang_comb:
        augmented.append(augment(text, src_lang, pivot_chain, module))
    return augmented

## 1. PORORO: Platform Of neuRal mOdels for natuRal language prOcessing
- A Deep Learning based Multilingual Natural Language Processing Library
- github: https://github.com/kakaobrain/pororo
- Language ID: korean='ko', english='en', japanese='ja', chinese='zh'

In [4]:
# Install a C++ compiler via apt (presumably needed to build native
# dependencies of pororo -- TODO confirm), then the pororo package itself.
!apt-get install -y g++
!pip install pororo

from pororo import Pororo
# Machine-translation ('mt') task in the multilingual setting. The example
# below passes this object to augment_combination as the translation
# callable, i.e. it is invoked as trans(text, src_lang, tgt_lang).
trans = Pororo(task='mt', lang='multi')




g++ is already the newest version (4:7.4.0-1ubuntu2.3).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


### Examples

In [9]:
# Back-translate one English sentence through four pivot chains
# (ko, ja, zh, and zh -> ja) using the `trans` callable defined above.
augment_combination('The rainbow is a division of white light into many beautiful colours.', 'en', [['ko'], ['ja'], ['zh'], ['zh', 'ja']], trans)

['The rainbow is to divide white in many beautiful colors.',
 'The rainbow has a white light in color.',
 'Rainbow is a lot of beautiful colors in white light.',
 'Rainbow has turned white light into a lot of beautiful colors']

## 2. PAPAGO: the neural machine translator served by Naver (https://papago.naver.com/).
- To use this, you need to obtain a translation API key at Naver Developers (https://developers.naver.com/docs/papago/papago-nmt-overview.md) and fill in the client_id and client_secret in the code below.
- Free API is limited to 10,000 characters per day.
- Language ID: korean='ko', english='en', japanese='ja', chinese='zh-CN' (https://developers.naver.com/docs/papago/papago-nmt-api-reference.md)

In [12]:
def trans(text, src_lang, tgt_lang, timeout=10):
    """Translate ``text`` with the Papago NMT REST API.

    NOTE: defining this function rebinds the name ``trans``, which the PORORO
    section earlier in this notebook also assigns. After this cell runs,
    ``trans`` refers to the Papago translator.

    Args:
        text: Text to translate.
        src_lang: Papago language ID of ``text`` (sent as ``source``).
        tgt_lang: Papago language ID to translate into (sent as ``target``).
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout, ``requests`` may block indefinitely.

    Returns:
        The translated text on HTTP 200. On any other status code the error
        is printed and ``None`` is returned, so callers should check for
        ``None``.
    """
    # Credentials are read from the environment when available, so they do not
    # have to be written into the notebook. The placeholders below are the
    # fallback and can still be replaced by hand.
    client_id = os.environ.get("NAVER_CLIENT_ID", "YOUR_CLIENT_ID")  # <-- client_id
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "YOUR_CLIENT_SECRET")  # <-- client_secret

    url = "https://openapi.naver.com/v1/papago/n2mt"
    header = {"X-Naver-Client-Id": client_id,
              "X-Naver-Client-Secret": client_secret}
    data = {'text': text,
            'source': src_lang,
            'target': tgt_lang}

    response = requests.post(url, headers=header, data=data, timeout=timeout)
    rescode = response.status_code

    if rescode != 200:
        # Include the response body: it carries the API's own error message,
        # which the status code alone does not convey.
        print("Error Code:", rescode, response.text)
        return None

    send_data = response.json()
    return send_data['message']['result']['translatedText']

### Examples

In [15]:
# Same sentence and pivot chains as the PORORO example, but with Papago
# language IDs (Chinese is 'zh-CN' here) and the Papago `trans` function.
augment_combination('The rainbow is a division of white light into many beautiful colours.', 'en', [['ko'], ['ja'], ['zh-CN'], ['zh-CN', 'ja']], trans)

['A rainbow is a white color divided into many beautiful colors.',
 'A rainbow is a division of white light into many beautiful colors.',
 'A rainbow divides white light into many beautiful colors.',
 'The rainbow divided the white light into many beautiful colors.']

## 3. Google Cloud Translate
- To use this, you need access to the Translation API on Google Cloud Platform (https://cloud.google.com/translate); set GOOGLE_APPLICATION_CREDENTIALS in the code below to the path of your credentials JSON file.
- Language ID: korean='ko', english='en', japanese='ja', chinese='zh-CN', finnish='fi', swedish='sv', welsh='cy' (https://cloud.google.com/translate/docs/languages)

In [None]:
# Install (or upgrade to) the latest google-cloud-translate client library.
!pip install --upgrade google-cloud-translate
# Replace the placeholder with the path to your credentials JSON file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "YOUR_JSON_PATH"
from google.cloud import translate_v2 as tr

# Translation client used by the example below; presumably it picks up the
# credentials from the environment variable set above -- TODO confirm.
client = tr.Client()

### Examples

In [None]:
def google_trans(text, src_lang, tgt_lang):
    """Adapt ``client.translate`` to the ``module(text, src_lang, tgt_lang)`` call made by ``augment``.

    ``client.translate`` cannot be passed to ``augment`` directly: its second
    positional parameter is the *target* language and its third is the
    format, so the positional call ``module(text, src, tgt)`` would send the
    source language as the target. It also returns a dict rather than a
    string, which would then be fed into the next translation hop.

    Returns:
        The ``'translatedText'`` entry of the API result.
    """
    result = client.translate(
        text,
        source_language=src_lang,
        target_language=tgt_lang,
        format_='text',  # plain-text mode, so the output is not HTML-escaped
    )
    return result['translatedText']

augment_combination('The rainbow is a division of white light into many beautiful colours.', 'en', [['ko'], ['ja'], ['zh-CN'], ['zh-CN', 'ja']], google_trans)