<a href="https://colab.research.google.com/github/FS75/wo-ai-AI/blob/feature%2Ftext-language-translation/dev.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install fastapi nest-asyncio pyngrok uvicorn transformers sacremoses

In [48]:
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware

# Application instance that the route decorators in this notebook attach to.
app = FastAPI()

# CORS policy: every origin, HTTP method and request header is accepted, and
# credentialed requests are allowed.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins with allow_credentials=True -- confirm whether the client
# really sends credentials, and list explicit origins if it does.
_cors_settings = dict(
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)
app.add_middleware(CORSMiddleware, **_cors_settings)

@app.get('/')
async def root():
    # Static payload returned for GET requests to the root path.
    greeting = {'hello': 'world'}
    return greeting

In [49]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint identifiers, named once so each tokenizer/model pair is
# guaranteed to be loaded from the same checkpoint.
_EN_ZH_CHECKPOINT = "Helsinki-NLP/opus-mt-en-zh"
_ZH_EN_CHECKPOINT = "Helsinki-NLP/opus-mt-zh-en"

# English -> Chinese tokenizer and sequence-to-sequence model.
tokenizer_en_zh, model_en_zh = (
    AutoTokenizer.from_pretrained(_EN_ZH_CHECKPOINT),
    AutoModelForSeq2SeqLM.from_pretrained(_EN_ZH_CHECKPOINT),
)

# Chinese -> English tokenizer and sequence-to-sequence model.
tokenizer_zh_en, model_zh_en = (
    AutoTokenizer.from_pretrained(_ZH_EN_CHECKPOINT),
    AutoModelForSeq2SeqLM.from_pretrained(_ZH_EN_CHECKPOINT),
)

In [52]:
# English -> Chinese translation helper
def translate_en_to_zh(text):
    """Return the Chinese translation of the English string ``text``.

    The text is encoded with ``tokenizer_en_zh``, passed through
    ``model_en_zh.generate`` and the first returned sequence is decoded back
    to a string with special tokens stripped.  Sampling is enabled, so
    repeated calls with the same input may produce different translations.
    """
    generation_options = {
        "max_length": 50,           # upper bound on the generated sequence length
        "num_beams": 10,            # number of beams kept during decoding
        "no_repeat_ngram_size": 2,  # forbid any 2-gram from appearing twice
        "early_stopping": True,     # stop once enough finished beams exist
        "repetition_penalty": 1.2,  # values above 1 discourage repeated tokens
        "do_sample": True,          # draw tokens by sampling
        "temperature": 0.2,         # low temperature sharpens the distribution
    }
    token_ids = tokenizer_en_zh.encode(text, return_tensors="pt")
    candidates = model_en_zh.generate(token_ids, **generation_options)
    return tokenizer_en_zh.decode(candidates[0], skip_special_tokens=True)


# Chinese -> English translation helper
def translate_zh_to_en(text):
    """Return the English translation of the Chinese string ``text``.

    Mirrors the English -> Chinese helper but uses ``tokenizer_zh_en`` and
    ``model_zh_en``.  Only the first generated sequence is decoded, with
    special tokens removed.  Because sampling is switched on, the result is
    not guaranteed to be identical across calls.
    """
    # Decoding configuration handed to ``generate`` unchanged.
    decoding_config = dict(
        max_length=50,            # cap on the length of the generated sequence
        num_beams=10,             # beam width
        no_repeat_ngram_size=2,   # no 2-gram may occur more than once
        early_stopping=True,      # end the search once enough beams finished
        repetition_penalty=1.2,   # >1 penalises tokens that already appeared
        do_sample=True,           # sample instead of always taking the argmax
        temperature=0.2,          # keeps sampling close to the likeliest tokens
    )

    # Text -> tensor of token ids in the layout the model expects.
    source_ids = tokenizer_zh_en.encode(text, return_tensors="pt")

    generated = model_zh_en.generate(source_ids, **decoding_config)

    # Token ids -> plain string.
    return tokenizer_zh_en.decode(generated[0], skip_special_tokens=True)

@app.get('/en-to-ch')
async def english_to_chinese(en_text):
    """Translate English text to Chinese.

    Args:
        en_text: English text to translate.  The parameter has no default
            and is not part of the path, so FastAPI reads it from the query
            string (``/en-to-ch?en_text=...``).

    Returns:
        The translated string on success.  If translation raises, a
        ``JSONResponse`` with HTTP status 500 whose body is the exception
        message.
    """
    try:
        return translate_en_to_zh(en_text)
    except Exception as e:
        # Request boundary: report the failure with an error status so a
        # client can tell an error message apart from a translation (the
        # default status of JSONResponse is 200).
        return JSONResponse(str(e), status_code=500)

@app.get('/ch-to-en')
async def chinese_to_english(ch_text):
    """Translate Chinese text to English.

    Args:
        ch_text: Chinese text to translate.  The parameter has no default
            and is not part of the path, so FastAPI reads it from the query
            string (``/ch-to-en?ch_text=...``).

    Returns:
        The translated string on success.  If translation raises, a
        ``JSONResponse`` with HTTP status 500 whose body is the exception
        message.
    """
    try:
        # Use the Chinese -> English translator; this route previously
        # called the English -> Chinese one by mistake.
        return translate_zh_to_en(ch_text)
    except Exception as e:
        # Request boundary: report the failure with an error status so a
        # client can tell an error message apart from a translation (the
        # default status of JSONResponse is 200).
        return JSONResponse(str(e), status_code=500)

In [None]:
import nest_asyncio
from pyngrok import ngrok
import uvicorn
from google.colab import userdata
# Expose the FastAPI app publicly through an ngrok tunnel and start serving.

# Read the ngrok auth token from Colab's user data store under the key
# "NGROK_AUTH_TOKEN" (presumably a notebook secret -- it must be set
# beforehand).
ngrok_auth = userdata.get("NGROK_AUTH_TOKEN")

ngrok.set_auth_token(ngrok_auth)
# Open the tunnel to local port 8000 -- the same port uvicorn is started on
# below -- and print its public URL before the server call takes over the cell.
ngrok_tunnel = ngrok.connect(8000)
api_url = ngrok_tunnel.public_url
print('Public URL:', api_url)
# Patch asyncio so uvicorn can start its loop here; presumably required
# because the notebook kernel already has an event loop running.
nest_asyncio.apply()
# Serve the app on port 8000; this call is expected to block until the
# server is stopped.
uvicorn.run(app, port=8000)