In [4]:
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import sacrebleu

# Load tokenizer and model for Burmese to English translation
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")

# Define a function to translate text from Burmese to English
def translate_my_to_en(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    print("Input tokens:", inputs["input_ids"])
    outputs = model.generate(**inputs)
    translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return translated_text

# Test the translation function
burmese_text = "မင်္ဂလာပါ။ ဘယ်လိုလဲ?"
translated_english = translate_my_to_en(burmese_text)
print("Burmese to English:", translated_english)

# Evaluate the translation quality using BLEU score
reference = ["Hello, how are you?"]
candidate = translated_english
bleu_score = sacrebleu.corpus_bleu(candidate, [reference])
print("BLEU score:", bleu_score.score)


generation_config.json: 100%|██████████| 233/233 [00:00<00:00, 46.6kB/s]


Input tokens: tensor([[128022,   4139,   8571,  10290,  15460,  10289,  18709,  18412,  15146,
          10601,  18202,     24,      2]])
Burmese to English: Қандай? – Қандай?
BLEU score: 0.0


In [6]:
from transformers import MarianMTModel, MarianTokenizer
import sacrebleu

# Load tokenizer and model for Burmese to English translation
model_name = "Helsinki-NLP/opus-mt-my-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Define a function to translate text from Burmese to English
def translate_my_to_en(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    outputs = model.generate(**inputs)
    translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return translated_text

# Test the translation function
burmese_text = "ဟယ်လို့ရပါတယ်။"
translated_english = translate_my_to_en(burmese_text)
print("Burmese to English:", translated_english)


Burmese to English: ഷീൽഡ്


In [20]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from googletrans import Translator
import re

# Download NLTK resources (only required once)
nltk.download('punkt')
nltk.download('stopwords')

# Define preprocessing function
def preprocess_text(text):
    # Tokenize the text
    tokens = word_tokenize(text)

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word.lower() not in stop_words]

    # Capitalize the first letter in each sentence
    preprocessed_text = ' '.join([word.capitalize() if i == 0 or tokens[i - 1] in ['.', '!', '?'] else word for i, word in enumerate(filtered_tokens)])

    return preprocessed_text

# Define postprocessing function
def postprocess_text(text):
    # Capitalize the first letter of the entire text
    postprocessed_text = text.capitalize()

    return postprocessed_text

# Define translation function
def translate_my_to_en(text):
    # Preprocess text
    preprocessed_text = preprocess_text(text)

    # Translate text from Burmese to English
    translator = Translator()
    translated_text = translator.translate(preprocessed_text, src='my', dest='en').text

    # Postprocess translated text
    postprocessed_text = postprocess_text(translated_text)

    return postprocessed_text

# Test the translation function with a sample Burmese text
burmese_text = "မင်းနာမည်က ဘာလဲ၊ မင်းအသက်ဘယ်လောက်လဲ။"
translated_english = translate_my_to_en(burmese_text)
print("Burmese Text:", burmese_text)
print("Translated English Text:", translated_english)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\amir.rahman\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\amir.rahman\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Burmese Text: မင်းနာမည်က ဘာလဲ၊ မင်းအသက်ဘယ်လောက်လဲ။
Translated English Text: What is your name?how old are you?
