In [None]:
from googletrans import Translator
import os

INPUT_FILE = './data/curated.txt'
OUTPUT_FILE = './data/translated.txt'
# Maximum number of characters per chunk; keep it below 5000 to be safe.
# NOTE(review): 5000 is presumably the translation service's per-request
# character limit -- confirm against the service documentation.
CHUNK_SIZE = 4900

def read_chunks(path, max_len=CHUNK_SIZE):
    """Read a UTF-8 text file and split it into chunks of at most *max_len* characters.

    Each chunk is cut just after the last period found within the first
    *max_len* characters of the remaining text, so the period stays with the
    sentence it ends. When that window contains no period, the text is cut
    hard at *max_len* characters. Chunks are stripped of surrounding
    whitespace and empty chunks are never returned.

    Args:
        path: Path of the text file to read.
        max_len: Maximum chunk length in characters; must be positive.

    Returns:
        A list of non-empty strings in file order. An empty or
        whitespace-only file yields an empty list.

    Raises:
        ValueError: If *max_len* is not positive.
        OSError: If the file cannot be opened or read.
    """
    if max_len <= 0:
        # A non-positive window could never consume any text.
        raise ValueError("max_len must be a positive integer")

    with open(path, 'r', encoding='utf-8') as f:
        text = f.read().strip()

    chunks = []
    while text:
        if len(text) <= max_len:
            # The remainder fits in one chunk; no need to look for a period.
            chunks.append(text)
            break

        # Cut one past the last period in the window so the period is kept
        # with its sentence. rfind() returns -1 when there is none, making
        # cut == 0, in which case we fall back to a hard cut at max_len.
        cut = text.rfind('.', 0, max_len) + 1
        if cut == 0:
            cut = max_len

        piece = text[:cut].strip()
        if piece:
            chunks.append(piece)
        # cut >= 1 here, so the remaining text always shrinks and the loop
        # is guaranteed to terminate.
        text = text[cut:].strip()
    return chunks

def main():
    """Translate the chunks of INPUT_FILE and write the results to OUTPUT_FILE.

    Each chunk returned by ``read_chunks`` is translated from language code
    'en' to language code 'mn' and written to the output file followed by a
    blank line. If anything raises while a chunk is being translated or
    written, the error is printed and a '[TRANSLATION FAILED]' placeholder is
    written in its place, so one bad chunk does not abort the whole run.
    """
    client = Translator()
    chunks = read_chunks(INPUT_FILE)
    print(f"Total chunks: {len(chunks)}")

    # The output file is truncated up front; results are appended chunk by chunk.
    with open(OUTPUT_FILE, mode='w', encoding='utf-8') as sink:
        for i, chunk in enumerate(chunks):
            try:
                # NOTE(review): some googletrans releases expose translate()
                # as a coroutine; there `.text` would raise and every chunk
                # would land in the handler below -- confirm installed version.
                translation = client.translate(chunk, dest='mn', src='en')
                translated_text = translation.text
                sink.write(translated_text + '\n\n')
                print(f"Translated chunk {i + 1}/{len(chunks)}")
            except Exception as e:
                # Best-effort: report the failure and keep a visible marker
                # in the output where the translated chunk would have been.
                print(f"Error on chunk {i + 1}: {e}")
                sink.write('[TRANSLATION FAILED]\n\n')

# Run the translation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
