In [6]:
import os
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment so the
# os.getenv("GROQ_API_KEY") lookup in a later cell can find the key.
# As the last expression in the cell, the call's return value is displayed.
load_dotenv()

True

In [3]:
# Read the Groq credential from the environment; this is None when the variable is unset.
groq_api_key = os.environ.get("GROQ_API_KEY")

In [4]:
from langchain_groq import ChatGroq
# Chat model used by the quick single-sentence translation check below.
# No API key is passed explicitly here; presumably ChatGroq falls back to the
# GROQ_API_KEY environment variable -- verify against the langchain_groq docs.
model = ChatGroq(model_name="qwen-2.5-32b")

In [5]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction for the first, minimal English -> Bangla translation prompt.
# The wording is kept verbatim because it is what the model actually receives.
BASIC_SYSTEM_PROMPT = (
    'You are a helpful language translator assistant your task is convert '
    'english language to Bangla language. '
    'User will give a input and convert into Bangla Language just translate only.'
)

# The conversation is injected through the "messages" placeholder at invoke time.
prompt = ChatPromptTemplate.from_messages([
    ('system', BASIC_SYSTEM_PROMPT),
    MessagesPlaceholder(variable_name="messages"),
])

# Pipe the rendered prompt straight into the chat model.
chain = prompt | model

In [None]:
# Sample English word problem used to try the translation chain on a single input.
text = "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"

In [10]:
# Wrap the sample in a HumanMessage and run it through the chain; the bare
# expression on the last line is what the cell displays (the translated text).
sample_message = HumanMessage(content=text)
chain.invoke({'messages': [sample_message]}).content

'জেনেটের হংরাজ 16 টি ডিম প্রতিদিন দেয়। সে প্রতিদিন সকালে ব্রেকফাস্টে 3টি ডিম খায় এবং প্রতিদিন তার বন্ধুদের জন্য 4টি ডিম বেক করে মাফিন তৈরি করে। সে বাকি ডিমগুলিকে প্রতিদিন ফার্মার্স মার্কেটে $2 এ বিক্রি করে। সে প্রতিদিন ফার্মার্স মার্কেটে কত টাকা আয় করে?'

In [13]:
import json
import re  # For regular expressions
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
import os  # for accessing API keys


# The Groq credential must be supplied through the GROQ_API_KEY environment
# variable; fail fast with a clear message when it is missing or empty.
groq_api_key = os.environ.get("GROQ_API_KEY")
if groq_api_key is None or groq_api_key == "":
    raise ValueError("Groq API key not found.  Please set the GROQ_API_KEY environment variable.")


# Chat model used for the dataset translation, with the key passed explicitly.
# (An earlier run used model_name="qwen-2.5-32b" instead.)
model = ChatGroq(
    model_name="deepseek-r1-distill-llama-70b",
    groq_api_key=groq_api_key,
)

# Translation prompt for the dataset run.
# Compared with the earlier draft of this prompt, it additionally tells the
# model to leave numbers in their original English numeral form.
TRANSLATION_SYSTEM_PROMPT = (
    'You are a helpful language translator assistant. '
    'Your task is to translate English language to Bangla language. '
    'User will give an input, and you must translate it into Bangla. '
    'Do not translate any text enclosed in <<...>> or any line starting with ####. '
    '**Also, do NOT translate any numbers; keep them in their original English numeral form.** '
    'These are mathematical expressions/answers that should remain in their original form.'
)

# The text to translate is injected through the "messages" placeholder.
prompt = ChatPromptTemplate.from_messages([
    ('system', TRANSLATION_SYSTEM_PROMPT),
    MessagesPlaceholder(variable_name="messages"),
])

# Rendered prompt -> chat model.
chain = prompt | model


def translate_text(text):
    """Translate English text to Bangla through the module-level ``chain``.

    Args:
        text: English text to translate.

    Returns:
        The translated string, with any ``<think>...</think>`` reasoning block
        emitted by the model removed. Blank or whitespace-only input is returned
        unchanged without calling the API. ``None`` is returned when the call
        fails; the error is printed rather than raised so a long batch run can
        continue.
    """
    # Nothing to translate: avoid a paid round-trip whose reply could only be
    # invented text.
    if isinstance(text, str) and not text.strip():
        return text

    try:
        result = chain.invoke({'messages': [HumanMessage(content=text)]}).content
    except Exception as e:
        print(f"Translation error: {e}")
        return None  # Best-effort: callers decide how to handle a failed translation

    # Reasoning models (e.g. the DeepSeek-R1 distill configured above) can put
    # their chain of thought inside <think> tags in the message content; that
    # text must not end up in the translated dataset.
    if isinstance(result, str):
        result = re.sub(r'<think>.*?</think>\s*', '', result, flags=re.DOTALL)
    return result


# Spans of a GSM8K answer that must be copied through untranslated: inline
# calculator annotations (<<...>>) and the final-answer line (#### ...).
# The single capturing group makes re.split keep these spans in its result.
_PROTECTED_SPAN_RE = re.compile(r'(<<.*?>>|####.*)')


def _translate_segment(segment):
    """Translate one free-text fragment, keeping its original edge whitespace."""
    core = segment.strip()
    if not core:
        # Empty / whitespace-only fragments (common right after "#### N") are
        # passed through instead of being sent to the model.
        return segment

    translated = translate_text(core)
    cleaned = translated.strip() if translated else ""
    if not cleaned:
        return segment  # Translation failed: keep the English fragment

    # Re-attach the spaces/newlines that separated this fragment from the
    # protected spans around it, so the answer keeps its line structure.
    lead = segment[:len(segment) - len(segment.lstrip())]
    trail = segment[len(segment.rstrip()):]
    return f"{lead}{cleaned}{trail}"


def _translate_answer(answer_en):
    """Translate an answer's free text while copying protected spans verbatim."""
    pieces = _PROTECTED_SPAN_RE.split(answer_en)
    # With one capturing group, re.split alternates free text (even indices)
    # and matched protected spans (odd indices).
    return "".join(
        piece if index % 2 else _translate_segment(piece)
        for index, piece in enumerate(pieces)
    )


def process_gsm8k(file_path, output_file_path="gsm8k_bn-deep6.json"):
    """Translate a GSM8K-style JSON file to Bangla and write the result to disk.

    The input must be a JSON array of objects with ``"question"`` and
    ``"answer"`` keys. Each output record holds ``question_en``, ``answer_en``,
    ``question_bn`` and ``answer_bn``.

    Args:
        file_path: Path of the UTF-8 JSON file to read.
        output_file_path: Path of the JSON file to write. Defaults to the
            file name this notebook has always used.

    Notes:
        ``question_bn`` is ``None`` (JSON ``null``) when the question could not
        be translated. Answer fragments that fail to translate fall back to
        their English text.
    """
    with open(file_path, 'r', encoding='utf-8') as f:  # Explicit encoding: do not rely on the platform default
        data = json.load(f)

    translated_data = []
    for item in data:
        question_en = item["question"]
        answer_en = item["answer"]
        translated_data.append({
            "question_en": question_en,
            "answer_en": answer_en,
            "question_bn": translate_text(question_en),
            "answer_bn": _translate_answer(answer_en),
        })

    # ensure_ascii=False keeps Bangla characters readable instead of \u escapes.
    with open(output_file_path, 'w', encoding='utf-8') as outfile:
        json.dump(translated_data, outfile, indent=4, ensure_ascii=False)

    print(f"Translation complete. Translated data saved to {output_file_path}")


# Example usage: translate every record in the input file and write the
# English/Bangla pairs to the output JSON file.
file_path = "gsm8k.json"  # Replace with the actual path to your file
process_gsm8k(file_path)

Translation complete. Translated data saved to gsm8k_bn-deep6.json
