In [1]:
import os
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Read the Groq credential from the process environment (None when it is unset).
groq_api_key = os.environ.get("GROQ_API_KEY")

In [3]:
from langchain_groq import ChatGroq
# Groq-hosted chat model that the translation chain below pipes the prompt into.
# NOTE(review): no API key is passed here, so the `groq_api_key` read in the
# previous cell is unused; presumably the client reads GROQ_API_KEY from the
# environment itself - confirm.
model = ChatGroq(model_name="qwen-2.5-32b")

In [5]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# Prompt layout: one fixed system instruction, followed by whatever message
# list the caller supplies under the "messages" key at invoke time.
prompt = ChatPromptTemplate.from_messages([
    ('system', 'You are a helpful language translator assistant your task is convert english language to Bangla language. User will give a input and convert into Bangla Language just translate only.'),
    MessagesPlaceholder(variable_name="messages"),
])

# Feed the rendered prompt straight into the chat model.
chain = prompt | model

In [None]:
# Sample English word problem; the next cell sends it through the translation chain.
text = "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"

In [10]:
# Translate the sample problem; the bare expression makes the reply text the cell output.
chain.invoke({'messages': [HumanMessage(content=text)]}).content

'জেনেটের হংরাজ 16 টি ডিম প্রতিদিন দেয়। সে প্রতিদিন সকালে ব্রেকফাস্টে 3টি ডিম খায় এবং প্রতিদিন তার বন্ধুদের জন্য 4টি ডিম বেক করে মাফিন তৈরি করে। সে বাকি ডিমগুলিকে প্রতিদিন ফার্মার্স মার্কেটে $2 এ বিক্রি করে। সে প্রতিদিন ফার্মার্স মার্কেটে কত টাকা আয় করে?'

In [None]:
import json
import re  # For regular expressions
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
import os  # for accessing API keys


# The Groq credential must come from the environment; stop immediately when it
# is missing rather than failing later inside the first model call.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise ValueError("Groq API key not found.  Please set the GROQ_API_KEY environment variable.")


# Chat model used for every translation request in this cell.
model = ChatGroq(model_name="qwen-2.5-32b", groq_api_key=groq_api_key)


# Fixed system instruction + the caller-supplied message list ("messages").
prompt = ChatPromptTemplate.from_messages([
    ('system', 'You are a highly precise and professional language translator assistant. Your ONLY task is to translate English language text to Bangla language. You are given ONE piece of text as input, and you MUST return ONLY the Bangla translation of that text. Do not add any introductory or concluding phrases, questions, or conversational elements. Do not translate any text enclosed in <<...>> or any line starting with ####. Also, do NOT translate any numbers; keep them in their original English numeral form. Mathematical expressions/answers must remain in their original form. Return ONLY the translation.'),
    MessagesPlaceholder(variable_name="messages"),
])
chain = prompt | model


def translate_text(text):
    """Translate English text to Bangla through the module-level ``chain``.

    Args:
        text: The English text to translate.

    Returns:
        The content of the model's reply. A blank string (empty or
        whitespace-only) is returned unchanged without calling the model.
        ``None`` is returned when the model call raises.
    """
    # With nothing to translate the model answers with filler text of its own
    # (see the stray sentences after "#### N" in this notebook's samples), so
    # blank input must never reach it.
    if isinstance(text, str) and not text.strip():
        return text

    try:
        return chain.invoke({'messages': [HumanMessage(content=text)]}).content
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller fall
        # back to the untranslated text.
        print(f"Translation error: {e}")
        return None


# Spans that must survive translation untouched: calculator annotations
# (<<...>>) and the final-answer line (#### ...).
_PROTECTED_SPAN = re.compile(r'(<<.*?>>|####.*)')


def _translate_answer(answer_en, translate):
    """Translate the prose of one answer, copying protected spans verbatim."""
    pieces = []
    # re.split with a capturing group alternates prose (even index) and the
    # captured protected spans (odd index).
    for index, part in enumerate(_PROTECTED_SPAN.split(answer_en)):
        if index % 2 == 1:
            pieces.append(part)
            continue

        core = part.strip()
        if not core:
            # Empty / whitespace-only gaps (there is always one after the
            # trailing "#### N") carry nothing to translate; sending them to
            # the model only produces made-up text.
            pieces.append(part)
            continue

        # The model does not reliably keep surrounding newlines/spaces, so
        # translate the bare text and put the original whitespace back.
        leading = part[:len(part) - len(part.lstrip())]
        trailing = part[len(part.rstrip()):]
        translated = translate(core)
        body = translated.strip() if translated else ""
        pieces.append(leading + (body or core) + trailing)  # keep English on failure

    return "".join(pieces)


def process_gsm8k(file_path, output_file_path="gsm8k_bn-deep6x.json", translate=None):
    """
    Translate a GSM8k JSON file to Bangla and save the result as JSON.

    The input must be a JSON list of objects with "question" and "answer"
    keys. Each output record holds "question_en", "answer_en", "question_bn"
    and "answer_bn". In answers, ``<<...>>`` spans and the ``####...`` line
    are copied verbatim and only the prose between them is translated.

    Args:
        file_path: Path of the input JSON file.
        output_file_path: Path of the JSON file to write.
        translate: Callable mapping English text to Bangla text (or None on
            failure). Defaults to the notebook's ``translate_text``.

    Returns:
        None. The translated records are written to ``output_file_path``.
        "question_bn" is None when the question could not be translated.
    """
    if translate is None:
        translate = translate_text  # looked up at call time

    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    translated_data = []
    for item in data:
        question_en = item["question"]
        answer_en = item["answer"]
        translated_data.append({
            "question_en": question_en,
            "answer_en": answer_en,
            "question_bn": translate(question_en),
            "answer_bn": _translate_answer(answer_en, translate),
        })

    # ensure_ascii=False keeps the Bangla text readable in the file.
    with open(output_file_path, 'w', encoding='utf-8') as outfile:
        json.dump(translated_data, outfile, indent=4, ensure_ascii=False)

    print(f"Translation complete. Translated data saved to {output_file_path}")


# Run the translation over the dataset file.
# Point `file_path` at the actual location of your GSM8k JSON.
file_path = "gsm8k.json"
process_gsm8k(file_path=file_path)

Translation complete. Translated data saved to gsm8k_bn-deep6x.json


In [9]:
# Sample translated answer with stray text appended after the "#### 70000"
# marker; the next cell tries the clean-up regex on it.
text = "বাড়ির ও আবাসিক পরিবর্তনের খরচ 80,000+50,000=$<<80000+50000=130000>>130,000\nতিনি বাড়ির মূল্য বাড়ালেন 80,000*1.5=<<80000*1.5=120000>>120,000\nতাই বাড়ির নতুন মূল্য হল 120,000+80,000=$<<120000+80000=200000>>তাই তিনি ২০০,০০০-১৩০,০০০=$ লাভ করলেন<<200000-130000=70000>>70,000#### 70000কিভাবে আপনি কলেজে পড়াশোনা করেন?"


In [None]:
import re


# Keep the "#### <digits>" marker and drop whatever follows it up to the end
# of the string.
cleaned_text = re.sub(pattern=r"(#### \d+)(.*)$", repl=r"\1", string=text)

cleaned_text

'বাড়ির ও আবাসিক পরিবর্তনের খরচ 80,000+50,000=$<<80000+50000=130000>>130,000\nতিনি বাড়ির মূল্য বাড়ালেন 80,000*1.5=<<80000*1.5=120000>>120,000\nতাই বাড়ির নতুন মূল্য হল 120,000+80,000=$<<120000+80000=200000>>তাই তিনি ২০০,০০০-১৩০,০০০=$ লাভ করলেন<<200000-130000=70000>>70,000#### 70000'

In [16]:
# Second sample: translated answer with stray text appended after "#### 3".
text = "এটি 2/2=<<2/2=1>>1 বল্ট সাদা ফিবার\nসুতরাং কাপড়ের মোট পরিমাণ হল 2+1=<<2+1=3>>3 টি ফেরনি#### 3আপনি কোনো টেক্সট দেননি। আমি শুধু বাঙালি অনুবাদ দিব।"

In [17]:
# Keep the "#### <digits>" marker and drop whatever follows it up to the end
# of the string.
cleaned_text = re.sub(pattern=r"(#### \d+)(.*)$", repl=r"\1", string=text)

cleaned_text

'এটি 2/2=<<2/2=1>>1 বল্ট সাদা ফিবার\nসুতরাং কাপড়ের মোট পরিমাণ হল 2+1=<<2+1=3>>3 টি ফেরনি#### 3'

In [19]:
import json
import re  # For regular expressions
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
import os  # for accessing API keys




# Fixed system instruction + the caller-supplied message list ("messages").
prompt = ChatPromptTemplate.from_messages([
    ('system', 'You are a highly precise and professional language translator assistant. Your ONLY task is to translate English language text to Bangla language. You are given ONE piece of text as input, and you MUST return ONLY the Bangla translation of that text. Do not add any introductory or concluding phrases, questions, or conversational elements. Do not translate any text enclosed in <<...>> or any line starting with ####. Also, do NOT translate any numbers; keep them in their original English numeral form. Mathematical expressions/answers must remain in their original form. Return ONLY the translation.'),
    MessagesPlaceholder(variable_name="messages"),
])

# `model` is not created in this cell; it is whatever an earlier cell left in
# the kernel.
chain = prompt | model


def translate_text(text):
    """Translate English text to Bangla through the module-level ``chain``.

    The reply is returned as-is; no post-processing is applied here.

    Args:
        text: The English text to translate.

    Returns:
        The content of the model's reply. A blank string (empty or
        whitespace-only) is returned unchanged without calling the model.
        ``None`` is returned when the model call raises.
    """
    # With nothing to translate the model answers with filler text of its own
    # (see the stray sentences after "#### N" in this notebook's samples), so
    # blank input must never reach it.
    if isinstance(text, str) and not text.strip():
        return text

    try:
        return chain.invoke({'messages': [HumanMessage(content=text)]}).content
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller fall
        # back to the untranslated text.
        print(f"Translation error: {e}")
        return None


# Spans that must survive translation untouched: calculator annotations
# (<<...>>) and the final-answer line (#### ...).
_PROTECTED_SPAN = re.compile(r'(<<.*?>>|####.*)')


def _translate_answer(answer_en, translate):
    """Translate the prose of one answer, copying protected spans verbatim."""
    pieces = []
    # re.split with a capturing group alternates prose (even index) and the
    # captured protected spans (odd index).
    for index, part in enumerate(_PROTECTED_SPAN.split(answer_en)):
        if index % 2 == 1:
            pieces.append(part)
            continue

        core = part.strip()
        if not core:
            # Empty / whitespace-only gaps (there is always one after the
            # trailing "#### N") carry nothing to translate; sending them to
            # the model only produces made-up text.
            pieces.append(part)
            continue

        # The model does not reliably keep surrounding newlines/spaces, so
        # translate the bare text and put the original whitespace back.
        leading = part[:len(part) - len(part.lstrip())]
        trailing = part[len(part.rstrip()):]
        translated = translate(core)
        body = translated.strip() if translated else ""
        pieces.append(leading + (body or core) + trailing)  # keep English on failure

    return "".join(pieces)


def process_gsm8k(file_path, output_file_path="gsm8k_bn-deepy6x.json", translate=None):
    """
    Translate a GSM8k JSON file to Bangla and save the result as JSON.

    The input must be a JSON list of objects with "question" and "answer"
    keys. Each output record holds "question_en", "answer_en", "question_bn"
    and "answer_bn". In answers, ``<<...>>`` spans and the ``####...`` line
    are copied verbatim and only the prose between them is translated.

    No regex clean-up is applied after the ``####`` line: blank parts are
    never sent to the model, so nothing can be appended there, and the old
    ``(#### \\d+)(.*)$`` substitution cut answers such as ``#### 0.5`` down
    to ``#### 0``.

    Args:
        file_path: Path of the input JSON file.
        output_file_path: Path of the JSON file to write.
        translate: Callable mapping English text to Bangla text (or None on
            failure). Defaults to the notebook's ``translate_text``.

    Returns:
        None. The translated records are written to ``output_file_path``.
        "question_bn" is None when the question could not be translated.
    """
    if translate is None:
        translate = translate_text  # looked up at call time

    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    translated_data = []
    for item in data:
        question_en = item["question"]
        answer_en = item["answer"]
        translated_data.append({
            "question_en": question_en,
            "answer_en": answer_en,
            "question_bn": translate(question_en),
            "answer_bn": _translate_answer(answer_en, translate),
        })

    # ensure_ascii=False keeps the Bangla text readable in the file.
    with open(output_file_path, 'w', encoding='utf-8') as outfile:
        json.dump(translated_data, outfile, indent=4, ensure_ascii=False)

    print(f"Translation complete. Translated data saved to {output_file_path}")


# Run the translation over the dataset file.
# Point `file_path` at the actual location of your GSM8k JSON.
file_path = "gsm8k.json"
process_gsm8k(file_path=file_path)

Translation complete. Translated data saved to gsm8k_bn-deepy6x.json


In [1]:
import json
import os
from typing import List
import re
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage
from pydantic import BaseModel, Field  # Use pydantic.v1 if needed

# The Groq credential must come from the environment; stop immediately when it
# is missing rather than failing later inside the first model call.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise ValueError("Groq API key not found.  Please set the GROQ_API_KEY environment variable.")

# Initialize Groq model



In [2]:
import json
import re  # For regular expressions
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
import os  # for accessing API keys

# Chat model for this run; `groq_api_key` is expected to be set by the
# previous cell.
model = ChatGroq(model_name="llama-3.3-70b-specdec", groq_api_key=groq_api_key)


# Fixed system instruction + the caller-supplied message list ("messages").
prompt = ChatPromptTemplate.from_messages([
    ('system', 'You are a highly precise and professional language translator assistant. Your ONLY task is to translate English language text to Bangla language. You are given ONE piece of text as input, and you MUST return ONLY the Bangla translation of that text. Do not add any introductory or concluding phrases, questions, or conversational elements. Do not translate any text enclosed in <<...>> or any line starting with ####. Also, do NOT translate any numbers; keep them in their original English numeral form. Mathematical expressions/answers must remain in their original form. Return ONLY the translation.'),
    MessagesPlaceholder(variable_name="messages"),
])
chain = prompt | model


def translate_text(text):
    """Translate English text to Bangla through the module-level ``chain``.

    The reply is returned as-is; no post-processing is applied here.

    Args:
        text: The English text to translate.

    Returns:
        The content of the model's reply. A blank string (empty or
        whitespace-only) is returned unchanged without calling the model.
        ``None`` is returned when the model call raises.
    """
    # With nothing to translate the model answers with filler text of its own
    # (see the stray sentences after "#### N" in this notebook's samples), so
    # blank input must never reach it.
    if isinstance(text, str) and not text.strip():
        return text

    try:
        return chain.invoke({'messages': [HumanMessage(content=text)]}).content
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller fall
        # back to the untranslated text.
        print(f"Translation error: {e}")
        return None


# Spans that must survive translation untouched: calculator annotations
# (<<...>>) and the final-answer line (#### ...).
_PROTECTED_SPAN = re.compile(r'(<<.*?>>|####.*)')


def _translate_answer(answer_en, translate):
    """Translate the prose of one answer, copying protected spans verbatim."""
    pieces = []
    # re.split with a capturing group alternates prose (even index) and the
    # captured protected spans (odd index).
    for index, part in enumerate(_PROTECTED_SPAN.split(answer_en)):
        if index % 2 == 1:
            pieces.append(part)
            continue

        core = part.strip()
        if not core:
            # Empty / whitespace-only gaps (there is always one after the
            # trailing "#### N") carry nothing to translate; sending them to
            # the model only produces made-up text.
            pieces.append(part)
            continue

        # The model does not reliably keep surrounding newlines/spaces, so
        # translate the bare text and put the original whitespace back.
        leading = part[:len(part) - len(part.lstrip())]
        trailing = part[len(part.rstrip()):]
        translated = translate(core)
        body = translated.strip() if translated else ""
        pieces.append(leading + (body or core) + trailing)  # keep English on failure

    return "".join(pieces)


def process_gsm8k(file_path, output_file_path="gsm8k_bn-deepy6y.json", translate=None):
    """
    Translate a GSM8k JSON file to Bangla and save the result as JSON.

    The input must be a JSON list of objects with "question" and "answer"
    keys. Each output record holds "question_en", "answer_en", "question_bn"
    and "answer_bn". In answers, ``<<...>>`` spans and the ``####...`` line
    are copied verbatim and only the prose between them is translated.

    No regex clean-up is applied after the ``####`` line: blank parts are
    never sent to the model, so nothing can be appended there, and the old
    ``(#### \\d+)(.*)$`` substitution cut answers such as ``#### 0.5`` down
    to ``#### 0``.

    Args:
        file_path: Path of the input JSON file.
        output_file_path: Path of the JSON file to write.
        translate: Callable mapping English text to Bangla text (or None on
            failure). Defaults to the notebook's ``translate_text``.

    Returns:
        None. The translated records are written to ``output_file_path``.
        "question_bn" is None when the question could not be translated.
    """
    if translate is None:
        translate = translate_text  # looked up at call time

    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    translated_data = []
    for item in data:
        question_en = item["question"]
        answer_en = item["answer"]
        translated_data.append({
            "question_en": question_en,
            "answer_en": answer_en,
            "question_bn": translate(question_en),
            "answer_bn": _translate_answer(answer_en, translate),
        })

    # ensure_ascii=False keeps the Bangla text readable in the file.
    with open(output_file_path, 'w', encoding='utf-8') as outfile:
        json.dump(translated_data, outfile, indent=4, ensure_ascii=False)

    print(f"Translation complete. Translated data saved to {output_file_path}")


# Run the translation over the dataset file.
# Point `file_path` at the actual location of your GSM8k JSON.
file_path = "gsm8k.json"
process_gsm8k(file_path=file_path)

Translation complete. Translated data saved to gsm8k_bn-deepy6y.json
