In [None]:
# Use google translate api to pretranslate all the claims into english. save the translated claims in a seprate folder and summaries in json files. then prompt using direct inference.

In [None]:
from googletrans import Translator

translator = Translator()
translation = translator.translate("Der Himmel ist blau und ich mag Bananen", dest='en')
print(translation.text)
#output: 'The sky is blue and I like bananas'

In [6]:
import os
import json
from googletrans import Translator, LANGUAGES
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

def read_json_file(file_path):
    print(f"\nReading JSON file: {file_path}\n")
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    print(f"Data read from {file_path}: {data}\n")
    return data

def translate_text(text, source_lang, target_lang='en'):
    print(f"\nTranslating text: {text} from {source_lang} to {target_lang}\n")
    
    translator = Translator()
    try:
        result = translator.translate(text, src=source_lang, dest=target_lang)
        print(f"Translated text: {result.text}\n")
        return result.text
    except Exception as e:
        print(f"Error during translation: {e}")
        return text

def process_and_translate_file(file_path, target_lang='en'):
    print(f"\nProcessing and translating file: {file_path}\n")
    data = read_json_file(file_path)
    translated_data = []
    base_dir = os.getcwd()
    output_path = os.path.join(base_dir, os.path.basename(file_path).replace(".json", "_translated.json"))
    for entry in data:
        if 'claim' in entry and 'language' in entry:
            source_lang = entry['language']
            print(f"Translating claim: {entry['claim']} from {source_lang}\n")
            entry['claim'] = translate_text(entry['claim'], source_lang, target_lang)
            print(f"Translated claim: {entry['claim']}\n")
        translated_data.append(entry)
        # Save after each translation to ensure iterative saving
        save_json_file(translated_data, output_path)
    print(f"Translated file saved iteratively to: {output_path}\n")
    return translated_data

def save_json_file(data, file_path):
    print(f"\nSaving JSON file: {file_path}\n")
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)
    print(f"File saved: {file_path}\n")

def main():
    json_files = ["indo_aryan.json", "kartvelian.json", "romance.json", "slavic.json", "turkic.json"]
    base_dir = os.path.join("..", "JSON Files")
    for file_name in json_files:
        file_path = os.path.join(base_dir, file_name)
        print(f"\nProcessing file: {file_path}\n")
        process_and_translate_file(file_path)

if __name__ == "__main__":
    main()



Processing file: ../JSON Files/indo_aryan.json


Processing and translating file: ../JSON Files/indo_aryan.json


Reading JSON file: ../JSON Files/indo_aryan.json

Data read from ../JSON Files/indo_aryan.json: [{'claim': 'ছবিটি সম্প্রতি আমেরিকার মিনিয়াপোলিসে পুলিশের অত্যাচারে মৃত্যু হওয়া জর্জ ফ্লয়েডের', 'label': 'mostly false', 'site': 'dailyo.in', 'language': 'bn'}, {'claim': 'इस वीडियो में निर्माता-निर्देशक महेश भट्ट फिल्म ‘सड़क 2’ के ट्रेलर को बड़े पैमाने पर डिस्लाइक किए जाने की वजह से नाराजगी जता रहे हैं.', 'label': 'false', 'site': 'aajtak.in', 'language': 'hi'}, {'claim': 'सर्वोच्च न्यायालयाने 15 जून पासून देशाचे नाव प्रत्येक भाषेत फक्त भारत असेल असा निर्णय दिला आहे', 'label': 'false', 'site': 'marathi.newschecker.in', 'language': 'mr'}, {'claim': 'ਸੋਸ਼ਲ ਮੀਡੀਆ ਤੇ ਦਿੱਲੀ ਦੇ ਮੁੱਖ ਮੰਤਰੀ ਅਰਵਿੰਦ ਕੇਜਰੀਵਾਲ ਅਤੇ ਰਿਲਾਇੰਸ ਦੇ ਮੁਖੀ ਮੁਕੇਸ਼ ਅੰਬਾਨੀ ਦੇ ਜੱਫੀ ਪਾਉਂਦਿਆਂ ਦੀ ਇਕ ਤਸਵੀਰ ਵਾਇਰਲ ਹੋ ਰਹੀ ਹੈ।', 'label': 'half true', 'site': 'punjabi.newschecker.in', 'language': 'pa'}, {'claim': 'ঘুমন্ত নারীর মত