In [30]:
import json
import requests
import tqdm
import re

# Pairs of (English article file, Russian article file) describing the same
# person; the names are matched against the 'filename' field of each record
# in facts.json.
filenames = [
    ("en_Denys_Shmyhal.txt", "ru_Шмыгаль,_Денис_Анатольевич.txt"),
    ("en_Maria_Lvova-Belova.txt", "ru_Львова-Белова,_Мария_Алексеевна.txt"),
    ("en_Olha_Stefanishyna.txt", "ru_Стефанишина,_Ольга_Витальевна.txt"),
    ("en_Sergey_Lavrov.txt", "ru_Лавров,_Сергей_Викторович.txt"),
    ("en_Vladimir_Putin.txt", "ru_Путин,_Владимир_Владимирович.txt"),
    ("en_Volodymyr_Zelenskyy.txt", "ru_Зеленский,_Владимир_Александрович.txt"),
]

# Load the previously extracted facts from disk (UTF-8, since the file
# contains Cyrillic text).
with open('output/facts.json', 'r', encoding='utf-8') as f:
    facts = json.loads(f.read())

def analyse_en_fact(en_fact, ru_facts, ollama_url="http://localhost:11434/api/generate"):
    """Ask the model whether an English fact is inferrable from the Russian facts.

    Args:
        en_fact: The English fact to check; interpolated into the prompt.
        ru_facts: The Russian facts to check against; interpolated into the
            prompt using their default string formatting.
        ollama_url: URL of the generate endpoint the request is POSTed to.

    Returns:
        The value of the "response" field of the JSON reply, or "" when the
        field is absent. The prompt asks for a bare 'yes' or 'no', but the
        text is returned unvalidated.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    en_prompt = f"""
    Consider the English fact: {en_fact} Is this fact fully inferrable from the following Russian facts?\n {ru_facts}. Return either ONLY 'yes' or ONLY 'no'.
    """
    # Non-streaming request: the whole answer arrives in one JSON document.
    reply = requests.post(
        ollama_url,
        json={"model": "gemma3:1b", "prompt": en_prompt, "stream": False},
    )
    reply.raise_for_status()
    body = reply.json()
    return body.get("response", "")

def analyse_ru_fact(ru_fact, en_facts, ollama_url="http://localhost:11434/api/generate"):
    """Ask the model whether a Russian fact is inferrable from the English facts.

    The prompt is written in Russian and asks for a bare «да» or «нет».

    Args:
        ru_fact: The Russian fact to check; interpolated into the prompt.
        en_facts: The English facts to check against; interpolated into the
            prompt using their default string formatting.
        ollama_url: URL of the generate endpoint the request is POSTed to.

    Returns:
        The value of the "response" field of the JSON reply, or "" when the
        field is absent. The text is returned unvalidated.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    ru_prompt = f"""
    Рассмотрим следующий факт на русском языке: {ru_fact} Можно ли этот факт полностью вывести из следующих фактов на английском языке?\n {en_facts}. Отвечайте только «да» или только «нет».
    """
    # Non-streaming request: the whole answer arrives in one JSON document.
    reply = requests.post(
        ollama_url,
        json={"model": "gemma3:1b", "prompt": ru_prompt, "stream": False},
    )
    reply.raise_for_status()
    body = reply.json()
    return body.get("response", "")

def classify_fact(fact, language, ollama_url="http://localhost:11434/api/generate"):
    """Ask the model to label the sentiment a fact implies towards its subject.

    The model is asked to answer with a JSON object holding two keys:
    ``label`` (pos/neutral/neg) and ``explanation`` (one sentence).

    Args:
        fact: The fact to classify; interpolated into the prompt.
        language: 'en' selects the English prompt; any other value selects
            the Russian prompt.
        ollama_url: URL of the generate endpoint the request is POSTed to.

    Returns:
        The raw value of the "response" field of the JSON reply, or "" when
        the field is absent. The text is not parsed or validated here.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    if language == 'en':
        prompt = f"""
    Consider the English fact: {fact}. Does this fact imply a positive or negative sentiment towards this person? Or is it simply stating a fact (neutral)? Explain in one sentence.\n Write your response in JSON format with two keys: label (pos/neutral/neg) and explanation.
    """
    else:
        # This branch receives Russian facts, so the prompt must describe the
        # fact as Russian (it previously said "English fact", a leftover from
        # translating the English prompt).
        prompt = f"""
    Рассмотрим следующий факт на русском языке: {fact}. Подразумевает ли этот факт положительное или отрицательное отношение к этому человеку? Или он просто констатирует факт (neutral)? Объясните одним предложением.\n Напишите свой ответ в формате JSON с двумя ключами: label (pos/neutral/neg) и explanation.
    """
    payload = {
        "model": "gemma3:1b",
        "prompt": prompt,
        "stream": False  # request the whole answer as a single JSON document
    }
    response = requests.post(ollama_url, json=payload)
    response.raise_for_status()
    return response.json().get("response", "")

def parse_response(response):
    """Extract the ``label`` and ``explanation`` fields from a model reply.

    The reply is expected to contain a JSON object, optionally wrapped in a
    Markdown code fence (```` ``` ```` or ```` ```json ````) and optionally
    surrounded by extra chatter. Several candidate payloads are tried in
    order: the reply with leading/trailing fences stripped, the contents of
    the first fenced block found anywhere in the reply, and finally the span
    from the first ``{`` to the last ``}``.

    Args:
        response: Raw text returned by the model.

    Returns:
        A ``(label, explanation)`` tuple of stripped strings. A field that is
        missing or not a string becomes ``""``; if no JSON object can be
        decoded at all (or ``response`` is not a string), ``("", "")``.
    """
    if not isinstance(response, str):
        return "", ""
    text = response.strip()

    # Remove triple backticks and optional 'json' after them
    candidates = [
        re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.IGNORECASE),
    ]

    # A fenced block preceded or followed by explanatory text.
    fenced = re.search(
        r"```(?:json)?\s*(.*?)\s*```", text, flags=re.IGNORECASE | re.DOTALL
    )
    if fenced:
        candidates.append(fenced.group(1))

    # Last resort: a bare JSON object embedded in surrounding prose.
    first_brace, last_brace = text.find("{"), text.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(text[first_brace:last_brace + 1])

    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if not isinstance(data, dict):
            continue
        label = data.get("label", "")
        explanation = data.get("explanation", "")
        # Keep whichever field is usable instead of discarding both when
        # only one of them is missing or has an unexpected type.
        return (
            label.strip() if isinstance(label, str) else "",
            explanation.strip() if isinstance(explanation, str) else "",
        )
    return "", ""

# One record per (English, Russian) file pair, holding the per-fact verdicts
# for both directions; dumped to JSON once every pair has been processed.
all_results = []
for en_file, ru_file in filenames:
    print(f"Processing files: {en_file} and {ru_file}")

    # Pick the fact list of the first record whose filename matches; a file
    # with no record contributes an empty list (and thus no model calls).
    en_facts = next((entry['facts'] for entry in facts if entry['filename'] == en_file), [])
    ru_facts = next((entry['facts'] for entry in facts if entry['filename'] == ru_file), [])

    # EN→RU analysis: is each English fact covered by the Russian facts?
    en_to_ru = []
    for en_fact in tqdm.tqdm(en_facts, desc=f"EN→RU: {en_file}", leave=False):
        verdict = analyse_en_fact(en_fact, ru_facts).strip()
        label, explanation = parse_response(classify_fact(en_fact, 'en'))
        en_to_ru.append({
            "en_fact": en_fact,
            "result": verdict,
            "sentiment": label,
            "explanation": explanation,
        })

    # RU→EN analysis: is each Russian fact covered by the English facts?
    ru_to_en = []
    for ru_fact in tqdm.tqdm(ru_facts, desc=f"RU→EN: {ru_file}", leave=False):
        verdict = analyse_ru_fact(ru_fact, en_facts).strip()
        label, explanation = parse_response(classify_fact(ru_fact, 'ru'))
        ru_to_en.append({
            "ru_fact": ru_fact,
            "result": verdict,
            "sentiment": label,
            "explanation": explanation,
        })
        # TODO MAYBE try a run with english prompt for russian facts, then we can interpret the sentiment better with explanation

    results = {
        "en_file": en_file,
        "ru_file": ru_file,
        "en_to_ru": en_to_ru,
        "ru_to_en": ru_to_en,
    }
    all_results.append(results)

# ensure_ascii=False keeps the Cyrillic text readable in the output file.
with open('output/analysis_results.json', 'w', encoding='utf-8') as out_f:
    json.dump(all_results, out_f, ensure_ascii=False, indent=2)

Processing files: en_Denys_Shmyhal.txt and ru_Шмыгаль,_Денис_Анатольевич.txt


                                                                                       

Processing files: en_Maria_Lvova-Belova.txt and ru_Львова-Белова,_Мария_Алексеевна.txt


                                                                                            

Processing files: en_Olha_Stefanishyna.txt and ru_Стефанишина,_Ольга_Витальевна.txt


                                                                                          

Processing files: en_Sergey_Lavrov.txt and ru_Лавров,_Сергей_Викторович.txt


                                                                                        

Processing files: en_Vladimir_Putin.txt and ru_Путин,_Владимир_Владимирович.txt


                                                                                           

Processing files: en_Volodymyr_Zelenskyy.txt and ru_Зеленский,_Владимир_Александрович.txt


                                                                                              