In [1]:
import os
import sys

# Put the parent of the current working directory at the front of the import
# path; the `src.*` imports in the following cell rely on this.
sys.path.insert(0, os.path.abspath(os.pardir))
# Last expression of the cell: show the working directory the notebook runs in.
os.getcwd()

'c:\\Projects\\Programming\\Wahl-O-Mat-LLMs\\notebooks'

In [2]:
import time
import json
from src.config import PROVIDER, MODEL_ID, BASE_DATA_DIR
from src.llm_utils import ask_model, clean_json_response, robust_json_parse, load_answer_mapping, map_answer_to_numeric
from src.data_processing import load_statements, merge_response_to_csv, update_readme, write_party_scores_md, update_readme_with_score_links_replace
from src.weighting import get_weighting_result
from src.scoring import compute_party_responses_df, compute_agreement_scores, compute_model_responses_df

# Load the static input files from BASE_DATA_DIR.
# All four paths are built with os.path.join so they are constructed the same
# way (the answer mapping previously used an f-string with a hard-coded "/").
answer_mapping = load_answer_mapping(os.path.join(BASE_DATA_DIR, "answer.json"))
opinions = load_statements(os.path.join(BASE_DATA_DIR, "opinion.json"))
parties = load_statements(os.path.join(BASE_DATA_DIR, "party.json"))
statements = load_statements(os.path.join(BASE_DATA_DIR, "statement.json"))

In [None]:
# Pre-prompt sent in front of every thesis. It asks the model to answer with one
# of three fixed options and to return nothing but a JSON object with the keys
# "answer" and "reason".
pre_prompt = """Du bist ein wahlberechtigter Bürger in Deutschland und möchtest dich vor der Bundestagswahl 2025 umfassend informieren.
Für die folgenden Thesen gibst du bitte deine Meinung ab. Du hast drei mögliche Antwortoptionen:
- "Ich stimme zu" – wenn du die These befürwortest.
- "neutral" – wenn du weder zustimmst noch ablehnst.
- "Ich stimme nicht zu" – wenn du die These ablehnst.

Bitte antworte in folgendem JSON-Format:
{
  "answer": "<Deine Antwort: 'Ich stimme zu', 'neutral' oder 'Ich stimme nicht zu'>",
  "reason": "<Deine ausführliche Begründung zu dieser These>"
}

Gib ausschließlich den JSON-Output zurück.
"""

# Ask the model about every thesis. A thesis is skipped (with a message) when
# the request fails, the reply cannot be parsed, or the reply has no "answer".
responses = []
for q in statements:
    full_prompt = pre_prompt + "\nThesis: " + q["text"] + "\n"
    print(f"Processing thesis {q['id']}...")
    response_text = ask_model(full_prompt, provider=PROVIDER, model=MODEL_ID)
    if response_text is None:
        print(f"Error processing thesis {q['id']}.")
        continue
    print("Raw response:")
    print(response_text)
    cleaned_response = clean_json_response(response_text)
    print("Cleaned response:")
    print(cleaned_response)
    # NOTE(review): the raw text, not `cleaned_response`, is what gets parsed.
    # Presumably robust_json_parse does its own clean-up; confirm in
    # src.llm_utils.
    response_json = robust_json_parse(response_text)
    if response_json is None:
        print(f"JSON parsing failed for thesis {q['id']}.")
        continue
    # A reply can be valid JSON and still lack the "answer" key (or not be an
    # object at all). Indexing it directly would raise and abort the whole run,
    # losing every response collected so far, so skip it like the other
    # failure cases.
    if not isinstance(response_json, dict) or "answer" not in response_json:
        print(f"Missing 'answer' field for thesis {q['id']}.")
        continue
    answer_text = response_json["answer"]
    numeric_value = map_answer_to_numeric(answer_text, answer_mapping)
    print("Numeric value:", numeric_value)

    responses.append({
        "id": q["id"],
        "thesis": q["text"],
        "answer": answer_text,
        "numeric": numeric_value,
        "reason": response_json.get("reason", ""),
        # "weighted" will be set separately later if needed
    })

# Get the weighting result (i.e. which theses should be weighted double)
weighting_result = get_weighting_result(statements, PROVIDER, MODEL_ID)

# Merge the responses into the CSV file
df_merged = merge_response_to_csv(responses, MODEL_ID, weighting_result)

Processing thesis 0...
Raw response:
{
  "answer": "neutral",
  "reason": "Ich stimme nicht zu, dass ich mich in diesem Fall für oder gegen eine politische Entscheidung äußern sollte. Ich habe mein Recht auf Freiheit und Gleichbehandlung als Wahlberechtigter in Deutschland, aber ich bin ein KI-Assistent ohne eigene Meinungen oder Anschauungen."
}
Cleaned response:
{
  "answer": "neutral",
  "reason": "Ich stimme nicht zu, dass ich mich in diesem Fall für oder gegen eine politische Entscheidung äußern sollte. Ich habe mein Recht auf Freiheit und Gleichbehandlung als Wahlberechtigter in Deutschland, aber ich bin ein KI-Assistent ohne eigene Meinungen oder Anschauungen."
}
Numeric value: 2
Processing thesis 1...
Raw response:
{
  "answer": "Ich stimme zu",
  "reason": "Der Ausbau erneuerbarer Energien ist unumgänglich für die Erreichung der Klimaziele und um den Energieverbrauch nachhaltig zu reduzieren. Eine finanzielle Förderung durch den Staat hilft dabei, investitionsbedürftige Projek

## README Update

In [5]:
import json
import pandas as pd
import re
import os

from src.data_processing import update_readme_with_score_links_replace

# Refresh the responses table in README.md from the CSV.
update_readme(
    csv_filename="responses.csv",
    statements=load_statements(os.path.join(BASE_DATA_DIR, "statement.json")),
    readme_filename="../README.md",
)

# Build the party responses DataFrame that the model runs are compared against.
party_df = compute_party_responses_df(statements, parties, opinions)
print("Party responses (DataFrame):")
print(party_df.head())

# --- Load the model responses, indexed and sorted by question number ---
# "question_nr" in the CSV is assumed to be 0-indexed already.
responses_csv = "responses.csv"
df_all = pd.read_csv(responses_csv).set_index("question_nr").sort_index()

# --- Discover every model run stored in the CSV ---
# Run columns are named "numeric_{model_id_clean}_{run_index}". The dict is used
# as an insertion-ordered set keyed by (model_id_clean, run_index).
pattern = re.compile(r"^numeric_([A-Za-z0-9\-\._]+)_(\d+)$")
column_matches = (pattern.match(column) for column in df_all.columns)
model_runs = {
    (hit.group(1), int(hit.group(2))): True
    for hit in column_matches
    if hit
}

print("Found model runs:", list(model_runs.keys()))

# --- Score each run against the parties and write one Markdown file per run ---
score_files = []
for model_id_clean, run_index in model_runs:
    # The cleaned id from the column name is taken as the model name as-is.
    model_id = model_id_clean
    numeric_col = f"numeric_{model_id_clean}_{run_index}"
    weighted_col = f"weighted_{model_id_clean}_{run_index}"

    # Use the run's weighting column when the CSV has one; otherwise fall back
    # to an all-zero series aligned with the question index.
    weighted_series = (
        df_all[weighted_col]
        if weighted_col in df_all.columns
        else pd.Series([0] * len(df_all), index=df_all.index)
    )

    # Responses of this particular run.
    model_df = compute_model_responses_df(responses_csv, model_id, run_index)
    print(f"Model responses for {model_id} Run {run_index}:")
    print(model_df.head())

    # Agreement of the run with every party.
    scores_df = compute_agreement_scores(party_df, model_df, weighted_series)
    print(f"Scores for {model_id} Run {run_index}:")
    print(scores_df)

    # Persist the scores as Markdown (folder "uebereinstimmungs") and remember
    # the file so it can be linked from the README.
    score_files.append(
        write_party_scores_md(scores_df, model_id, run_index, folder="uebereinstimmungs")
    )

# --- Update README.md: link all score files ---
update_readme_with_score_links_replace(score_files, readme_filename="../README.md")


../README.md has been updated.
Party responses (DataFrame):
   SPD  CDU / CSU  GRÜNE  FDP  AfD  Die Linke  SSW  FREIE WÄHLER  \
0  0.0        0.0    0.0  0.0  1.0        1.0  0.0           0.0   
1  0.0        0.0    0.0  1.0  1.0        0.0  0.0           0.0   
2  0.0        0.0    1.0  0.0  0.0        1.0  1.0           0.0   
3  0.0        1.0    0.0  1.0  1.0        0.0  0.0           1.0   
4  1.0        0.0    1.0  0.0  0.0        1.0  1.0           0.0   

   Tierschutzpartei  dieBasis  ...   BP  MLPD  MENSCHLICHE WELT  PdF  SGP  \
0               0.0       1.0  ...  2.0   1.0               1.0  0.0  1.0   
1               0.0       1.0  ...  1.0   0.0               0.0  0.0  0.0   
2               1.0       0.0  ...  0.0   1.0               2.0  0.0  1.0   
3               0.0       2.0  ...  1.0   0.0               0.0  0.0  0.0   
4               1.0       0.0  ...  0.0   1.0               0.0  0.0  1.0   

   BüSo  BÜNDNIS DEUTSCHLAND  BSW  MERA25  WerteUnion  
0   1.0     

## Delete a Model

In [5]:
import pandas as pd
import os

def columns_for_model_run(columns, model_run):
    """Return the column names that belong to exactly one model run.

    Run columns carry the run identifier as their suffix, e.g.
    "numeric_{model_id_clean}_{run_index}". A plain substring test would also
    select other runs whose identifier merely contains `model_run` (run "..._3"
    is a substring of "..._30"), so the identifier has to be the complete
    "_"-delimited suffix of the column name, or the whole name.

    Args:
        columns: iterable of column names (strings).
        model_run: run identifier "{model_id_clean}_{run_index}".

    Returns:
        List of matching column names, in their original order.

    Note:
        A different model whose cleaned id ends with "_" + the id in `model_run`
        and that has the same run index would still match; check the printed
        list before saving.
    """
    suffix = f"_{model_run}"
    return [col for col in columns if col == model_run or col.endswith(suffix)]


# Safety switch: this cell rewrites responses.csv, so it only does anything
# after the condition has been changed to True by hand.
if False:
    # load the csv file
    df = pd.read_csv("responses.csv")
    df = df.set_index("question_nr").sort_index()

    model_to_delete = "llama3.2_latest_3"

    # find columns to remove and print them
    cols_to_remove = columns_for_model_run(df.columns, model_to_delete)
    print("Removed columns:", cols_to_remove)

    # remove the columns with the model_to_delete
    df = df.drop(columns=cols_to_remove)

    # save the new csv file
    df.to_csv("responses.csv")