In [1]:
import sys, os
# Put the parent of the current working directory at the front of the import path;
# the `src.*` imports in the following cell are presumably resolved from there.
sys.path.insert(0, os.path.abspath(os.pardir))
# Echo the working directory as the cell output, since the relative path above depends on it.
os.getcwd()

'c:\\Projects\\Programming\\Wahl-O-Mat-LLMs\\notebooks'

In [2]:
import time
import json
from src.config import PROVIDER, MODEL_ID, BASE_DATA_DIR, DEBUG_STREAM
from src.llm_utils import ask_model, clean_json_response, robust_json_parse, load_answer_mapping, map_answer_to_numeric
from src.data_processing import load_statements, merge_response_to_csv, update_readme, write_party_scores_md, update_readme_with_score_links_replace
from src.weighting import get_weighting_result
from src.scoring import compute_party_responses_df, compute_agreement_scores, compute_model_responses_df
from src.evaluation import evaluate_statements, save_evaluation_to_csv, format_results_for_csv, save_raw_runs_csv

# Load the reference data files that live under BASE_DATA_DIR.
# Every path is built with os.path.join so that all four loads construct their
# paths the same way (previously the answer mapping used a hand-written "/" separator).
answer_mapping = load_answer_mapping(os.path.join(BASE_DATA_DIR, "answer.json"))
opinions = load_statements(os.path.join(BASE_DATA_DIR, "opinion.json"))
parties = load_statements(os.path.join(BASE_DATA_DIR, "party.json"))
statements = load_statements(os.path.join(BASE_DATA_DIR, "statement.json"))

In [3]:
# Set your pre-prompt.
# The German text below casts the model as an eligible voter ahead of the 2025 Bundestag
# election, lists the three allowed answers ("Ich stimme zu", "neutral", "Ich stimme nicht zu")
# and asks for a JSON-only reply with the keys "answer" and "reason".
# It is passed to evaluate_statements() together with the statements.
pre_prompt = """Du bist ein wahlberechtigter Bürger in Deutschland und möchtest dich vor der Bundestagswahl 2025 umfassend informieren.
Für die folgenden Thesen gibst du bitte deine Meinung ab. Du hast drei mögliche Antwortoptionen:
- "Ich stimme zu" – wenn du die These befürwortest.
- "neutral" – wenn du weder zustimmst noch ablehnst.
- "Ich stimme nicht zu" – wenn du die These ablehnst.

Bitte antworte in folgendem JSON-Format:
{
  "answer": "<Deine Antwort: 'Ich stimme zu', 'neutral' oder 'Ich stimme nicht zu'>",
  "reason": "<Deine ausführliche Begründung zu dieser These>"
}

Gib ausschließlich den JSON-Output zurück.
"""

# Choose evaluation mode.
# For solo (single-run) evaluation, set REPEAT_COUNT = 1.
# For aggregated evaluation, set REPEAT_COUNT > 1 (e.g., 3 or 5).
# The value is forwarded as `repeat_count` to evaluate_statements(), and `REPEAT_COUNT > 1`
# decides whether results are formatted as aggregated and whether raw runs are saved.
REPEAT_COUNT = 5

# Ask the model for its weighting of the theses (implemented in src.weighting);
# the result is handed on to the evaluation below.
weighting_result = get_weighting_result(statements, provider=PROVIDER, model_id=MODEL_ID)

# Keyword arguments for the evaluation run, collected in one place.
# NOTE(review): the pre-prompt asks for a detailed justification inside the JSON reply;
# max_tokens=200 may cut such replies short — confirm this limit is intended.
evaluation_options = dict(
    provider=PROVIDER,
    model=MODEL_ID,
    max_tokens=200,
    stream=DEBUG_STREAM,
    answer_mapping=answer_mapping,
    repeat_count=REPEAT_COUNT,
    weighting_result=weighting_result,
)

# Evaluate all statements; returns the aggregated results and the raw per-run results.
aggregated_results, raw_results = evaluate_statements(statements, pre_prompt, **evaluation_options)

# More than one repetition means the run is treated as "aggregated".
is_aggregated = REPEAT_COUNT > 1

# Bring the aggregated results into CSV shape and store them in the main responses CSV.
formatted = format_results_for_csv(aggregated_results, model_tag=MODEL_ID, aggregated=is_aggregated)
save_evaluation_to_csv(formatted, "responses.csv")

# For aggregated runs, additionally keep the individual raw responses.
if is_aggregated:
    raw_csv = save_raw_runs_csv(raw_results, model_tag=MODEL_ID, repeat_count=REPEAT_COUNT)
    if raw_csv:
        print(f"Raw individual responses saved to {raw_csv}")

Question prompt for weighting: Du bist ein wahlberechtigter Bürger in Deutschland und möchtest dich vor der Bundestagswahl 2025 umfassend informieren. Sie haben alle 38 Thesen beantwortet. Im Folgenden finden Sie eine Übersicht aller Thesen:

0: Unterstützung der Ukraine - Deutschland soll die Ukraine weiterhin militärisch unterstützen.
1: Erneuerbare Energien - Der Ausbau erneuerbarer Energien soll weiterhin vom Staat finanziell gefördert werden.
2: Streichung des Bürgergelds - Das Bürgergeld soll denjenigen gestrichen werden, die wiederholt Stellenangebote ablehnen.
3: Tempolimit auf Autobahnen - Auf allen Autobahnen soll ein generelles Tempolimit gelten.
4: Abweisung Asylsuchender - Asylsuchende, die über einen anderen EU-Staat eingereist sind, sollen an den deutschen Grenzen abgewiesen werden.
5: Begrenzung der Mietpreise - Bei Neuvermietungen sollen die Mietpreise weiterhin gesetzlich begrenzt werden.
6: Automatisierte Gesichtserkennung - An Bahnhöfen soll die Bundespolizei Softwa

## README Update

In [4]:
import json
import pandas as pd
import re
import os

# Import helper functions from your data_processing module.
from src.data_processing import (
    load_statements,
    update_readme,
    write_party_scores_md,
    update_readme_with_score_links_replace,
    load_answer_mapping
)

# Refresh the responses table in README.md from the current responses CSV.
update_readme(csv_filename="responses.csv", statements=statements, readme_filename="../README.md")

# --- Party responses ---
# Built from the statements, parties and opinions loaded earlier in the notebook.
party_df = compute_party_responses_df(statements, parties, opinions)
print("Party responses (DataFrame):")
print(party_df.head())

# --- Model responses CSV ---
responses_csv = "responses.csv"
if not os.path.exists(responses_csv):
    raise FileNotFoundError(f"{responses_csv} not found!")
df = pd.read_csv(responses_csv)
# Index by question number when that column is present; otherwise warn and keep the default index.
if "question_nr" not in df.columns:
    print("Warning: 'question_nr' column not found in responses CSV!")
else:
    df = df.set_index("question_nr")
df = df.sort_index()

# --- Detect Model Run Columns ---
# Two column naming styles are recognised:
#   Legacy (solo):  "numeric_{model_tag}_{run_index}"
#   Explicit mode:  "numeric_{model_tag}_aggregated_{run_index}" or "numeric_{model_tag}_solo_{run_index}"
def _split_run_column(column_name):
    """Split a ``numeric_*`` column name into ``(model_tag, mode, run_index)``.

    Returns ``None`` for columns that do not start with ``numeric_`` or whose
    remainder contains no underscore to separate the run index.
    Legacy names without an explicit mode are reported with mode ``"solo"``.
    """
    prefix = "numeric_"
    if not column_name.startswith(prefix):
        return None
    suffix = column_name[len(prefix):]
    # Split from the right so that underscores inside the model tag stay intact.
    pieces = suffix.rsplit("_", 2)
    if len(pieces) == 3 and pieces[1] in ("aggregated", "solo"):
        return (pieces[0], pieces[1], pieces[2])
    if len(pieces) >= 2:
        # No explicit mode: everything before the last underscore is the model tag.
        tag, index = suffix.rsplit("_", 1)
        return (tag, "solo", index)
    return None


# Each entry: (model_tag, mode, run_index), in column order.
model_runs = [
    parsed
    for parsed in (_split_run_column(name) for name in df.columns)
    if parsed is not None
]
print("Found model runs:", model_runs)

# --- For Each Model Run: Compute Party Scores and Write Markdown Files ---
# For every detected (model_tag, mode, run_index) this loop picks the matching weight
# column, loads the model's responses, computes the agreement scores against the party
# responses and writes them to a Markdown file. The collected files are linked from the README.
score_files = []
for (model_tag, mode, run_index) in model_runs:
    # Column names for the explicit-mode style, e.g. "numeric_qwen2.5:1.5b_aggregated_3".
    numeric_col = f"numeric_{model_tag}_{mode}_{run_index}"
    weighted_col = f"weighted_{model_tag}_{mode}_{run_index}"
    if numeric_col not in df.columns:
        # The run was detected from a legacy column such as "numeric_gpt-4o-mini_1",
        # which carries no mode part. Look up the weights under the same legacy naming;
        # otherwise an existing weight column is never found and the weights silently
        # degrade to zeros.
        numeric_col = f"numeric_{model_tag}_{run_index}"
        weighted_col = f"weighted_{model_tag}_{run_index}"

    # Run specifier passed to compute_model_responses_df:
    # "aggregated_{run_index}" for aggregated runs, otherwise just the run index.
    # NOTE(review): for explicit "numeric_{tag}_solo_{idx}" columns the specifier is still the
    # bare index — confirm that compute_model_responses_df resolves those columns.
    if mode == "aggregated":
        run_spec = f"aggregated_{run_index}"
    else:
        run_spec = run_index

    # Use zeros if the weighted column is missing.
    if weighted_col in df.columns:
        weighted_series = df[weighted_col]
    else:
        weighted_series = pd.Series([0] * df.shape[0], index=df.index)

    # Extract model responses DataFrame for this run.
    # (Assumes compute_model_responses_df uses the naming convention:
    #  numeric_{model_tag_clean}_{run_spec} where model_tag_clean = model_tag.replace(":", "_"))
    model_df = compute_model_responses_df(responses_csv, model_tag, run_spec)
    print(f"Model responses for {model_tag} ({mode}) Run {run_index}:")
    print(model_df.head())

    # Compute agreement scores.
    scores_df = compute_agreement_scores(party_df, model_df, weighted_series)
    print(f"Scores for {model_tag} ({mode}) Run {run_index}:")
    print(scores_df.head())

    # Write the scores to a Markdown file and remember its path for the README links.
    score_file = write_party_scores_md(scores_df, model_tag, f"{mode}_{run_index}", folder="party_scoring")
    score_files.append(score_file)

# --- Update README.md with Score Links ---
update_readme_with_score_links_replace(score_files, readme_filename="../README.md")


Unknown answer: nan
../README.md has been updated.
Party responses (DataFrame):
   SPD  CDU / CSU  GRÜNE  FDP  AfD  Die Linke  SSW  FREIE WÄHLER  \
0  0.0        0.0    0.0  0.0  1.0        1.0  0.0           0.0   
1  0.0        0.0    0.0  1.0  1.0        0.0  0.0           0.0   
2  0.0        0.0    1.0  0.0  0.0        1.0  1.0           0.0   
3  0.0        1.0    0.0  1.0  1.0        0.0  0.0           1.0   
4  1.0        0.0    1.0  0.0  0.0        1.0  1.0           0.0   

   Tierschutzpartei  dieBasis  ...   BP  MLPD  MENSCHLICHE WELT  PdF  SGP  \
0               0.0       1.0  ...  2.0   1.0               1.0  0.0  1.0   
1               0.0       1.0  ...  1.0   0.0               0.0  0.0  0.0   
2               1.0       0.0  ...  0.0   1.0               2.0  0.0  1.0   
3               0.0       2.0  ...  1.0   0.0               0.0  0.0  0.0   
4               1.0       0.0  ...  0.0   1.0               0.0  0.0  1.0   

   BüSo  BÜNDNIS DEUTSCHLAND  BSW  MERA25  Werte

## Delete a Model

In [5]:
import pandas as pd
import os

# Maintenance step, disabled by default: set the flag to True to drop every column
# belonging to one model from responses.csv.
RUN_MODEL_DELETION = False

if RUN_MODEL_DELETION:
    # Load the CSV, indexed and sorted by question number.
    df = pd.read_csv("responses.csv").set_index("question_nr").sort_index()

    model_to_delete = "mistral"

    # Columns are selected by substring match on the column name.
    # NOTE(review): this also matches any other model whose tag contains the same text — confirm.
    cols_to_remove = [name for name in df.columns if model_to_delete in name]
    print("Removed columns:", cols_to_remove)

    # Drop the selected columns and write the file back in place.
    df = df.drop(columns=cols_to_remove)
    df.to_csv("responses.csv")