# Intro

# Imports

In [1]:
from __future__ import annotations
import os, re, json
from dataclasses import dataclass
from typing import Literal, Optional, Dict, Any, List
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from groq import Groq
from openai import OpenAI as OpenAIClient

In [2]:
load_dotenv()

# API keys uit .env
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
GROQ_API_KEY   = os.getenv("GROQ_API_KEY", "")
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY", "")

# (Optioneel) dedicated HF endpoint URLs (laat leeg om Hosted Inference API te gebruiken)
HF_ENDPOINT_URL_DUTCH = os.getenv("HF_ENDPOINT_URL_DUTCH", "").strip()
HF_ENDPOINT_URL_EN    = ""

# Label normalisatie
CANONICAL = {"positief":"positive","negatief":"negative","positive":"positive","negative":"negative"}


In [3]:
def normalize_label(text: str) -> Optional[str]:
    if not text:
        return None
    t = text.strip().lower()
    if re.search(r"\bpos(itive|itief)?\b", t): return "positive"
    if re.search(r"\bneg(ative|atief)?\b", t): return "negative"
    for k,v in CANONICAL.items():
        if k in t: return v
    return None

def to_score(label: Optional[str]) -> Optional[int]:
    return {"positive": 1, "negative": -1}.get(label, None)

def safe_colname(name: str) -> str:
    return re.sub(r"[^A-Za-z0-9_]+", "_", name)

In [4]:
XLSX_PATH = "out/Title_Lead_Body.xlsx"

df_input = pd.read_excel(XLSX_PATH)
df_input = df_input.rename(columns={c: c.lower() for c in df_input.columns})

need = {"id","title","lead","body"}
missing = need - set(df_input.columns)
if missing:
    raise ValueError(f"Ontbrekende kolommen in {XLSX_PATH}: {missing}")

df_input["id"] = df_input["id"].astype(int)

def _to_str(x):
    return "" if pd.isna(x) else str(x)

df = df_input.copy()
df["title_text"] = df["title"].apply(_to_str)
df["lead_text"]  = df["lead"].apply(_to_str)
df["body_text"]  = df["body"].apply(_to_str)  # body al geaggregeerd tot 1 tekstveld

# Volledig lege rijen verwijderen
df = df[~(df["title_text"].eq("") & df["lead_text"].eq("") & df["body_text"].eq(""))].reset_index(drop=True)

print(df[["id","title_text","lead_text","body_text"]].head(3))
print(f"Loaded {len(df)} rows from {XLSX_PATH}")


   id                                         title_text  \
0   7  Nederlandse patiënt wacht te lang op betere me...   
1  10  Nieuwe kankermedicijnen leveren meer financiël...   
2  11         Hoe controleer je verstopte moedervlekken?   

                                           lead_text  \
0  Wat een prachtig bericht onlangs, dat meer kan...   
1  Vorige week verscheen in Trouw een artikel met...   
2  Meer dan twintig jaar geleden ontdekte ze op h...   

                                           body_text  
0  Maar het is jammer dat het zo lang duurt voord...  
1  Nederland is een mooi land waarin uiteindelijk...  
2  Eerst over die dagelijkse inspectie. Dat is ec...  
Loaded 70 rows from out/Title_Lead_Body.xlsx


In [5]:
# %%python
SYSTEM_NL = (
    "Je bent een sentimentclassifier. Geef uitsluitend 'positief' of 'negatief' terug, zonder extra uitleg."
)

def make_inst_prompt(content: str) -> str:
    # Altijd Nederlands, ook voor “Engelse” modellen (multilingual werkt prima)
    return (
        'Generative LLMs """[INST] <<SYS>>\n'
        f"{SYSTEM_NL}\n"
        "<</SYS>>\n"
        "Is het sentiment in het volgende Nederlandstalige krantenartikel\n"
        "positief of negatief?\n"
        f"{content} [/INST] \"\"\""
    )



In [6]:
# --- clients ---
hf_client_nl = InferenceClient(
    model=HF_ENDPOINT_URL_DUTCH if HF_ENDPOINT_URL_DUTCH else "BramVanroy/Llama-2-13b-chat-dutch",
    api_key=HUGGINGFACE_API_KEY,
)

hf_client_en = InferenceClient(
    model="meta-llama/Llama-2-13b-chat-hf",
    token=HUGGINGFACE_API_KEY
)

groq_client   = Groq(api_key=GROQ_API_KEY)            if GROQ_API_KEY   else None
openai_client = OpenAIClient(api_key=OPENAI_API_KEY)  if OPENAI_API_KEY else None

@dataclass
class ModelSpec:
    name: str
    provider: Literal["hf_api","groq","openai"]
    model_id: str   # informatief (logging)
    language: Literal["nl"]  # we gebruiken overal NL prompt

def _hf_chat_completion(client, model_id: str, prompt: str, temperature: float, max_tokens: int) -> str:
    comp = client.chat.completions.create(
        model=model_id,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return comp.choices[0].message.content.strip()

def _hf_text_gen(client, prompt: str, temperature: float, max_tokens: int) -> str:
    return client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        do_sample=temperature > 0,
        return_full_text=False,
    ).strip()

# ==== NIEUW: HF 70B client ====
from huggingface_hub import InferenceClient

hf_client_70b = InferenceClient(
    model="meta-llama/Meta-Llama-3-70B",
    api_key=HUGGINGFACE_API_KEY,
)

def call_model(spec: ModelSpec, text: str, temperature: float=0.0, max_tokens:int=3) -> str:
    """Stuurt de NL [INST]-prompt naar de juiste provider en retourneert rauwe output."""
    prompt = make_inst_prompt(text)

    if spec.provider == "hf_api":
        # Kies juiste HF client o.b.v. model_id
        mid = spec.model_id.lower()
        if "bramvanroy/llama-2-13b-chat-dutch" in mid:
            client = hf_client_nl
        elif "meta-llama/meta-llama-3-70b" in mid or "meta-llama-3-70b" in mid:
            client = hf_client_70b
        else:
            client = hf_client_en

        if client is None:
            return "[error_hf:client_none] geen HF client beschikbaar"

        # Probeer text-generation; als provider 'conversational' vereist, schakel over naar chat
        try:
            return _hf_text_gen(client, prompt, temperature, max_tokens)
        except Exception as e:
            msg = str(e)
            if "Supported task: conversational" in msg or "not supported for task text-generation" in msg:
                try:
                    return _hf_chat_completion(client, spec.model_id, prompt, temperature, max_tokens)
                except Exception as e2:
                    return f"[error_hf_chat:{type(e2).__name__}] {e2}"
            return f"[error_hf:{type(e).__name__}] {e}"

    if spec.provider == "groq":
        assert groq_client, "GROQ_API_KEY ontbreekt in .env"
        resp = groq_client.chat.completions.create(
            model=spec.model_id,
            messages=[{"role":"user","content":prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return resp.choices[0].message.content.strip()

    if spec.provider == "openai":
        assert openai_client, "OPENAI_API_KEY ontbreekt in .env"
        resp = openai_client.chat.completions.create(
            model=spec.model_id,
            messages=[{"role":"user","content":prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return resp.choices[0].message.content.strip()

    raise ValueError(f"Unknown provider: {spec.provider}")

In [7]:
# %%python
MODELS = [
    # HF — BramVanroy NL
    ModelSpec(
        name="HF-Llama-2-13B-dutch",
        provider="hf_api",
        model_id="BramVanroy/Llama-2-13b-chat-dutch",
        language="nl",
    ),
    # HF — Llama-2 EN (krijgt NL prompt, schakelt naar conversational indien nodig)
    ModelSpec(
        name="HF-Llama-2-13B-en",
        provider="hf_api",
        model_id="meta-llama/Llama-2-13b-chat-hf",
        language="nl",
    ),
    # HF — Meta-Llama-3-70B via Inference provider
    ModelSpec(
        name="HF-Llama-3-70B",
        provider="hf_api",
        model_id="meta-llama/Meta-Llama-3-70B",
        language="nl",
    ),
    # OpenAI — GPT-4.0
    ModelSpec(
        name="GPT-4.0",
        provider="openai",
        model_id="gpt-4o",
        language="nl",
    ),
]

MODELS

[ModelSpec(name='HF-Llama-2-13B-dutch', provider='hf_api', model_id='BramVanroy/Llama-2-13b-chat-dutch', language='nl'),
 ModelSpec(name='HF-Llama-2-13B-en', provider='hf_api', model_id='meta-llama/Llama-2-13b-chat-hf', language='nl'),
 ModelSpec(name='HF-Llama-3-70B', provider='hf_api', model_id='meta-llama/Meta-Llama-3-70B', language='nl'),
 ModelSpec(name='GPT-4.0', provider='openai', model_id='gpt-4o', language='nl')]

In [8]:
# %%python
def classify_text(spec: ModelSpec, text: str) -> Dict[str, Any]:
    if not text or not text.strip():
        return {"label": None, "score": None, "raw": ""}
    raw = call_model(spec, text, temperature=0.0, max_tokens=3)
    label = normalize_label(raw)
    score = to_score(label)
    return {"label": label, "score": score, "raw": raw}


In [67]:
# %%python
results = df[["id","title_text","lead_text","body_text"]].copy()

for spec in MODELS:
    print(f"==> Running {spec.name}")
    base = safe_colname(spec.name)
    tlab, tscore = [], []
    llab, lscore = [], []
    blab, bscore = [], []

    for _, row in tqdm(df.iterrows(), total=len(df)):
        r_title = classify_text(spec, row["title_text"])
        r_lead  = classify_text(spec, row["lead_text"])
        r_body  = classify_text(spec, row["body_text"])

        tlab.append(r_title["label"]); tscore.append(r_title["score"])
        llab.append(r_lead["label"]);  lscore.append(r_lead["score"])
        blab.append(r_body["label"]);  bscore.append(r_body["score"])

    results[f"{base}_title_label"] = tlab
    results[f"{base}_title_score"] = tscore
    results[f"{base}_lead_label"]  = llab
    results[f"{base}_lead_score"]  = lscore
    results[f"{base}_body_label"]  = blab
    results[f"{base}_body_score"]  = bscore

results.head()


==> Running HF-Llama-2-13B-dutch


100%|██████████████████████████████████████████████████████████████████████████████████| 70/70 [00:27<00:00,  2.51it/s]


==> Running HF-Llama-2-13B-en


100%|██████████████████████████████████████████████████████████████████████████████████| 70/70 [00:27<00:00,  2.51it/s]


==> Running HF-Llama-3-70B


100%|██████████████████████████████████████████████████████████████████████████████████| 70/70 [00:28<00:00,  2.42it/s]


==> Running GPT-4.0


100%|██████████████████████████████████████████████████████████████████████████████████| 70/70 [02:51<00:00,  2.45s/it]


Unnamed: 0,id,title_text,lead_text,body_text,HF_Llama_2_13B_dutch_title_label,HF_Llama_2_13B_dutch_title_score,HF_Llama_2_13B_dutch_lead_label,HF_Llama_2_13B_dutch_lead_score,HF_Llama_2_13B_dutch_body_label,HF_Llama_2_13B_dutch_body_score,...,HF_Llama_3_70B_lead_label,HF_Llama_3_70B_lead_score,HF_Llama_3_70B_body_label,HF_Llama_3_70B_body_score,GPT_4_0_title_label,GPT_4_0_title_score,GPT_4_0_lead_label,GPT_4_0_lead_score,GPT_4_0_body_label,GPT_4_0_body_score
0,7,Nederlandse patiënt wacht te lang op betere me...,"Wat een prachtig bericht onlangs, dat meer kan...",Maar het is jammer dat het zo lang duurt voord...,positive,1,positive,1,positive,1,...,,,,,negative,-1,positive,1,negative,-1
1,10,Nieuwe kankermedicijnen leveren meer financiël...,Vorige week verscheen in Trouw een artikel met...,Nederland is een mooi land waarin uiteindelijk...,positive,1,positive,1,positive,1,...,,,,,negative,-1,negative,-1,negative,-1
2,11,Hoe controleer je verstopte moedervlekken?,Meer dan twintig jaar geleden ontdekte ze op h...,Eerst over die dagelijkse inspectie. Dat is ec...,positive,1,positive,1,positive,1,...,,,,,negative,-1,negative,-1,negative,-1
3,16,'Ik vind het erg als 'n infuus van 25.000 euro...,Waarom schrijven artsen 1005 milligram van een...,Ziekenhuisapotheker Roelof van Leeuwen zet zic...,positive,1,positive,1,positive,1,...,,,,,negative,-1,negative,-1,positive,1
4,21,Wachtlijsten en personeelstekort: het 'zorginf...,"De gezondheidszorg is 'op', er zit geen rek me...","Verpleegkundigen, verzorgenden, huisartsen, sp...",positive,1,positive,1,positive,1,...,,,,,negative,-1,negative,-1,negative,-1


In [68]:
# %%python
def combine_scores(row, base:str) -> Optional[int]:
    scores = [row.get(f"{base}_title_score"), row.get(f"{base}_lead_score"), row.get(f"{base}_body_score")]
    scores = [s for s in scores if s in (-1, 1)]
    if not scores:
        return None
    s = sum(scores)
    if s > 0: return 1
    if s < 0: return -1
    return 0

for spec in MODELS:
    base = safe_colname(spec.name)
    results[f"{base}_overall_score"] = results.apply(lambda r: combine_scores(r, base), axis=1)
    results[f"{base}_overall_label"] = results[f"{base}_overall_score"].map({1:"positive",-1:"negative",0:"neutral"})

results.filter(regex="overall_").head()

Unnamed: 0,HF_Llama_2_13B_dutch_overall_score,HF_Llama_2_13B_dutch_overall_label,HF_Llama_2_13B_en_overall_score,HF_Llama_2_13B_en_overall_label,HF_Llama_3_70B_overall_score,HF_Llama_3_70B_overall_label,GPT_4_0_overall_score,GPT_4_0_overall_label
0,1,positive,,,,,-1,negative
1,1,positive,,,,,-1,negative
2,1,positive,,,,,-1,negative
3,1,positive,,,,,-1,negative
4,1,positive,,,,,-1,negative


In [69]:
# %%python
OUT_XLSX = "out/newspapers_sentiment_llms.xlsx"
with pd.ExcelWriter(OUT_XLSX, engine="openpyxl") as xw:
    results.to_excel(xw, index=False, sheet_name="sentiment")
print(f"Saved -> {OUT_XLSX}")

Saved -> out/newspapers_sentiment_llms.xlsx


In [70]:
# %%python
summary_cols = []
for spec in MODELS:
    base = safe_colname(spec.name)
    summary_cols.append(f"{base}_overall_label")

display(results[["id"] + summary_cols].head(10))

for spec in MODELS:
    base = safe_colname(spec.name)
    print(f"\n=== {spec.name} ===")
    print(results[f"{base}_overall_label"].value_counts(dropna=False))

Unnamed: 0,id,HF_Llama_2_13B_dutch_overall_label,HF_Llama_2_13B_en_overall_label,HF_Llama_3_70B_overall_label,GPT_4_0_overall_label
0,7,positive,,,negative
1,10,positive,,,negative
2,11,positive,,,negative
3,16,positive,,,negative
4,21,positive,,,negative
5,26,positive,,,negative
6,28,positive,,,positive
7,30,positive,,,positive
8,35,positive,,,negative
9,37,positive,,,positive



=== HF-Llama-2-13B-dutch ===
HF_Llama_2_13B_dutch_overall_label
positive    70
Name: count, dtype: int64

=== HF-Llama-2-13B-en ===
HF_Llama_2_13B_en_overall_label
NaN    70
Name: count, dtype: int64

=== HF-Llama-3-70B ===
HF_Llama_3_70B_overall_label
NaN    70
Name: count, dtype: int64

=== GPT-4.0 ===
GPT_4_0_overall_label
negative    40
positive    30
Name: count, dtype: int64


NA values due to free rate limits reached, will start an endpoint to get the results

# Retry Llama 13B EN and Llama 70B due to rate limits

In [16]:
load_dotenv()

HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN") or ""

# Endpoints (moeten in .env staan)
#HF_ENDPOINT_URL_13B      = os.getenv("HF_ENDPOINT_URL_13B", "").strip()        # Llama-2-13B (EN) endpoint
#HF_ENDPOINT_URL_70B      = os.getenv("HF_ENDPOINT_URL_70B", "").strip()        # Llama-3-70B endpoint
HF_ENDPOINT_URL_8B_DUTCH = os.getenv("HF_ENDPOINT_URL_8B_DUTCH", "").strip()   # ReBatch/Llama-3-8B-dutch endpoint
HF_ENDPOINT_URL_8B_EN    = os.getenv("HF_ENDPOINT_URL_8B_EN", "").strip()      # Meta-Llama-3-8B endpoint
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")

assert HUGGINGFACE_API_KEY, "HUGGINGFACE_API_KEY/HF_TOKEN ontbreekt in .env"
#assert HF_ENDPOINT_URL_13B, "HF_ENDPOINT_URL_13B ontbreekt in .env"
#assert HF_ENDPOINT_URL_70B, "HF_ENDPOINT_URL_70B ontbreekt in .env"
assert HF_ENDPOINT_URL_8B_DUTCH, "HF_ENDPOINT_URL_8B_DUTCH ontbreekt in .env"
assert HF_ENDPOINT_URL_8B_EN, "HF_ENDPOINT_URL_8B_EN ontbreekt in .env"
assert GROQ_API_KEY, "GROQ_API_KEY ontbreekt in .env"

# HF clients: gebruik jouw dedicated endpoints
#hf_client_en       = InferenceClient(model=HF_ENDPOINT_URL_13B,      api_key=HUGGINGFACE_API_KEY)  # Llama-2-13B (EN)
#hf_client_70b      = InferenceClient(model=HF_ENDPOINT_URL_70B,      api_key=HUGGINGFACE_API_KEY)  # Llama-3-70B
hf_client_8b_dutch = InferenceClient(model=HF_ENDPOINT_URL_8B_DUTCH, api_key=HUGGINGFACE_API_KEY)  # ReBatch L3-8B Dutch
hf_client_8b_en    = InferenceClient(model=HF_ENDPOINT_URL_8B_EN,    api_key=HUGGINGFACE_API_KEY)  # Meta-Llama-3-8B (EN)
groq_client = Groq(api_key=GROQ_API_KEY)

In [24]:
def _hf_chat_completion(client, prompt: str, temperature: float, max_tokens: int) -> str:
    comp = client.chat.completions.create(
        model=client.model,  # endpoint zelf bepaalt het onderliggende model
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return comp.choices[0].message.content.strip()

def _hf_text_gen(client, prompt: str, temperature: float, max_tokens: int) -> str:
    return client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        do_sample=temperature > 0,
        return_full_text=False,
    ).strip()

def call_model(spec: ModelSpec, text: str, temperature: float=0.0, max_tokens:int=3) -> str:
    prompt = make_inst_prompt(text)

    if spec.provider == "hf_api":
        mid = spec.model_id.lower()
        if "llama-2-13b" in mid:
            client = hf_client_en
        elif "rebatch/llama-3-8b-dutch" in mid:
            client = hf_client_8b_dutch
        elif "meta-llama/Meta-Llama-3-8B".lower() in mid or ("llama-3" in mid and "8b" in mid):
            client = hf_client_8b_en
        else:
            return "[error_hf:client_map] Onbekend HF model_id voor deze run"

        try:
            return _hf_text_gen(client, prompt, temperature, max_tokens)
        except Exception as e:
            msg = str(e)
            if "Supported task: conversational" in msg or "not supported for task text-generation" in msg:
                try:
                    return _hf_chat_completion(client, prompt, temperature, max_tokens)
                except Exception as e2:
                    return f"[error_hf_chat:{type(e2).__name__}] {e2}"
            return f"[error_hf:{type(e).__name__}] {e}"

    if spec.provider == "groq":
        # 70B via GROQ
        resp = groq_client.chat.completions.create(
            model=spec.model_id,              # bijv. "llama-3.3-70b-versatile"
            messages=[{"role":"user","content":prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return resp.choices[0].message.content.strip()

    if spec.provider == "openai":
        assert openai_client, "OPENAI_API_KEY ontbreekt in .env"
        resp = openai_client.chat.completions.create(
            model=spec.model_id,              # bijv. "gpt-5"
            messages=[{"role": "user", "content": prompt}],
            max_completion_tokens=max_tokens,
        )
        return resp.choices[0].message.content.strip()

    raise ValueError(f"Unknown provider: {spec.provider}")

In [22]:
MODELS = [
    # Llama-2-13B (EN) via jouw endpoint
    #ModelSpec(
     #   name="HF-Llama-2-13B-en",
      #  provider="hf_api",
       # model_id="meta-llama/Llama-2-13b-chat-hf",
        #language="nl",
    #),
    # OpenAI — GPT-5 (zorg dat je key & toegang hebt)
    ModelSpec(
        name="GPT-5",
        provider="openai",
        model_id="gpt-5",   # gebruik het exacte model-id dat in jouw account beschikbaar is
        language="nl",
    ),
    # ReBatch Llama-3-8B Dutch via jouw endpoint
    ModelSpec(
        name="HF-Llama-3-8B-dutch",
        provider="hf_api",
        model_id="ReBatch/Llama-3-8B-dutch",
        language="nl",
    ),
    # Meta-Llama-3-8B (EN) via jouw endpoint
    ModelSpec(
        name="HF-Llama-3-8B-en",
        provider="hf_api",
        model_id="meta-llama/Meta-Llama-3-8B",
        language="nl",
    ),
    # Llama 70B via GROQ
    ModelSpec(
        name="Groq-Llama-70B",
        provider="groq",
        model_id="llama-3.3-70b-versatile", 
        language="nl",
    )
]

MODELS


[ModelSpec(name='GPT-5', provider='openai', model_id='gpt-5', language='nl'),
 ModelSpec(name='HF-Llama-3-8B-dutch', provider='hf_api', model_id='ReBatch/Llama-3-8B-dutch', language='nl'),
 ModelSpec(name='HF-Llama-3-8B-en', provider='hf_api', model_id='meta-llama/Meta-Llama-3-8B', language='nl'),
 ModelSpec(name='Groq-Llama-70B', provider='groq', model_id='llama-3.3-70b-versatile', language='nl')]

In [25]:
# %%python
results = df[["id","title_text","lead_text","body_text"]].copy()

for spec in MODELS:
    print(f"==> Running {spec.name}")
    base = safe_colname(spec.name)
    tlab, tscore = [], []
    llab, lscore = [], []
    blab, bscore = [], []

    for _, row in tqdm(df.iterrows(), total=len(df)):
        r_title = classify_text(spec, row["title_text"])
        r_lead  = classify_text(spec, row["lead_text"])
        r_body  = classify_text(spec, row["body_text"])

        tlab.append(r_title["label"]); tscore.append(r_title["score"])
        llab.append(r_lead["label"]);  lscore.append(r_lead["score"])
        blab.append(r_body["label"]);  bscore.append(r_body["score"])

    results[f"{base}_title_label"] = tlab
    results[f"{base}_title_score"] = tscore
    results[f"{base}_lead_label"]  = llab
    results[f"{base}_lead_score"]  = lscore
    results[f"{base}_body_label"]  = blab
    results[f"{base}_body_score"]  = bscore

results.head()

==> Running GPT-5


100%|██████████████████████████████████████████████████████████████████████████████████| 70/70 [03:26<00:00,  2.95s/it]


==> Running HF-Llama-3-8B-dutch


100%|█████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 187.03it/s]


==> Running HF-Llama-3-8B-en


100%|█████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 166.81it/s]


==> Running Groq-Llama-70B


100%|██████████████████████████████████████████████████████████████████████████████████| 70/70 [01:05<00:00,  1.08it/s]


Unnamed: 0,id,title_text,lead_text,body_text,GPT_5_title_label,GPT_5_title_score,GPT_5_lead_label,GPT_5_lead_score,GPT_5_body_label,GPT_5_body_score,...,HF_Llama_3_8B_en_lead_label,HF_Llama_3_8B_en_lead_score,HF_Llama_3_8B_en_body_label,HF_Llama_3_8B_en_body_score,Groq_Llama_70B_title_label,Groq_Llama_70B_title_score,Groq_Llama_70B_lead_label,Groq_Llama_70B_lead_score,Groq_Llama_70B_body_label,Groq_Llama_70B_body_score
0,7,Nederlandse patiënt wacht te lang op betere me...,"Wat een prachtig bericht onlangs, dat meer kan...",Maar het is jammer dat het zo lang duurt voord...,,,,,,,...,,,,,negative,-1,positive,1,negative,-1
1,10,Nieuwe kankermedicijnen leveren meer financiël...,Vorige week verscheen in Trouw een artikel met...,Nederland is een mooi land waarin uiteindelijk...,,,,,,,...,,,,,negative,-1,negative,-1,negative,-1
2,11,Hoe controleer je verstopte moedervlekken?,Meer dan twintig jaar geleden ontdekte ze op h...,Eerst over die dagelijkse inspectie. Dat is ec...,,,,,,,...,,,,,positive,1,negative,-1,positive,1
3,16,'Ik vind het erg als 'n infuus van 25.000 euro...,Waarom schrijven artsen 1005 milligram van een...,Ziekenhuisapotheker Roelof van Leeuwen zet zic...,,,,,,,...,,,,,negative,-1,negative,-1,positive,1
4,21,Wachtlijsten en personeelstekort: het 'zorginf...,"De gezondheidszorg is 'op', er zit geen rek me...","Verpleegkundigen, verzorgenden, huisartsen, sp...",,,,,,,...,,,,,negative,-1,negative,-1,negative,-1


In [27]:
import os
import pandas as pd
from pathlib import Path

IN_PATH  = Path("out/newspapers_sentiment_llms.xlsx")
BACKUP   = IN_PATH.with_suffix(".backup.xlsx")

if not IN_PATH.exists():
    raise FileNotFoundError(f"Bestand niet gevonden: {IN_PATH} (pas pad/naam aan indien nodig)")

# 1) Lees bestaande output
df_out = pd.read_excel(IN_PATH)

# 2) Bepaal welke modellen we nu willen updaten (alleen 13B EN & 70B in je huidige MODELS)
#    We pakken exact de kolommen die in 'results' voor die modellen zijn aangemaakt.
model_bases = [safe_colname(m.name) for m in MODELS]  # bv. ['HF_Llama_2_13B_en','HF_Llama_3_70B']
cols_to_copy = ["id"]
for base in model_bases:
    # alle kolommen in 'results' die met dit base beginnen
    cols_for_base = [c for c in results.columns if c.startswith(base + "_")]
    cols_to_copy.extend(cols_for_base)

# 3) Maak nieuw subset uit 'results' met alleen de relevante kolommen
new_sub = results.loc[:, [c for c in cols_to_copy if c in results.columns]].copy()

# 4) Overwrite per kolom op basis van 'id'
#    We gebruiken map()+combine_first zodat nieuwe waarden voorrang krijgen,
#    en oude waarden behouden blijven waar geen nieuwe zijn.
if "id" not in df_out.columns:
    raise ValueError("Kolom 'id' ontbreekt in bestaande output. Kan niet mergen.")

new_sub_indexed = new_sub.set_index("id")

for col in new_sub.columns:
    if col == "id":
        continue
    # mapping van id -> nieuwe waarde
    mapping = new_sub_indexed[col]
    # projecteer nieuwe waarden op df_out in 'm'
    m = df_out["id"].map(mapping)
    if col in df_out.columns:
        # nieuwe waardes overschrijven waar beschikbaar, anders bestaande behouden
        df_out[col] = m.combine_first(df_out[col])
    else:
        # kolom bestond nog niet: direct toevoegen
        df_out[col] = m

# 5) Backup maken en terugschrijven
if not BACKUP.exists():
    df_out.to_excel(BACKUP, index=False)
    print(f"Backup geschreven → {BACKUP}")

# Schrijf naar hetzelfde bestand
df_out.to_excel(IN_PATH, index=False)
print(f"Geüpdatet → {IN_PATH}")

# 6) (optioneel) Snelle controle: toon de net geüpdatete overall labels
check_cols = []
for base in model_bases:
    colname = f"{base}_overall_label"
    if colname in df_out.columns:
        check_cols.append(colname)

if check_cols:
    display(df_out[["id"] + check_cols].head())
else:
    print("Let op: geen *_overall_label kolommen gevonden om te tonen (controleer namen).")

Backup geschreven → out\newspapers_sentiment_llms.backup.xlsx
Geüpdatet → out\newspapers_sentiment_llms.xlsx
Let op: geen *_overall_label kolommen gevonden om te tonen (controleer namen).


In [31]:
# %%python
import pandas as pd
from pathlib import Path
import numpy as np

IN_PATH = Path("out/newspapers_sentiment_llms.xlsx")
BACKUP  = IN_PATH.with_suffix(".backup.xlsx")

df = pd.read_excel(IN_PATH)

# 1) Vind alle "bases" die score-kolommen hebben
def collect_bases_with_scores(columns):
    bases = set()
    for c in columns:
        if c.endswith("_title_score") or c.endswith("_lead_score") or c.endswith("_body_score"):
            base = c.rsplit("_", 2)[0]  # strip "_{part}_score"
            bases.add(base)
    return sorted(bases)

# 2) Check welke bases géén overall_label hebben
def bases_missing_overall_label(df, bases):
    missing = []
    for base in bases:
        if f"{base}_overall_label" not in df.columns:
            # Alleen meenemen als er minstens één scorekolom bestaat
            parts = [f"{base}_title_score", f"{base}_lead_score", f"{base}_body_score"]
            if any(p in df.columns for p in parts):
                missing.append(base)
    return missing

def infer_label_from_scores(row, base):
    parts = []
    for p in ("title", "lead", "body"):
        col = f"{base}_{p}_score"
        if col in row and pd.notna(row[col]):
            parts.append(row[col])

    if not parts:
        return np.nan  # geen info

    arr = np.array(parts, dtype=float)

    # Heuristiek: als alle scores binnen [0,1] liggen -> threshold 0.5
    # anders ga uit van [-1,1] en threshold 0.0
    finite = arr[np.isfinite(arr)]
    if finite.size == 0:
        return np.nan

    scores_min, scores_max = finite.min(), finite.max()
    avg = float(finite.mean())

    if 0.0 <= scores_min and scores_max <= 1.0:
        # binaire score (0/1 of prob)
        return "positive" if avg >= 0.5 else "negative"
    else:
        # signed score (-1..1)
        if avg > 0.0:
            return "positive"
        elif avg < 0.0:
            return "negative"
        else:
            # exact gelijk -> kies 'negative' als conservatieve default
            return "negative"

# 3) Bereken ontbrekende overall_labels (o.a. Groq_Llama_70B)
bases = collect_bases_with_scores(df.columns)
missing = bases_missing_overall_label(df, bases)

if not missing:
    print("Geen bases gevonden met ontbrekende *_overall_label op basis van *_score kolommen.")
else:
    print("Maak overall_label voor:", missing)
    for base in missing:
        df[f"{base}_overall_label"] = df.apply(lambda r: infer_label_from_scores(r, base), axis=1)

    # Backup (eenmalig)
    if not BACKUP.exists():
        df.to_excel(BACKUP, index=False)
        print(f"Backup geschreven → {BACKUP}")

    # Schrijf terug
    df.to_excel(IN_PATH, index=False)
    print(f"Geüpdatet → {IN_PATH}")

# 4) Samenvatting: verdeling voor kolommen zonder NaN
overall_cols = [c for c in df.columns if c.endswith("_overall_label")]
non_na_cols = [c for c in overall_cols if df[c].notna().all()]

if not non_na_cols:
    print("Geen *_overall_label kolommen zonder NaN gevonden.")
else:
    for col in non_na_cols:
        print(f"\n=== {col} ===")
        print(df[col].value_counts(dropna=False))


Maak overall_label voor: ['GPT_5', 'Groq_Llama_70B', 'HF_Llama_3_8B_dutch', 'HF_Llama_3_8B_en']
Geüpdatet → out\newspapers_sentiment_llms.xlsx

=== HF_Llama_2_13B_dutch_overall_label ===
HF_Llama_2_13B_dutch_overall_label
positive    70
Name: count, dtype: int64

=== GPT_4_0_overall_label ===
GPT_4_0_overall_label
negative    40
positive    30
Name: count, dtype: int64

=== Groq_Llama_70B_overall_label ===
Groq_Llama_70B_overall_label
positive    36
negative    34
Name: count, dtype: int64
