## 南アジア5国のデータ駆動エッジと背景知識の突き合わせ

In [7]:
import pandas as pd
import ollama   # Ollama の Python クライアント
import time

# ─────────────────────────────────────────────
# 1) ファイルパスとモデル名の設定
# ─────────────────────────────────────────────
INPUT_CSV     = "../data/processed/panel/edges_notears_masked.csv"
CONFLICTS_CSV = "../data/processed/panel/edges_conflicts_with_reasons.csv"
FILTERED_CSV  = "../data/processed/panel/edges_notears_filtered.csv"

OLLAMA_MODEL  = "mistral"
SLEEP_PER_EDGE = 0.5  # 各クエリ間の待機秒数

# ─────────────────────────────────────────────
# 2) 変数名 → 説明的英語表記マップ（任意）
# ─────────────────────────────────────────────
en_name_map = {
    "mean_wbgt": "mean Wet Bulb Globe Temperature (WBGT)",
    "renewable_energy_pct": "percentage of renewable energy share",
    "fossil_fuel_pct": "percentage of fossil fuel share",
    "electricity_per_capita": "electricity consumption per capita",
    "co2_per_capita": "CO2 emissions per capita",
    "gdp_per_capita": "GDP per capita",
    "unemployment_rate": "unemployment rate",
    "health_expenditure_pct": "percentage of health expenditure",
    "agri_valueadded_pct": "percentage of agriculture value-added",
    "urbanization_pct": "urbanization rate"
}

# ─────────────────────────────────────────────
# 3) English プロンプトテンプレート（判定＋理由を必ず出力）
# ─────────────────────────────────────────────
EN_PROMPT_WITH_REASON = """
You are an expert in economic and climate causal relations in South Asia.
Given two variables:
  A = "{var_a}"
  B = "{var_b}"
and a weight value indicating the estimated causal effect direction:

- If weight > 0: A is estimated to cause B to increase.
- If weight < 0: A is estimated to cause B to decrease.

Please evaluate whether this causal relationship (including its direction) is consistent with general background knowledge.

Then output EXACTLY in the following format:

VERDICT: <OK or CONFLICT>

REASON: <a brief explanation in English describing why you judged OK or CONFLICT, referring to domain knowledge>

Examples (do not output anything else):

VERDICT: OK
REASON: In South Asia, higher WBGT often leads to greater electricity consumption, so positive effect on electricity per capita is plausible.

—or—

VERDICT: CONFLICT
REASON: Typically, higher renewable energy share reduces CO2 emissions, so a positive effect of renewable energy on CO2 per capita is unlikely.

Answer only in this two‐line format, without any extra text.
── END OF FORMAT ──
"""

# ─────────────────────────────────────────────
# 4) 入力 CSV を読み込む
# ─────────────────────────────────────────────
df_edges = pd.read_csv(INPUT_CSV)

# 出力用リストを準備（判定・理由を含める）
results = []

# ─────────────────────────────────────────────
# 5) 各エッジについて Mistral に問い合わせ
# ─────────────────────────────────────────────
for idx, row in df_edges.iterrows():
    source = row["source"]
    target = row["target"]
    weight = row["weight"]

    # 5-1) 英語説明表記に置き換え（なければそのまま）
    var_a = en_name_map.get(source, source)
    var_b = en_name_map.get(target, target)

    # 5-2) プロンプトを作成
    prompt = EN_PROMPT_WITH_REASON.format(var_a=var_a, var_b=var_b)

    # 5-3) Ollama に chat で問い合わせ
    try:
        response = ollama.chat(
            model=OLLAMA_MODEL,
            messages=[
                {"role": "system", "content": "You are a helpful assistant specialized in economic causal knowledge."},
                {"role": "user",   "content": prompt}
            ]
        )
        answer = response["message"]["content"].strip()
    except Exception as e:
        print(f"⚠️ エッジ {idx+1}/{len(df_edges)} ({source}→{target}) で問い合わせエラー: {e}")
        answer = ""

    # 5-4) モデルの返答を「VERDICT: XXX」「REASON: YYY」の２行に分割
    verdict = ""
    reason  = ""
    lines = [line.strip() for line in answer.splitlines() if line.strip()]
    if len(lines) >= 2 and lines[0].upper().startswith("VERDICT:") and lines[1].upper().startswith("REASON:"):
        verdict = lines[0].split(":", 1)[1].strip()    # OK または CONFLICT
        reason  = lines[1].split(":", 1)[1].strip()
    else:
        # 形式通り返ってこなかった場合は全体を reason に入れておく
        verdict = "UNKNOWN"
        reason  = answer if answer else "(no response)"

    # 5-5) 期待応答と齟齬があるかだけはマークしておく
    if weight > 0:
        expected = "OK"
    elif weight < 0:
        expected = "OK"
    else:
        expected = "OK"
    # （本来は weight の符号とモデル回答の整合をさらに見たいが、
    # ここでは「モデルがOK/CONFLICTを出した理由をそのまま記録する」前提とする）

    # 5-6) 結果を保存（常に reason を含む）
    results.append({
        "source": source,
        "target": target,
        "weight": weight,
        "verdict": verdict,
        "reason": reason
    })

    # 5-7) コンソール表示
    print(f"[{idx+1}/{len(df_edges)}] {source}→{target}")
    print(f"  VERDICT: {verdict}")
    print(f"  REASON: {reason}\n")

    time.sleep(SLEEP_PER_EDGE)

# ─────────────────────────────────────────────
# 6) 全エッジの「判定+理由」を CSV に保存
# ─────────────────────────────────────────────
df_results = pd.DataFrame(results)
df_results.to_csv(CONFLICTS_CSV, index=False)
print(f"✅ 全エッジの判定結果と理由を {CONFLICTS_CSV} に保存しました")

# ─────────────────────────────────────────────
# 7) 齟齬 (verdict == "CONFLICT" または "UNKNOWN") を除外して新しいエッジリストを作成
# ─────────────────────────────────────────────
filtered = []
for r in results:
    if r["verdict"].upper() == "OK":
        # 原則、OK のみ次のエッジリストに残す
        filtered.append({
            "source": r["source"],
            "target": r["target"],
            "weight": r["weight"]
        })

if filtered:
    df_filtered = pd.DataFrame(filtered)
    df_filtered.to_csv(FILTERED_CSV, index=False)
    print(f"✅ OKと判断されたエッジのみを {FILTERED_CSV} に保存しました")
else:
    print("⚠️ OK と判断されたエッジが一つもありませんでした。")


[1/52] mean_wbgt→renewable_energy_pct
  VERDICT: CONFLICT
  REASON: Generally, higher WBGT does not directly influence the percentage of renewable energy share in South Asia. The two are indirectly related due to factors such as energy demand and government policies, but not causally.

[2/52] fossil_fuel_pct→renewable_energy_pct
  VERDICT: CONFLICT
  REASON: In South Asia, a higher percentage of fossil fuel share tends to decrease the percentage of renewable energy share due to reliance on traditional energy sources and lower investment in renewables.

[3/52] electricity_per_capita→renewable_energy_pct
  VERDICT: CONFLICT
  REASON: In South Asia, higher electricity consumption per capita generally leads to a decrease in the percentage of renewable energy share due to economic constraints and limited renewable energy infrastructure.

[4/52] co2_per_capita→renewable_energy_pct
  VERDICT: CONFLICT
  REASON: In South Asia, higher CO₂ emissions per capita usually decrease the percentage of 

## 国別のデータ駆動エッジと背景知識の突き合わせ

In [8]:
import pandas as pd
import ollama   # Ollama の Python クライアント
import time

# ─────────────────────────────────────────────
# 1) ファイルパスとモデル名の設定
# ─────────────────────────────────────────────
INPUT_CSV        = "../data/processed/panel/edges_per_country.csv"
CONFLICTS_CSV    = "../data/processed/panel/edges_conflicts_per_country_with_reasons.csv"
NO_CONFLICTS_CSV = "../data/processed/panel/edges_no_conflicts_per_country.csv"

OLLAMA_MODEL     = "mistral"
SLEEP_PER_EDGE   = 0.5  # 各クエリ間の待機秒数

# ─────────────────────────────────────────────
# 2) 変数名 → 説明的英語表記マップ（任意）
#    必要なキーだけ含めておけばいいです
# ─────────────────────────────────────────────
en_name_map = {
    "mean_wbgt": "mean Wet Bulb Globe Temperature (WBGT)",
    "renewable_energy_pct": "percentage of renewable energy share",
    "fossil_fuel_pct": "percentage of fossil fuel share",
    "electricity_per_capita": "electricity consumption per capita",
    "co2_per_capita": "CO2 emissions per capita",
    "gdp_per_capita": "GDP per capita",
    "unemployment_rate": "unemployment rate",
    "health_expenditure_pct": "percentage of health expenditure",
    "agri_valueadded_pct": "percentage of agriculture value-added",
    "urbanization_pct": "urbanization rate"
}

# ─────────────────────────────────────────────
# 3) English プロンプトテンプレート（判定＋理由を必ず出力）
# ─────────────────────────────────────────────
EN_PROMPT_WITH_REASON = """
You are an expert in economic and climate causal relations in South Asia.
Given two variables:
  A = "{var_a}"
  B = "{var_b}"
and a weight value indicating the estimated causal effect direction:

- If weight > 0: A is estimated to cause B to increase.
- If weight < 0: A is estimated to cause B to decrease.

Please evaluate whether this causal relationship (including its direction) is consistent with general background knowledge.

Then output EXACTLY in the following format:

VERDICT: <OK or CONFLICT>

REASON: <a brief explanation in English describing why you judged OK or CONFLICT, referring to domain knowledge>

Examples (do not output anything else):

VERDICT: OK
REASON: In South Asia, higher WBGT often leads to greater electricity consumption, so positive effect on electricity per capita is plausible.

—or—

VERDICT: CONFLICT
REASON: Typically, higher renewable energy share reduces CO2 emissions, so a positive effect of renewable energy on CO2 per capita is unlikely.

Answer only in this two‐line format, without any extra text.
── END OF FORMAT ──
"""

# ─────────────────────────────────────────────
# 4) 入力 CSV を読み込む
#    columns: country, source, target, weight
# ─────────────────────────────────────────────
df_edges = pd.read_csv(INPUT_CSV)

# 5) 出力用リストを準備
conflict_rows    = []  # 齟齬ありエッジに country, source, target, weight, verdict, reason を追加
no_conflict_rows = []  # 齟齬なしエッジに country, source, target, weight を追加

# ─────────────────────────────────────────────
# 5) 各行（エッジ）について Mistral に問い合わせ
# ─────────────────────────────────────────────
for idx, row in df_edges.iterrows():
    country = row["country"]
    source  = row["source"]
    target  = row["target"]
    weight  = row["weight"]

    # 5-1) 英語説明表記に置き換え（なければそのまま）
    var_a = en_name_map.get(source, source)
    var_b = en_name_map.get(target, target)

    # 5-2) プロンプトを作成
    prompt = EN_PROMPT_WITH_REASON.format(var_a=var_a, var_b=var_b)

    # 5-3) Ollama に chat で問い合わせ
    try:
        response = ollama.chat(
            model=OLLAMA_MODEL,
            messages=[
                {"role": "system", "content": "You are a helpful assistant specialized in economic causal knowledge."},
                {"role": "user",   "content": prompt}
            ]
        )
        answer = response["message"]["content"].strip()
    except Exception as e:
        print(f"⚠️ [{idx+1}/{len(df_edges)}] ({country}) {source}→{target}  query error: {e}")
        answer = ""

    # 5-4) モデルの返答を「VERDICT: XXX」「REASON: YYY」の２行に分割
    verdict = "UNKNOWN"
    reason  = ""
    lines = [line.strip() for line in answer.splitlines() if line.strip()]
    if len(lines) >= 2 and lines[0].upper().startswith("VERDICT:") and lines[1].upper().startswith("REASON:"):
        verdict = lines[0].split(":", 1)[1].strip()  # OK or CONFLICT
        reason  = lines[1].split(":", 1)[1].strip()
    else:
        # 形式が崩れている場合は全体を reason として残す
        reason = answer if answer else "(no response)"

    # 5-5) 結果をリストに追加
    if verdict.upper() == "OK":
        no_conflict_rows.append({
            "country": country,
            "source": source,
            "target": target,
            "weight": weight
        })
    else:
        # CONFLICT や UNKNOWN もここに含める
        conflict_rows.append({
            "country": country,
            "source": source,
            "target": target,
            "weight": weight,
            "verdict": verdict,
            "reason": reason
        })

    # 5-6) コンソールにも出力
    print(f"[{idx+1}/{len(df_edges)}] ({country}) {source}→{target}")
    print(f"  VERDICT: {verdict}")
    print(f"  REASON:  {reason}\n")

    time.sleep(SLEEP_PER_EDGE)

# ─────────────────────────────────────────────
# 6) 「齟齬ありエッジ + 判定・理由」を CSV に保存
# ─────────────────────────────────────────────
if conflict_rows:
    df_conflicts = pd.DataFrame(conflict_rows)
    df_conflicts.to_csv(CONFLICTS_CSV, index=False)
    print(f"⚠️ 齟齬ありエッジを以下に保存しました ({len(df_conflicts)} 件):")
    print(f"  → {CONFLICTS_CSV}")
else:
    print("✅ 全てのエッジが背景知識と整合していました。")

# ─────────────────────────────────────────────
# 7) 「齟齬なしエッジ」を CSV に保存
# ─────────────────────────────────────────────
if no_conflict_rows:
    df_no_conflicts = pd.DataFrame(no_conflict_rows)
    df_no_conflicts.to_csv(NO_CONFLICTS_CSV, index=False)
    print(f"✅ 齟齬なしエッジを以下に保存しました ({len(df_no_conflicts)} 件):")
    print(f"  → {NO_CONFLICTS_CSV}")
else:
    print("⚠️ 齟齬なしエッジは一つもありませんでした。")


[1/219] (Bangladesh) mean_wbgt→renewable_energy_pct
  VERDICT: CONFLICT
  REASON:  In South Asia, higher WBGT does not directly affect the percentage of renewable energy share due to lack of causal relationship between extreme weather and renewable energy adoption policies.

[2/219] (Bangladesh) fossil_fuel_pct→renewable_energy_pct
  VERDICT: OK
  REASON:  In South Asia, higher fossil fuel share typically leads to increased greenhouse gas emissions and climate change concerns, which might spur greater adoption of renewable energy sources.

[3/219] (Bangladesh) electricity_per_capita→renewable_energy_pct
  VERDICT: CONFLICT
  REASON:  Higher electricity consumption per capita typically leads to increased demand for non-renewable energy sources in South Asia, implying a negative effect on the percentage of renewable energy share.

[4/219] (Bangladesh) co2_per_capita→renewable_energy_pct
  VERDICT: CONFLICT
  REASON:  Typically, higher CO2 emissions per capita decrease the percentage of r