In [None]:
# Step 1: Install & Import Dependencies
print("Step 1: Installing dependencies...")

!pip install openai python-dotenv --quiet

import os, json, hashlib, re, time
from functools import lru_cache
from collections import deque
from openai import OpenAI, OpenAIError
from typing import List, Dict


Step 1: Installing dependencies...


In [None]:
# Step 2: Initialize OpenAI Client with Deterministic Parameters
print("Step 2: Initializing OpenAI client…")

# Never embed the API key in the notebook: read it from the environment instead.
# When the variable is unset, api_key is None and the OpenAI client raises at
# construction time rather than failing later on the first request.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Global settings
MODEL_NAME = "gpt-4o-mini-2024-07-18"
TEMPERATURE = 0.0
TOP_P = 0.0          # smallest possible nucleus, i.e. as greedy as top_p allows
TOP_K = 1            # NOTE: not sent to the API; llm_call never reads this value
MAX_TOKENS = 2048
LOW_CONF_THRESH = 5.0  # credibility below this counts as a low-confidence flag

# Per-run counters; crit_validate resets both at the start of each run.
LLM_CALL_COUNT = 0
LOW_CONF_FLAG_COUNT = 0

print(f"OpenAI client initialized with model {MODEL_NAME}")


Step 2: Initializing OpenAI client…
OpenAI client initialized with model gpt-4o-mini-2024-07-18


In [None]:
# Step 3: Receptor – LLM Call with Caching & JSON Mode
@lru_cache(maxsize=128)  # memoize up to 128 distinct (prompt, seed) pairs
def _llm_call_cached(prompt: str, seed: int = None) -> str:
    """
    Send one chat-completion request in JSON mode and return the stripped reply.

    Failures are raised rather than returned, so `lru_cache` only ever stores
    successful replies. A cache hit skips this body entirely, including the
    log lines below.
    """
    print(f"[LLM CALL] 📡 Sending prompt to {MODEL_NAME} (max {MAX_TOKENS} tokens, temp={TEMPERATURE})")
    print(f"[LLM CALL] Prompt hash: {hashlib.sha256(prompt.encode()).hexdigest()[:8]}")
    params = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": TEMPERATURE,
        "top_p": TOP_P,
        "max_tokens": MAX_TOKENS,
        "response_format": {"type": "json_object"}
    }
    if seed is not None:
        params["seed"] = seed  # best-effort deterministic sampling
    resp = client.chat.completions.create(**params)
    content = resp.choices[0].message.content
    if content is None:
        # Raise instead of caching an empty answer for this prompt.
        raise ValueError("response contained no message content")
    print("[LLM CALL] ✅ Response received")
    return content.strip()


def llm_call(prompt: str, seed: int = None) -> str:
    """
    Call the OpenAI chat API with JSON mode and deterministic sampling settings.

    Args:
        prompt: Text sent as the single user message.
        seed: Optional sampling seed; only included in the request when not None.

    Returns:
        The stripped message content, or "" when the request fails.

    Successful replies are memoized per (prompt, seed). Failed requests are
    NOT memoized, so a transient API error does not pin "" in the cache and
    the next identical call retries the request.
    """
    try:
        return _llm_call_cached(prompt, seed)
    except OpenAIError as e:
        print(f"[LLM CALL] ❌ API error: {e}")
    except Exception as e:
        print(f"[LLM CALL] ❌ Unexpected error: {e}")
    return ""


# llm_call used to be the lru_cache wrapper itself; keep its introspection API.
llm_call.cache_info = _llm_call_cached.cache_info
llm_call.cache_clear = _llm_call_cached.cache_clear


In [None]:
# Step 4: Unconsciousness – Simple Two-Level Feedback Queue Scheduler
class MFQScheduler:
    """
    Minimal two-lane task runner.

    Tasks are zero-argument callables. Every task waiting in the high-priority
    lane runs before any task in the low-priority lane; within a lane the order
    is first-in, first-out. A task runs once and is never re-queued.
    """

    def __init__(self):
        self.high = deque()  # served first
        self.low = deque()   # served only while `high` is empty

    def enqueue(self, task, high_priority=False):
        """Append `task` to the high lane when `high_priority` is truthy, else to the low lane."""
        if high_priority:
            self.high.append(task)
        else:
            self.low.append(task)

    def run_all(self):
        """
        Lazily run queued tasks, yielding each task's return value.

        The lanes are re-checked before every step, so a task enqueued while
        the generator is being consumed is still picked up.
        """
        while True:
            if self.high:
                job = self.high.popleft()
            elif self.low:
                job = self.low.popleft()
            else:
                return
            yield job()


In [None]:
# Step 5: CRIT Subroutines (Claim, Reasons, Validation, Aggregation)
def extract_claim(document: str, seed: int = None) -> str:
    """
    Ask the LLM for the single main claim of `document`.

    Args:
        document: Text to analyse; embedded verbatim in the prompt.
        seed: Optional sampling seed forwarded to `llm_call`.

    Returns:
        The string under the "claim" key of the JSON reply. When the reply is
        not valid JSON, has no "claim" key, or the value is not a string, the
        raw reply text (stripped) is returned instead.

    Side effects:
        Increments the global LLM_CALL_COUNT once per invocation.
    """
    global LLM_CALL_COUNT

    print("🔍 Extracting claim with JSON mode…")
    prompt = (
        "Please respond ONLY with valid JSON: {\"claim\": \"<text>\"}.\n"
        "Identify the single main claim in the document below.\n\n"
        f"{document}\n\nJSON:"
    )

    LLM_CALL_COUNT += 1

    out = llm_call(prompt, seed)
    print(f"[LLM RAW CLAIM OUTPUT] {out!r}")
    try:
        claim = json.loads(out)["claim"]
    except (ValueError, KeyError, TypeError):
        # Catch only parse/shape problems; a bare `except:` would also swallow
        # KeyboardInterrupt and SystemExit.
        claim = None
    if not isinstance(claim, str):
        # Reply did not match the requested schema: fall back to the raw text.
        claim = out.strip()
    print(f"➡️ Claim: {claim}\n")
    return claim

def extract_reasons(document: str, claim: str, support: bool = True, seed: int = None) -> list[str]:
    """
    Ask the LLM for the reasons that support (or counter) `claim` in `document`.

    Args:
        document: Text to analyse; embedded verbatim in the prompt.
        claim: The claim the reasons refer to.
        support: True for supporting reasons, False for counter reasons.
        seed: Optional sampling seed forwarded to `llm_call`.

    Returns:
        A list of non-blank reason strings (possibly empty). Accepted reply shapes:
          * a JSON object with a key ending in "reasons" (case-insensitive);
          * a JSON object without such a key, in which case the first
            list-valued entry is used;
          * a bare JSON array;
          * anything else is split into lines with leading/trailing
            spaces, dashes and bullets removed.

    Side effects:
        Increments the global LLM_CALL_COUNT once per invocation.
    """
    global LLM_CALL_COUNT

    tag = "supporting" if support else "counter"
    print(f"🔍 Extracting {tag} reasons for the claim...")
    prompt = (
        f"List the {tag} reasons for the claim below as a JSON array of strings.\n"
        f"Claim: {claim}\n\n{document}\n\n"
        f"{tag.capitalize()}Reasons:"
    )

    LLM_CALL_COUNT += 1

    out = llm_call(prompt, seed)
    print(f"[LLM RAW {tag.capitalize()} OUTPUT] {out!r}")

    try:
        data = json.loads(out)
    except ValueError:
        data = None  # not JSON at all; handled by the line-based fallback below

    if isinstance(data, dict):
        key = next((k for k in data if k.lower().endswith("reasons")), None)
        if key is not None:
            found = data[key]
        else:
            # No "*reasons" key: take the first list-valued entry, if any.
            found = next((v for v in data.values() if isinstance(v, list)), [])
    elif isinstance(data, list):
        # The prompt asks for a bare array, so accept one as-is.
        found = data
    else:
        found = [line.strip(" -•") for line in out.splitlines()]

    # Normalise the payload to a list of non-blank strings.
    if isinstance(found, str):
        found = [found]
    elif not isinstance(found, list):
        found = []
    reasons = []
    for item in found:
        text = item if isinstance(item, str) else json.dumps(item, ensure_ascii=False)
        if text.strip():
            reasons.append(text)

    print(f"➡️ Found {len(reasons)} {tag} reason(s): {reasons}\n")
    return reasons

def validate_reason(reason: str, claim: str, seed: int = None) -> tuple[float, float]:
    """
    Have the LLM rate how strongly `reason` supports `claim` and how credible it is.

    Args:
        reason: The reason text to rate.
        claim: The claim the reason refers to.
        seed: Optional sampling seed forwarded to `llm_call`.

    Returns:
        (score, credibility), each clamped to the range 0.0–10.0. Both are 0.0
        when the reply cannot be parsed.

    Side effects:
        Increments the global LLM_CALL_COUNT, and increments
        LOW_CONF_FLAG_COUNT when credibility is below LOW_CONF_THRESH.
    """
    global LLM_CALL_COUNT, LOW_CONF_FLAG_COUNT

    def _bounded(value) -> float:
        # Coerce to float, then clamp into [0, 10].
        return max(0.0, min(10.0, float(value)))

    print(f"🔎 Validating reason snippet: {reason[:80]}…")
    print(f"    [DEBUG] current LOW_CONF_FLAG_COUNT = {LOW_CONF_FLAG_COUNT}")
    print(f"    [DEBUG] LOW_CONF_THRESH         = {LOW_CONF_THRESH}")

    instructions = (
        "You are scoring support for the claim on a 0–10 scale (0=no support, 10=strongest support) "
        "and credibility on a 0–10 scale (0=not credible, 10=highly credible). "
        'Respond ONLY with JSON: {"score": <0–10>, "credibility": <0–10>}.\n\n'
    )
    prompt = instructions + f"Claim: {claim}\nReason: {reason}"

    LLM_CALL_COUNT += 1
    raw = llm_call(prompt, seed)
    print(f"[LLM RAW VALIDATION OUTPUT] {raw!r}")

    try:
        parsed = json.loads(raw)
        score = _bounded(parsed.get("score", 0))
        cred = _bounded(parsed.get("credibility", 0))
    except Exception as e:
        # Any failure to parse or coerce zeroes both values.
        print(f"⚠️ Parse error: {e}")
        score = cred = 0.0

    if cred < LOW_CONF_THRESH:
        print(f"⚠️ Credibility {cred} < threshold {LOW_CONF_THRESH}, incrementing LOW_CONF_FLAG_COUNT")
        LOW_CONF_FLAG_COUNT += 1
        print(f"    [DEBUG] NEW LOW_CONF_FLAG_COUNT = {LOW_CONF_FLAG_COUNT}")

    print(f"➡️ Parsed: score={score}, credibility={cred}\n")
    return (score, cred)

def compute_gamma(sup_scores, sup_creds, cnt_scores, cnt_creds) -> float:
    """
    Combine per-reason scores into one credibility-weighted value (Gamma).

    Gamma = (Σ support_score·cred − Σ counter_score·cred) / Σ all creds

    Args:
        sup_scores, sup_creds: Scores and credibilities of supporting reasons,
            paired by position.
        cnt_scores, cnt_creds: Scores and credibilities of counter reasons,
            paired by position.

    Returns:
        The weighted value, or 0.0 when the credibilities sum to zero
        (including when no reasons were supplied).
    """
    print("🧮 Computing final CRIT score (Gamma)…")
    total_sup = sum(s * c for s, c in zip(sup_scores, sup_creds))
    total_cnt = sum(s * c for s, c in zip(cnt_scores, cnt_creds))

    total_cred = sum(sup_creds) + sum(cnt_creds)
    # Guard the division: with no credibility mass the result is defined as 0.0.
    gamma = (total_sup - total_cnt) / total_cred if total_cred else 0.0

    print(f"➡️ Gamma raw value: {gamma}\n")
    return gamma


In [None]:
# Step 6: End-to-End CRIT Validation with MFQ Scheduling
def crit_validate(document: str, seed: int = None) -> list[dict]:
    """
    Run the CRIT pipeline on every chunk of `document`.

    The document is split into chunks (see `_split_into_chunks`), one
    `_crit_chunk` job per chunk is queued on an `MFQScheduler`, and the jobs
    are run in queue order. A summary of the run is printed at the end.

    Args:
        document: Full text to analyse.
        seed: Optional sampling seed forwarded to every chunk job.

    Returns:
        One report dict per processed chunk, in processing order; an empty
        list when the document yields no chunks.

    Side effects:
        Resets the global LLM_CALL_COUNT and LOW_CONF_FLAG_COUNT to 0 first.
    """
    global LLM_CALL_COUNT, LOW_CONF_FLAG_COUNT
    LLM_CALL_COUNT = 0
    LOW_CONF_FLAG_COUNT = 0

    print("===🚀 Starting multi-claim CRIT pipeline ===\n")
    overall_start = time.time()

    # 1. Split the document into (label, text) chunks
    chunks = _split_into_chunks(document)

    # 2. Schedule each chunk’s CRIT job
    scheduler = MFQScheduler()
    for label, chunk_text in chunks:
        print(f"── Enqueuing CRIT for {label}")
        # Bind the text as a default argument so each job keeps its own chunk.
        scheduler.enqueue(lambda txt=chunk_text: _crit_chunk(txt, seed),
                          high_priority=True)

    # 3. Run jobs and collect
    reports = list(scheduler.run_all())

    total_time = time.time() - overall_start
    gammas     = [r["gamma_score"] for r in reports]
    n          = len(gammas)
    n_support  = sum(1 for g in gammas if g > 0)
    n_opp      = sum(1 for g in gammas if g < 0)
    avg_gamma  = sum(gammas)/n if n else 0
    mean_abs   = sum(abs(g) for g in gammas)/n if n else 0
    # min()/max() raise ValueError on an empty list, so guard the no-chunk case.
    gamma_span = f"{min(gammas):.2f} … {max(gammas):.2f}" if gammas else "n/a"

    print("\n===📊 CRIT pipeline summary ===")
    print(f"• Chunks processed     : {n}")
    print(f"• Total LLM calls      : {LLM_CALL_COUNT}")
    print(f"• Low-cred flags       : {LOW_CONF_FLAG_COUNT}")
    print(f"• Total time           : {total_time:.2f}s")
    print(f"• Γ-scores (min…max)   : {gamma_span}")
    print(f"             mean      : {avg_gamma:.2f}")
    print(f"             mean|Γ|   : {mean_abs:.2f}")
    print(f"• Support vs Opposition: {n_support} vs {n_opp}\n")

    print("\n===🎉 Multi-claim CRIT pipeline complete ===\n")
    return reports


def _split_into_chunks(document: str) -> list:
    """
    Split `document` into (label, text) chunks for `crit_validate`.

    When at least one bold `**Claim <digit>…**` header is present, each such
    header, and any bold `**Overall Assertion…**` header, starts a chunk whose
    text runs to the next header or to the end of the document. Text before
    the first header is ignored, and headers with no body text are skipped so
    that no LLM calls are spent on an empty chunk.

    Otherwise the document is split on blank lines, one chunk per non-empty
    paragraph.
    """
    claim_pattern = r"\*\*(Claim\s+\d[^\*]+)\*\*"
    if re.search(claim_pattern, document):
        header_re = re.compile(r"\*\*(Claim\s+\d[^\*]+|Overall Assertion[^\*]*)\*\*")
        headers = list(header_re.finditer(document))
        chunks = []
        for pos, match in enumerate(headers):
            # The body ends where the next header begins, or at end of text.
            stop = headers[pos + 1].start() if pos + 1 < len(headers) else len(document)
            label = match.group(1).strip()
            body = document[match.end():stop].strip()
            if body:
                chunks.append((label, body))
            else:
                print(f"── Skipping {label}: no text under this header")
        return chunks

    # Fallback: one chunk per paragraph; labels keep the raw paragraph index.
    paras = re.split(r"\n\s*\n", document)
    return [(f"Paragraph #{i+1}", p.strip())
            for i, p in enumerate(paras) if p.strip()]

def _crit_chunk(document: str, seed: int) -> dict:
    """
    Run one full CRIT pass over a single document chunk.

    Steps: extract the main claim, extract supporting and counter reasons,
    rate every reason, fold the ratings into Gamma, print an interpretation,
    and return the collected report.

    Args:
        document: Text of the chunk.
        seed: Sampling seed forwarded to every LLM-backed helper.

    Returns:
        Dict with keys "claim", "supporting_reasons", "support_scores",
        "support_credibility", "counter_reasons", "counter_scores",
        "counter_credibility" and "gamma_score".
    """
    def _rate_all(reasons, kind):
        # Rate each reason in order; warn on credibility below the threshold.
        scores, creds = [], []
        for position, text in enumerate(reasons, start=1):
            score, cred = validate_reason(text, claim, seed)
            scores.append(score)
            creds.append(cred)
            if cred < LOW_CONF_THRESH:
                print(f"⚠️ Low credibility ({cred}) for {kind} reason #{position}")
        return scores, creds

    print("── ▶️ [Chunk Job] Starting CRIT on document chunk\n")
    started_at = time.time()

    # Claim
    claim = extract_claim(document, seed)

    # Reasons, supporting first, then counter
    supports = extract_reasons(document, claim, support=True, seed=seed)
    counters = extract_reasons(document, claim, support=False, seed=seed)
    if not supports:
        print("⚠️ [Chunk Job] No supporting reasons extracted!")
    if not counters:
        print("⚠️ [Chunk Job] No counter reasons extracted!")

    # Validation
    print("🛡️ [Chunk Job] Validating supporting reasons…")
    sup_scores, sup_creds = _rate_all(supports, "supporting")
    print("⚔️ [Chunk Job] Validating counter reasons…")
    cnt_scores, cnt_creds = _rate_all(counters, "counter")

    # Gamma
    gamma = compute_gamma(sup_scores, sup_creds, cnt_scores, cnt_creds)

    # Interpretation: the first threshold that gamma reaches wins.
    print("🔢 Gamma Interpretation:")
    bands = (
        (8, "Very strong support"),
        (2, "Moderate support"),
        (0, "Weak support"),
        (-2, "Weak opposition"),
        (-8, "Moderate opposition"),
    )
    msg = next((text for floor, text in bands if gamma >= floor),
               "Very strong opposition")
    print(f"  • Γ = {gamma:.2f} → {msg}")

    elapsed = time.time() - started_at
    print(f"⏱️ Chunk completed in {elapsed:.2f}s\n")

    # Report
    report = dict(
        claim=claim,
        supporting_reasons=supports,
        support_scores=sup_scores,
        support_credibility=sup_creds,
        counter_reasons=counters,
        counter_scores=cnt_scores,
        counter_credibility=cnt_creds,
        gamma_score=gamma,
    )
    print("\n📝 Detailed Report:")
    print(json.dumps(report, indent=2))
    return report


In [None]:
# Step 7: Smoke Test
print("🔬 Running smoke test with sample document...\n")
# Earlier sample document, kept for reference (disabled):
# doc = """
# When cases increase and transmission accelerates, it’s more likely that new dangerous and
# more transmissible variants emerge, which can spread more easily or cause more severe illness.
# Based on what we know so far, vaccines are proving effective against existing variants,
# especially at preventing severe disease, hospitalization and death. However, some variants
# are having a slight impact on the ability of vaccines to guard against mild disease and infection.
# Vaccines are likely staying effective against variants because of the broad immune response
# they cause, which means that virus changes or mutations are unlikely to make vaccines
# completely ineffective.
# """

# Active sample document. Its "Claim 1:" / "Claim 2 (nested):" lines are NOT wrapped
# in ** ... **, so crit_validate's bold-header pattern does not match them and the
# text is processed via the blank-line paragraph fallback instead.
doc = """
In recent years, the rise of vertical farming has been heralded as a transformative solution to global food security challenges.
A 2021 report from the International Agricultural Institute claimed that “vertical farms can produce up to 300 times more yield per square meter than conventional fields” through optimized LED lighting and hydroponic nutrient delivery. However, subsequent field trials in temperate climates have shown only a 50–70× increase when factoring in energy costs and pest management.

Proponents argue that vertical farming’s closed-loop water systems reduce consumption by up to 95% compared to traditional irrigation [National Water Council, 2022]. In contrast, critics note that real-world operations often leak or require frequent nutrient flushes, driving water use closer to 60% savings—still significant, but far from ideal.

Claim 1: Vertical farms drastically improve land-use efficiency.
• Supporting Reason A: By stacking multiple growing layers, a single urban warehouse footprint can replace dozens of hectares of open farmland.
• Supporting Reason B: Controlled environments eliminate seasonality, enabling year-round production of leafy greens at up to 50 harvest cycles per year.
• Counter-Reason C: The need for artificial climate control consumes large amounts of electricity, which—unless powered by renewables—diminishes net land-use benefits when accounting for upstream energy infrastructure.

A lesser-studied benefit is the reduction in food miles: vertical farms situated within cities can cut average produce transportation distances from hundreds of kilometers to mere blocks. Yet, this improvement hinges on proximity to distribution centers; standalone facilities in exurban zones see transport savings drop below 30% due to last-mile logistics.

Claim 2 (nested): Urban vertical farms contribute meaningfully to carbon emission reductions.
• Supporting Reason D: A Life-Cycle Assessment (LCA) by GreenMetrics (2023) estimated a 40% cradle-to-fork greenhouse-gas reduction for locally grown lettuce versus field-grown counterparts.
• Counter-Reason E: The same LCA noted that if fossil-fuel electricity powers climate and lighting systems, up to 80% of those savings are negated.

Finally, the economic viability of vertical farms remains under debate. Seed funding in 2024 has poured billions into so-called “agritech unicorns,” yet only 15–20% have reached consistent profitability after three years. Investors point to high capital expenditures—upwards of $2,000 per square meter of growing space—and ongoing LED replacement costs. On the other hand, operators highlight premium pricing (often 2–3× conventional market rates) and reduced spoilage as drivers of positive cash flow in well-managed facilities.

Overall Assertion: While vertical farming holds great promise for intensifying food production and reducing resource footprints, its real-world impact depends critically on renewable energy integration, urban location logistics, and optimized operational practices.
"""

# doc = """
# **Claim 1: Quantum-powered irrigation can increase crop yields by 1,000×**
# In 2025, the startup “AgriFlux” patented a quantum-entanglement irrigation system they claim drips water directly into plant cell vacuoles.  According to their white paper, “Field trials in Nevada and the Gobi Desert showed yield multipliers from 50× up to a truly staggering 1,000× compared to standard drip irrigation.”  They allege energy costs were negligible because the device “harvests ambient cosmic microwave background radiation.”

# Despite these assertions, independent testers observed only a 2–3× boost under laboratory conditions, and many trials “failed outright” when humidity was above 20% — a curious side-effect the company glosses over.

# ---

# **Claim 2: Urban algae farms will power entire city grids by 2030**
# A consortium of five technology firms published in *Futuristic Energy Monthly* that “rhodophyte bioreactors installed on every skyscraper façade in Tokyo will generate 4 GW of peak power, enough to offset 30% of the city’s nighttime grid load.”  They cite proprietary “photosynthetic amplification catalysts” that allegedly boost algal photon capture by 700%.

# However, panelists at the 2024 GreenTech Summit noted that scale-up tests only ever produced ~50 kW in total from a 100 m² installation — orders of magnitude below projections.  Critics also question the environmental impact of continuous red-light exposure and potential eutrophication from algae runoff.

# ---

# **Claim 3: AI-driven MFQ schedulers achieve human-level multitasking**
# Drawing on the CoCoMo framework, ZephyrAI claims their “MFQ-X” scheduler can juggle 1,024 simultaneous background tasks with per-task latency under 1 ms, matching the attentional “quantum jump” capabilities of an average human brain.  Benchmarks released on GitHub show sub-millisecond context-switching across 16 priority levels.

# Yet, on PulsarCompute’s test cluster, real-world workloads (video decoding + real-time strategy game AI) caused MFQ-X to starve critical threads 27% of the time, leading to frame drops and missed network deadlines.  The vendor’s own issue tracker logs over 300 “priority inversion” bug reports in the last quarter.

# ---

# **Overall Assertion**
# In each of these cutting-edge technologies, the promotional hype vastly outpaces reproducible results.  While the theoretical models (quantum irrigation, urban algae farms, MFQ-X scheduling) point to revolutionary gains, every published “field trial” or “benchmark” shows far more modest improvements — if they succeed at all.


# """

# Run the full pipeline on the sample document. The same seed (42) is forwarded
# to every chunk job and, through the extract/validate helpers, to each llm_call.
results = crit_validate(doc, seed=42)
# Optional: re-print each per-chunk report (disabled; _crit_chunk already prints it).
# for idx, rpt in enumerate(results, 1):
#     print(f"\n=== Report for chunk #{idx} ===")
#     print(json.dumps(rpt, indent=2))

# import pprint; pprint.pprint(results)


🔬 Running smoke test with sample document...

===🚀 Starting multi-claim CRIT pipeline ===

── Enqueuing CRIT for Paragraph #1
── Enqueuing CRIT for Paragraph #2
── Enqueuing CRIT for Paragraph #3
── Enqueuing CRIT for Paragraph #4
── Enqueuing CRIT for Paragraph #5
── Enqueuing CRIT for Paragraph #6
── Enqueuing CRIT for Paragraph #7
── ▶️ [Chunk Job] Starting CRIT on document chunk

🔍 Extracting claim with JSON mode…
[LLM CALL] 📡 Sending prompt to gpt-4o-mini-2024-07-18 (max 2048 tokens, temp=0.0)
[LLM CALL] Prompt hash: f71f75b6
[LLM CALL] ✅ Response received
[LLM RAW CLAIM OUTPUT] '{"claim": "Vertical farming can produce up to 300 times more yield per square meter than conventional fields, but field trials show only a 50–70× increase when considering energy costs and pest management."}'
➡️ Claim: Vertical farming can produce up to 300 times more yield per square meter than conventional fields, but field trials show only a 50–70× increase when considering energy costs and pest manage