In [None]:
import os
import glob
import json
import math
import random
import re
from collections import Counter
import networkx as nx
import pdfplumber
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()

# Set the OpenAI API key from environment variable
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# -- Document Loading ------------------------------------------------------

def load_docs_from_dir(dir_path):
    """
    Load all .pdf files from a directory and extract their full text.
    Returns a list of strings, one per PDF.
    """
    docs = []
    filenames = []
    for filepath in glob.glob(os.path.join(dir_path, '*.pdf')):
        try:
            text_pages = []
            with pdfplumber.open(filepath) as pdf:
                for page in pdf.pages:
                    text_pages.append(page.extract_text() or "")
            full_text = "\n".join(text_pages)
            docs.append(full_text)
            filenames.append(os.path.basename(filepath))
        except Exception as e:
            # skip unreadable PDFs
            print(f"Warning: could not load {filepath}: {e}")
    return docs, filenames

# -- 1. Extract topics and key concepts -----------------------------------

def extract_json_from_text(text):
    """Helper function to extract JSON content from markdown code blocks or raw text"""
    # Try to extract JSON from markdown code blocks
    json_match = re.search(r'```(?:json)?\s*(.*?)\s*```', text, re.DOTALL)
    if json_match:
        return json_match.group(1).strip()
    
    # If no code blocks, try to find JSON-like structures
    json_match = re.search(r'(\{.*\})', text, re.DOTALL)
    if json_match:
        return json_match.group(1).strip()
    
    # Return original text if no JSON structure found
    return text

def extract_concepts_from_docs(doc_texts, filenames, model="gpt-4o"):  
    """
    Call OpenAI API to extract topics and key concepts from each document.
    Returns list of dicts with keys 'topics' and 'key_concepts'.
    """
    extractions = []
    for i, text in enumerate(doc_texts):
        filename = filenames[i] if i < len(filenames) else f"doc_{i}"
        
        prompt = (
            "Extract high-level topics and key concepts from the following document. "
            f"Return JSON with keys 'topics' and 'key_concepts'.\n\n{text}"
        )
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an expert summarizer. Return your response as a JSON object with 'topics' and 'key_concepts' as arrays."},
                {"role": "user", "content": prompt}
            ],
        )
        
        output = response.choices[0].message.content
        print("Raw output:", output)
        
        try:
            # Clean and extract JSON from the output
            json_str = extract_json_from_text(output)
            data = json.loads(json_str)
            
            # Ensure we have the expected keys
            if "topics" in data and "key_concepts" in data:
                extraction = {
                    "filename": filename,
                    "topics": data["topics"],
                    "key_concepts": data["key_concepts"]
                }
                extractions.append(extraction)
                print(f"Successfully extracted {len(data['topics'])} topics and {len(data['key_concepts'])} key concepts from {filename}")
            else:
                print(f"Warning: Parsed JSON doesn't have expected keys: {data.keys()}")
                extractions.append({
                    "filename": filename,
                    "topics": [], 
                    "key_concepts": []
                })
                
        except json.JSONDecodeError as e:
            print(f"Failed to parse JSON: {e}")
            extractions.append({
                "filename": filename,
                "topics": [], 
                "key_concepts": []
            })
    
    return extractions

# -- 2. Construct the concept graph ----------------------------------------

def build_concept_graph(extractions, eps=1e-6):
    """
    Build a unified graph G where nodes are topics + key concepts and
    edges weighted by log(freq+eps) based on co-occurrence in docs.
    Returns: G (nx.Graph), topic_nodes, kc_nodes
    """
    freq = Counter()
    all_topics, all_kcs = set(), set()

    for ex in extractions:
        nodes = ex["topics"] + ex["key_concepts"]
        for i in range(len(nodes)):
            for j in range(i+1, len(nodes)):
                u, v = sorted((nodes[i], nodes[j]))
                freq[(u, v)] += 1
        all_topics.update(ex["topics"])
        all_kcs.update(ex["key_concepts"])

    G = nx.Graph()
    for (u, v), f in freq.items():
        weight = math.log(f + eps)
        G.add_edge(u, v, weight=weight)

    print(f"Graph built with {len(all_topics)} topics and {len(all_kcs)} key concepts")
    return G, all_topics, all_kcs

# -- Helpers for sampling --------------------------------------------------

def softmax(weights):
    exps = [math.exp(w) for w in weights]
    s = sum(exps) or 1.0
    return [e/s for e in exps]


def random_walk(G, start, steps):
    """
    Random walk on graph G for given steps from 'start',
    with transition probabilities via softmax over edge weights.
    """
    path = [start]
    current = start
    for _ in range(steps):
        nbrs = list(G[current])
        if not nbrs:
            break
        weights = [G[current][n]['weight'] for n in nbrs]
        probs = softmax(weights)
        current = random.choices(nbrs, probs)[0]
        path.append(current)
    return path

# -- 3. Concept combination sampling ---------------------------------------

def sample_concept_combinations(
    G, topic_nodes, kc_nodes,
    num_samples=100,
    topic_walk_steps=(1, 2),
    kc_walk_steps=(3, 4)
):
    """
    Generate sampled sets of topics and key concepts via multi-stage random walks.
    Returns list of dicts: {'topics': set, 'key_concepts': set}
    """
    # Safety check
    if not topic_nodes:
        print("Error: No topics found. Cannot sample combinations.")
        return []
        
    G_topic = G.subgraph(topic_nodes)
    G_topic_kc = G.subgraph(topic_nodes | kc_nodes)
    G_kc = G.subgraph(kc_nodes)
    samples = []

    topics_list = list(topic_nodes)
    print(f"Sampling from {len(topics_list)} topics")
    
    for _ in range(num_samples):
        t0 = random.choice(topics_list)
        t_steps = random.choice(topic_walk_steps)
        topic_path = random_walk(G_topic, t0, t_steps)
        sampled_topics = set(topic_path)

        kc_cands = [nbr for t in sampled_topics for nbr in G_topic_kc[t] if nbr in kc_nodes]
        if kc_cands:
            k0 = random.choice(kc_cands)
            k_steps = random.choice(kc_walk_steps)
            kc_path = random_walk(G_kc, k0, k_steps)
            sampled_kcs = set(kc_path)
        else:
            sampled_kcs = set()

        samples.append({
            "topics": list(sampled_topics),  # Convert sets to lists for JSON serialization
            "key_concepts": list(sampled_kcs)
        })
    
    return samples

# -- 4. Question generation ------------------------------------------------

def generate_questions_for_samples(combos, docs, extractions, model="gpt-4o"):  
    """
    For each sampled combo, pick two docs via Jaccard on concept sets,
    then call LLM to generate questions.
    Returns list of dicts: {'sample': combo, 'questions': [...]}.
    """
    doc_concepts = [set(ex['topics'] + ex['key_concepts']) for ex in extractions]
    # doc_concepts= doc_concepts[:1]
    results = []
    max_samples = 100
    # combos = combos[:max_samples]
    for i, combo in enumerate(combos):
        combo_id = f"combo_{i+1}"
        kg = set(combo['topics']) | set(combo['key_concepts'])
        sims = []
        for idx, dc in enumerate(doc_concepts):
            inter = kg & dc
            union = kg | dc
            sims.append((len(inter) / (len(union) or 1), idx))
        sims.sort(reverse=True)
        top_idxs = [i for _, i in sims[:2]]
        refs = [docs[i] for i in top_idxs]
        ref_files = [extractions[i]["filename"] for i in top_idxs]
        System_prompt =f"""
Each question must follow these instructions:
Model a Physical Scenario: Start from a real-world or idealized setup. Avoid abstract Physics problems or purely conceptual statements.
Target a Solvable Quantity: Ask for a clear symbolic expression of a physical variable (e.g., tension, acceleration, energy).
Force Multi-Step Reasoning: Ensure the question involves a sequence of physics laws, transformations, and derivations to reach the answer.
Avoid Redundancy: Exclude extraneous details or variables that do not impact the final solution.
Be Unique: Do not rephrase standard textbook problems; ensure originality and complexity.
Single solution: Expect a single symbolic expression, unambiguous, presented in LaTeX. Multiple equivalent algebraic forms are allowed. No equations or floating-point approximations.
Use rigorous, concise phrasing.
Avoid colloquial or ambiguous terminology.
Units must be consistent; symbols should follow standard notation.
"""
        prompt = (
            f"Generate a set of difficult Physics questions based on the following:\n"
            
            f"Topics: {combo['topics']}\n"
            f"Key Concepts: {combo['key_concepts']}\n"
            f"Reference Doc 1:\n{refs[0]}\n"
        )
        if len(refs) > 1:
            prompt += f"Reference Doc 2:\n{refs[1]}\n"
        prompt += "Return a JSON array of questions."

        # from ollama import chat
        # from ollama import ChatResponse

        # response: ChatResponse = chat(model='qwen3:8b', 
        #                                messages=[
        #         {"role": "system", "content": System_prompt},
        #         {"role": "user", "content": prompt}
        #     ])
        # # print(response['message']['content'])
        # # or access fields directly from the response object
        # print(response.message.content)
        # content = response.message.content
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": System_prompt},
                {"role": "user", "content": prompt}
            ]
        )
        content = response.choices[0].message.content
        
        try:
            # Clean and extract JSON from the output
            json_str = extract_json_from_text(content)
            questions = json.loads(json_str)
            if not isinstance(questions, list):
                # If the output is an object with a questions key
                if isinstance(questions, dict) and "questions" in questions:
                    questions = questions["questions"]
                else:
                    questions = [str(questions)]
        except json.JSONDecodeError:
            questions = [content]

        results.append({
            "id": combo_id,
            "topics": combo['topics'],
            "key_concepts": combo['key_concepts'],
            "reference_files": ref_files,
            "questions": questions
        })
        max_samples-=1
        if max_samples == 0:
            break


    return results

# -- Save outputs to files ------------------------------------------------

def save_extractions(extractions, output_file="document_extractions.json"):
    """Save the extracted topics and key concepts for each document"""
    # Ensure the extractions are serializable (convert sets to lists)
    serializable_extractions = []
    for ex in extractions:
        serializable_extractions.append({
            "filename": ex["filename"],
            "topics": list(ex["topics"]),
            "key_concepts": list(ex["key_concepts"])
        })
    
    with open(output_file, "w") as f:
        json.dump(serializable_extractions, f, indent=2)
    
    print(f"Saved document extractions to {output_file}")

def save_questions_with_topics(questions, output_file="questions_with_topics.json"):
    """Save the generated questions with their topic combinations"""
    with open(output_file, "w") as f:
        json.dump(questions, f, indent=2)
    
    print(f"Saved questions with topic combinations to {output_file}")

# -- Main Execution --------------------------------------------------------

# if __name__ == "__main__":
#     # Create output directory if it doesn't exist
#     output_dir = "output_apple"
#     os.makedirs(output_dir, exist_ok=True)
    
#     # adjust this path to where your .pdf docs live
#     docs_dir = "docs/"
    
#     print("Loading documents...")
#     docs, filenames = load_docs_from_dir(docs_dir)
#     print(f"Loaded {len(docs)} documents")
    
#     if not docs:
#         print("No documents found. Please check the docs directory.")
#         exit(1)
    
#     # 1) Extract topics & KCs
#     print("\nExtracting topics and key concepts...")
#     extractions = extract_concepts_from_docs(docs, filenames)
    
#     # Save extractions to file
#     save_extractions(extractions, os.path.join(output_dir, "document_extractions.json"))
    
#     # Verify we have valid extractions
#     valid_extractions = [ex for ex in extractions if ex["topics"] or ex["key_concepts"]]
#     if not valid_extractions:
#         print("No valid topics or key concepts extracted. Check your data and API responses.")
#         exit(1)
    
#     # 2) Build graph
#     print("\nBuilding concept graph...")
#     G, topic_nodes, kc_nodes = build_concept_graph(extractions)
    
#     if not topic_nodes:
#         print("No topics found in the graph. Cannot proceed.")
#         exit(1)
    
#     # 3) Sample combinations
#     print("\nSampling concept combinations...")
#     combos = sample_concept_combinations(G, topic_nodes, kc_nodes, num_samples=10)  # Reduced for testing
    
#     if not combos:
#         print("Failed to generate concept combinations.")
#         exit(1)
    
#     # Save topic combinations
#     with open(os.path.join(output_dir, "topic_combinations.json"), "w") as f:
#         json.dump(combos, f, indent=2)
    
#     # 4) Generate questions
#     print("\nGenerating questions for each combination...")
#     q_outputs = generate_questions_for_samples(combos, docs, extractions)
    
#     # Save questions with topics
#     save_questions_with_topics(q_outputs, os.path.join(output_dir, "questions_with_topics.json"))
    
#     # Display results
#     print("\n===== GENERATED QUESTIONS =====")
#     for idx, out in enumerate(q_outputs, 1):
#         print(f"\nSample {idx}:")
#         print(f"Topics: {out['topics']}")
#         print(f"Key Concepts: {out['key_concepts']}")
#         print(f"Reference Files: {out['reference_files']}")
#         print("Questions:")
#         for q in out['questions']:
#             print(f" - {q}")
    
#     print(f"\nAll outputs saved to directory: {output_dir}") 

if __name__ == "__main__":
    # Create output directory if it doesn't exist
    # Create output directory if it doesn't exist
    output_dir = "output_apple"
    os.makedirs(output_dir, exist_ok=True)
    
    # adjust this path to where your .pdf docs live
    docs_dir = "docs/"
    
    print("Loading documents...")
    docs, filenames = load_docs_from_dir(docs_dir)
    print(f"Loaded {len(docs)} documents")
    document_extractions = []
    with open("output_apple/document_extractions.json", "r") as f:
        document_extractions = json.load(f)
    for doc in document_extractions:
        print(doc)
        break

    topic_combinations = []
    with open("output_apple/topic_combinations.json", "r") as f:
        topic_combinations = json.load(f)
    for combo in topic_combinations:
        print(combo)
        break

    # generate questions 
    print("Generating questions...")
    q_outputs = generate_questions_for_samples(topic_combinations, docs, document_extractions)

    # Save questions with topics
    save_questions_with_topics(q_outputs, os.path.join(output_dir, "questions_with_topics.json"))
    
    # Display results
    print("\n===== GENERATED QUESTIONS =====")
    for idx, out in enumerate(q_outputs, 1):
        print(f"\nSample {idx}:")
        print(f"Topics: {out['topics']}")
        print(f"Key Concepts: {out['key_concepts']}")
        print(f"Reference Files: {out['reference_files']}")
        print("Questions:")
        for q in out['questions']:
            print(f" - {q}")
    
    print(f"\nAll outputs saved to directory: {output_dir}")

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def

Loading documents...


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def



CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def

Loaded 25 documents
{'filename': '2408.13687v1.pdf', 'topics': ['Quantum Error Correction', 'Surface Code', 'Quantum Computing', 'Fault-Tolerance', 'Real-Time Decoding', 'Quantum Algorithms', 'Quantum Memory'], 'key_concepts': ['Logical Qubits', 'Error Threshold', 'Distance-5 and Distance-7 Codes', 'Logical Error Rate', 'Decoder Latency', 'Error Suppression Factor', 'Component Errors', 'Quantum Processor Stability', 'Repetition Codes', 'Rare Correlated Error Events', 'Classical Coprocessor Requirements', 'Real-Time Error Decoding', 'Physical vs Logical Qubit Lifetime', 'Quantum Gate Fidelity', 'Leakage Removal in Quantum Systems']}
{'topics': ['Astrophysical implications', 'Magnetohydrodynamics'], 'key_concepts': ['Momentum conservation in superfluid systems', 'Corrections to proton current near vortex core', 'Longitudinal versus transverse forces']}
Generating questions...
Saved questions with topic combinations to output_apple/questions_with_topics.json

===== GENERATED QUESTIONS ===

### download data from csv

In [3]:
import os
import argparse
import pandas as pd
import requests
from urllib.parse import urlparse, unquote

def download_files(
    csv_path="Researchpapers_PhD_Synthetic_Data - Physics.csv",
    output_dir="docs"
) -> None:
    """
    Reads a CSV with a 'Link' column and downloads each URL to output_dir,
    saving each file with a .pdf extension.
    """
    # Load CSV
    df = pd.read_csv(csv_path)
    if 'Link' not in df.columns:
        raise ValueError("CSV must contain a 'Link' column.")

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    total = len(df['Link'].dropna())
    for idx, url in enumerate(df['Link'].dropna(), start=1):
        try:
            # Parse URL and derive a safe filename
            parsed = urlparse(url)
            raw_name = os.path.basename(parsed.path)
            raw_name = unquote(raw_name)  # decode URL-encoded characters

            if raw_name:
                name, ext = os.path.splitext(raw_name)
                # If there's no .pdf extension, force it
                filename = f"{idx}.pdf" if ext.lower() != '.pdf' else raw_name
            else:
                filename = f"{idx}.pdf"

            out_path = os.path.join(output_dir, filename)

            # Stream download
            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                with open(out_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)

            print(f"[{idx}/{total}] Downloaded → {filename}")

        except Exception as e:
            print(f"[{idx}/{total}] FAILED  → {url}\n    {e}")

if __name__ == "__main__":
    download_files()


[1/23] Downloaded → 1.pdf
[2/23] Downloaded → 2.pdf
[3/23] Downloaded → 3.pdf
[4/23] Downloaded → 4.pdf
[5/23] Downloaded → 5.pdf
[6/23] Downloaded → 6.pdf
[7/23] Downloaded → 7.pdf
[8/23] Downloaded → 8.pdf
[9/23] Downloaded → 9.pdf
[10/23] Downloaded → 10.pdf
[11/23] Downloaded → 11.pdf
[12/23] Downloaded → 12.pdf
[13/23] Downloaded → 13.pdf
[14/23] Downloaded → 14.pdf
[15/23] Downloaded → 15.pdf
[16/23] Downloaded → 16.pdf
[17/23] Downloaded → 17.pdf
[18/23] Downloaded → 18.pdf
[19/23] Downloaded → 19.pdf
[20/23] Downloaded → 20.pdf
[21/23] Downloaded → 21.pdf
[22/23] FAILED  → https://arxiv.org/abs/2505.06432'
    404 Client Error: Not Found for url: https://arxiv.org/abs/2505.06432'
[23/23] Downloaded → 23.pdf


## Author - Critique Model

In [None]:
import logging
from typing import List, Dict

from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
from langchain.chains import LLMChain

# Explicit logging configuration for console and file
logger = logging.getLogger()
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# File handler
file_handler = logging.FileHandler('conversation.log', mode='w')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# Console handler
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

# ---- Core author–critic loop for Chemistry ----
def author_critique_loop(
    topics: List[str],
    concepts: List[str],
    guidelines: str,
    few_shot_examples: List[Dict[str, str]],
    max_turns: int = 5,
) -> str:
    """
    Runs the author–critic loop for chemistry questions and logs each turn.
    Returns the final set of questions.
    """
    # Initialize LLMs
    author_llm = ChatOpenAI(model_name="o4-mini", temperature=1)
    critic_llm = ChatOpenAI(model_name="o3", temperature=1)

    # Build Few-Shot prompt for author
    example_prompt = PromptTemplate(
        input_variables=["example"],
        template="Example:\n{example}\n---"
    )
    author_fs_prompt = FewShotPromptTemplate(
        examples=few_shot_examples,
        example_prompt=example_prompt,
        prefix="You are a chemistry education specialist. Generate novel university-level chemistry questions. Here are some examples:",
        suffix=(
            "Based on topics: {topics}, key concepts: {concepts}, "
            "and guidelines: {guidelines}, produce a set of new, high-quality chemistry questions."
        ),
        input_variables=["topics", "concepts", "guidelines"],
    )
    author_chain = LLMChain(llm=author_llm, prompt=author_fs_prompt)

    # Build Critic prompt
    critic_prompt = PromptTemplate(
        input_variables=["questions", "guidelines"],
        template=(
            "You are a chemistry assessment expert. Critique the following questions:\n"
            "{questions}\n"
            "Evaluate them against these guidelines:\n{guidelines}\n"
            "Provide concise, actionable feedback on how to improve."
        ),
    )
    critic_chain = LLMChain(llm=critic_llm, prompt=critic_prompt)

    questions, feedback = "", ""

    for turn in range(max_turns):
        # ---- AUTHOR STEP ----
        if turn == 0:
            prompt = author_fs_prompt.format_prompt(
                topics=topics, concepts=concepts, guidelines=guidelines
            ).to_string()
            response = author_chain.run(
                topics=topics, concepts=concepts, guidelines=guidelines
            )
        else:
            refine = PromptTemplate(
                input_variables=["questions", "feedback"],
                template=(
                    "Refine these questions based on feedback:\n"
                    "Questions:\n{questions}\n"
                    "Feedback:\n{feedback}\n"
                    "Provide an improved set of chemistry questions following the original criteria."
                ),
            )
            prompt = refine.format(questions=questions, feedback=feedback)
            response = LLMChain(llm=author_llm, prompt=refine).run(
                questions=questions, feedback=feedback
            )

        logger.info(f"[Author turn {turn}] Prompt:\n{prompt}")
        logger.info(f"[Author turn {turn}] Response:\n{response}")
        questions = response

        # ---- CRITIC STEP ----
        prompt_c = critic_prompt.format(
            questions=questions, guidelines=guidelines
        )
        response_c = critic_chain.run(
            questions=questions, guidelines=guidelines
        )
        logger.info(f"[Critic turn {turn}] Prompt:\n{prompt_c}")
        logger.info(f"[Critic turn {turn}] Response:\n{response_c}")
        feedback = response_c

    return questions


if __name__ == "__main__":
    # Chemistry-specific example usage
    topics = [
        "Thermodynamics of Chemical Reactions",
        "Organic Reaction Mechanisms",
        "Coordination Chemistry",
        "Electrochemical Cells"
    ]
    concepts = [
        "Use of enthalpy, entropy, and Gibbs free energy to predict reaction spontaneity.",
        "Mechanistic steps in nucleophilic substitution and elimination reactions.",
        "Formation constants and isomerism in transition metal complexes.",
        "Calculating cell potential using standard reduction potentials and Nernst equation."
    ]
    guidelines = (
        "1. Expect answers as numerical values with appropriate units or chemical compounds named using official IUPAC nomenclature. "
        "2. Force Multi-Step Reasoning: Ensure calculations or mechanistic steps are detailed. "
        "3. Avoid Redundancy: Exclude extraneous information that does not affect the answer. "
        "4. Be Unique: Do not repeat standard textbook examples; ensure originality and complexity. "
        "5. Single solution: Provide one correct numerical value or compound name. "
        "6. Use rigorous, concise phrasing. "
        "7. Avoid colloquial or ambiguous terminology. "
        "8. Units must be consistent; chemical nomenclature must follow IUPAC rules."
        "9. Questions must not have multiple parts to answer, there should be only 1 solution to each quesion"

    )
    few_shot_examples = [
        {"example": (
            "Good: Calculate the molar enthalpy change when 2.00 moles of cyclohexane undergo complete combustion. "
            "Use standard enthalpies of formation and show each calculation step, then report the final value in kJ/mol."
        )},
        {"example": (
            "Bad: Define enthalpy."
        )},
    ]

    final_questions = author_critique_loop(
        topics, concepts, guidelines, few_shot_examples, max_turns=4
    )
    print(f"Final Chemistry Questions:\n{final_questions}\n")
    logger.info("Author–Critique loop for chemistry complete.")
    #save final questions in a file 
    
    with open("final_questions_chemistry.txt", "w") as f:
        f.write(final_questions)
    logging.info("Author–Critique loop complete.")