In [300]:
# %% Minimal setup
# If needed (uncomment in a notebook):
# !pip install requests python-dotenv

import os, json, textwrap, re, time
from http.client import responses

import requests
from mpmath.math2 import math_sqrt

# Connection settings for the OpenAI-style endpoint. Each value is read from
# the environment when this cell runs and falls back to the default shown.
API_KEY = os.environ.get("OPENAI_API_KEY", "cse476")
API_BASE = os.environ.get("API_BASE", "http://10.4.58.53:41701/v1")
MODEL = os.environ.get("MODEL_NAME", "bens_model")

def call_model_chat_completions(prompt: str,
                                system: str = "You are a helpful assistant. Reply with only the final answer—no explanation.",
                                model: str = MODEL,
                                temperature: float = 0.0,
                                timeout: int = 60,
                                max_tokens: int = 128) -> dict:
    """
    Calls an OpenAI-style /v1/chat/completions endpoint and returns:
    { 'ok': bool, 'text': str or None, 'raw': dict or None, 'status': int, 'error': str or None, 'headers': dict }

    Args:
        prompt: Content of the user message.
        system: Content of the system message.
        model: Model name sent in the payload.
        temperature: Sampling temperature sent in the payload.
        timeout: Timeout in seconds passed to ``requests.post``.
        max_tokens: Completion token limit sent in the payload.

    Returns:
        The result dict described above. This function does not raise for
        transport errors, non-200 statuses, or malformed bodies; those are
        reported with ``ok=False`` (``status`` is -1 when no HTTP response
        was received). On a 200 response ``text`` is always a string; it is
        "" when the body carries no message content.
    """
    url = f"{API_BASE}/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type":  "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user",   "content": prompt}
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        return {"ok": False, "text": None, "raw": None, "status": -1, "error": str(e), "headers": {}}

    status = resp.status_code
    hdrs = dict(resp.headers)

    if status != 200:
        # try best-effort to surface error text
        try:
            err_text = resp.json()
        except Exception:
            err_text = resp.text
        return {"ok": False, "text": None, "raw": None, "status": status, "error": str(err_text), "headers": hdrs}

    try:
        data = resp.json()
    except (ValueError, requests.RequestException) as e:
        # A 200 whose body is not JSON is reported as a failure, not raised.
        return {"ok": False, "text": None, "raw": None, "status": status,
                "error": f"Invalid JSON in response body: {e}", "headers": hdrs}

    # Walk choices[0].message.content defensively: an empty `choices` list,
    # a non-dict body, or a null `content` all yield "" instead of raising.
    choices = data.get("choices") if isinstance(data, dict) else None
    first_choice = choices[0] if isinstance(choices, list) and choices else None
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    text = content if isinstance(content, str) else ""

    return {"ok": True, "text": text, "raw": data, "status": status, "error": None, "headers": hdrs}


# Self-reflection: First inference method
This is implemented in the agent loop (see `agent.reflection`).

# Tree of thought: Second inference method


In [301]:
class Node:
    """One thought in the tree, plus the thoughts expanded from it."""

    def __init__(self, thought, children = None):
        # A falsy `children` (None or an empty list) is replaced by a fresh
        # list, so two nodes never share a default child list.
        kids = children if children else []
        self.thought, self.children = thought, kids

class Tot:
    """Tree-of-thought driver.

    Starting from a root prompt, each iteration asks the model to expand
    every thought on the current frontier; the model's reply becomes a
    single child node. The finished tree is rendered as text and handed to
    the model as context when selecting the final answer.
    """

    def __init__(self, prompt, model = None, iterations = 3, maxTokens = 128, treeOutput = None):
        """
        Args:
            prompt: Text of the root thought.
            model: Model name used for every call; falls back to MODEL.
            iterations: Maximum number of expansion rounds performed by Run().
            maxTokens: Stored on the instance; not currently forwarded to
                the chat-completions helper.
            treeOutput: Optional initial value for the rendered-tree buffer.
        """
        self.root = Node(prompt)
        self.model = model or MODEL
        self.iterations = iterations
        self.current_thoughts = [self.root]
        self.maxTokens = maxTokens
        self.treeOutput = treeOutput or ""


    def EThoughts(self, thoughtN):
        """Expand each node in `thoughtN` and return the newly created children.

        A node whose model call fails or comes back empty gets no child, so
        a failed call never puts a `None` thought into the tree.
        """
        newThoughtNodes = []

        for nd in thoughtN:
            prompt = f"Given the current thought: '{nd.thought}', provide three concise thoughts that expand this idea further."
            response = call_model_chat_completions(prompt, model=self.model)

            # The helper always returns a (truthy) dict, so check the text.
            text = (response or {}).get("text")
            if not isinstance(text, str) or not text.strip():
                continue

            child = Node(text)
            nd.children.append(child)
            newThoughtNodes.append(child)

        return newThoughtNodes


    def Run(self):
        """Expand the frontier up to `iterations` times, stopping early if it empties."""
        for _ in range(self.iterations):
            if not self.current_thoughts:
                break
            self.current_thoughts = self.EThoughts(self.current_thoughts)


    def updateThought(self, nThought):
        """Discard the current tree and start over from a new root thought."""
        self.root = Node(nThought)
        self.current_thoughts = [self.root]


    def showTree(self, Node, lv=0, prefix="",buffer = None):
        """Render the subtree rooted at `Node` into `self.treeOutput`.

        Args:
            Node: Node to render. The parameter name shadows the Node class
                inside this method; it is kept for keyword-call compatibility.
            lv: Depth of `Node`; a call with lv == 0 resets the buffer first.
            prefix: Indentation accumulated from the ancestors.
            buffer: Unused; kept for backward compatibility.
        """
        node = Node

        # Reset buffer only on first/top-level call
        if lv == 0:
            self.treeOutput = ""

        connector = "└── " if lv > 0 else ""
        self.treeOutput += f"{prefix}{connector}{node.thought}" + "\n"

        # Children of the root are not indented; deeper levels add four spaces.
        child_prefix = prefix + ("    " if lv > 0 else "")

        for child in node.children:
            self.showTree(child, lv + 1, child_prefix)

    def selectAnswer(self, original_prompt: str):
        """Render the tree, then ask the model for the final answer.

        Returns:
            The model's reply text, or None when the call produced no text.
        """
        self.showTree(self.root)

        answer = call_model_chat_completions(
            original_prompt,
            system= f"You are given a list of thoughts to help you decide on the answer look at the prompt and select branch with the most nodes. respond with just the answer. Attached below is the train of thought {self.treeOutput}",
            model=self.model,
        )

        return (answer or {}).get("text")



# Self-consistency: Third inference method






In [302]:
from collections import Counter

def determineProblem(prompt):
    """Ask the model to classify `prompt` and return the text of its reply."""
    classifier_instructions = (
        "Determine what type of problem this is and return one of the following choices "
        "[Math reasoning, common sense, logic]"
    )
    reply = call_model_chat_completions(prompt, system=classifier_instructions)
    return reply["text"]

def selfConsistency(prompt, num_responses = 5):
    """Sample several answers to `prompt` and let the model pick the strongest.

    Args:
        prompt: The problem to solve.
        num_responses: Number of candidate answers to sample.

    Returns:
        A tuple ``(best_answer, candidates)``. ``candidates`` is the list of
        non-empty sampled answers (stripped). ``best_answer`` is the judge's
        reply text; if the judge call yields no text it falls back to the
        most frequent candidate. When no candidate could be sampled the
        result is ``(None, [])`` and the judge is not called.
    """
    candidates = []
    for _ in range(num_responses):
        # A non-zero temperature makes the samples differ from one another;
        # the empty system message leaves the model free to show its work.
        response = call_model_chat_completions(prompt, system = "", temperature= .8)
        text = (response or {}).get("text")
        # Failed calls report text=None; keep only usable answers.
        if isinstance(text, str) and text.strip():
            candidates.append(text.strip())

    if not candidates:
        return None, candidates

    # Label each candidate so the "Response0: ..." example in the selection
    # prompt matches what the judge actually sees.
    labelledResponses = "\n".join(
        f"Response{index}: {candidate}" for index, candidate in enumerate(candidates)
    )

    selectionPrompt = f"""

    I have generated several responses for the given problem. Please evaluate them and determine which is the most accurate, comprehensive, and well written

    Here is the problem:
    {prompt}

    Here are the responses:
    {labelledResponses}

    Review all candidate responses, determine which one is the strongest.
    Return the answer after the Response: Example 'Response0: Yes'
    Return would be "yes"

    """

    goldenSolution = call_model_chat_completions(selectionPrompt)
    goldenText = (goldenSolution or {}).get("text")

    if not isinstance(goldenText, str) or not goldenText.strip():
        # Judge unavailable: fall back to the answer sampled most often
        # (ties resolve to the one sampled first).
        goldenText = Counter(candidates).most_common(1)[0][0]

    return goldenText, candidates



def runSelfConsistency(prompt):
    """Run self-consistency with five samples and return only the chosen answer.

    Returns the first element of the tuple produced by ``selfConsistency``;
    the candidate list is discarded.
    """
    return selfConsistency(prompt, num_responses= 5)[0]


# prompt = "For certain physical scenarios, it is impossible to model forces as being due to gradient of potentials. This is often due to macrophysical considerations that yield forces as arising from a macroscopic statistical average of microstates. For example, friction is caused by the gradients of numerous electrostatic potentials between the atoms, but manifests as a force model that is independent of any macroscale position vector. Nonconservative forces other than friction include other contact forces, tension, compression, and drag. However, for any sufficiently detailed description, all these forces are the results of conservative ones since each of these macroscopic forces are the net results of the gradients of microscopic potentials. It is always possible to model forces as being due to what?"
# runSelfConsistency(prompt)

#
# prompt = "if there are three cars in the parking lot and 289 more cars arrive, how many cars are in the parking lot"
# problemType = determineProblem(prompt)
# print(problemType)
#
# Answers = selfConsistency(prompt, num_responses= 5)
# #print(Answers[0])
#
#
# print("The golden solution is", Answers[0])
# for i in Answers[1]:
#     print(i)







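# Agent loop

Action route (as implemented in `planAction`):
1. Math reasoning problems: tree of thought
2. Common sense problems: self-consistency (also the fallback when the problem cannot be classified)
3. Logic problems: self-reflection (a self-consistency answer followed by a critique pass)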

In [303]:
class agent:
    """Routes a task to one of three inference techniques and returns a short answer.

    The loop is: classify the problem (`planAction`), run the matching
    technique (`executeAction`), then ask the model to strip the result down
    to the essential answer (`runAgent`).
    """

    # Returned by runAgent when the chosen technique produced no usable text.
    _NO_ANSWER = "NO_ANSWER_GENERATED_DUE_TO_API_OR_CONNECTION_ERROR"

    def __init__(self, model=None):
        # Stored for callers; the techniques below do not currently receive it.
        self.model = model

        # action_list[i] is the technique used for problem_types[i].
        self.action_list = ["tot", "Consistency", "reflection" ]
        self.problem_types = ["Math reasoning", "Common sense", "logic"]

    def reflection(self, prompt: str):
        """Answer with self-consistency, then ask the model to critique and correct it.

        Returns:
            The corrected answer text, or the initial self-consistency answer
            when the critique call yields no text.
        """
        oringialAnswer = runSelfConsistency(prompt)

        reflectionPrompt = f"""
        You are a critic and solver.

        here is the original question:
        {prompt}

        here is an initial attempt at the solution
        {oringialAnswer}

        1. Check the initial attempt for mistakes and correct them.
        2. Provide only the final answer after correction.
        """

        system_reflect = (
        "You are a strict but helpful reviewer. "
        "Make sure the output is only the FINAL ANSWER."
        )

        refined = call_model_chat_completions(
            reflectionPrompt,
            system=system_reflect
        )

        # The result dict always has a "text" key, so test its value: a
        # failed call reports text=None and must not replace the first answer.
        refinedText = (refined or {}).get("text")
        if not isinstance(refinedText, str) or not refinedText.strip():
            return oringialAnswer

        return refinedText

    def planAction(self, prompt: str):
        """Classify `prompt` and return the name of the technique to run.

        The model's reply is matched case-insensitively against
        `self.problem_types`; the type mentioned earliest in the reply wins.
        Any failure or unrecognised reply falls back to "Consistency".
        """
        # Few-shot examples embedded (as their Python repr) in the system prompt.
        tests = [
            {
                "id": "math_inequality",
                "type": "numeric",  # grader will prefer numeric extraction
                "prompt": "Solve for the smallest integer n such that 3n + 5 > 26. Answer with just the integer.",
                "expected": "8",    # Because 3n > 21 => n > 7, smallest integer is 8
            },
            {
                "id": "commonsense_ice",
                "type": "text",
                "prompt": (
                    "You place an ice cube in a glass of water and mark the water level. "
                    "After the ice melts, does the water level rise, fall, or stay the same? "
                    "Answer with exactly one of: 'rise', 'fall', 'stay the same'."
                ),
                "expected": "stay the same",
            },
            {
                "id": "logic_race",
                "type": "text",
                "prompt": (
                    "In a race, you pass the person in second place. What position are you now in? "
                    "Answer with a single word like 'first', 'second', 'third'."
                ),
                "expected": "second",
            },
            ]
        systemPrompt = f"""
        Determine what type of problem this is and return one of the following choices "[Math reasoning, Common sense, logic]"
        attached below are examples of each problem type. When returning the answer just return the text
        {tests}
        """

        default_action = self.action_list[1]  # self-consistency

        DefineProblem = call_model_chat_completions(prompt, system= systemPrompt)
        raw_text = (DefineProblem or {}).get("text")
        if not isinstance(raw_text, str):
            return default_action

        label = raw_text.strip().lower()
        if not label:
            return default_action

        # Tolerate case, trailing punctuation and surrounding words by
        # locating each known type inside the reply.
        matches = []
        for problem_type, action in zip(self.problem_types, self.action_list):
            position = label.find(problem_type.lower())
            if position >= 0:
                matches.append((position, action))

        if not matches:
            return default_action
        return min(matches, key=lambda match: match[0])[1]

    def executeAction(self, action: str, prompt: str):
        """Run the technique named by `action` on `prompt`.

        Returns:
            The technique's answer, or None for an unknown action name.
        """
        if action == self.action_list[0]:
            tot_agent = Tot(prompt)
            tot_agent.Run()
            # selectAnswer renders the tree itself before querying the model.
            return tot_agent.selectAnswer(prompt)

        if action == self.action_list[1]:
            return runSelfConsistency(prompt)

        if action == self.action_list[2]:
            return self.reflection(prompt)

        return None

    def runAgent(self, task):
        """Solve one task end to end and return the answer as a string.

        Args:
            task: Either a dict carrying the question under "input" or
                "prompt", or any other object, which is converted with str().

        Returns:
            The cleaned-up answer. If the clean-up call yields no text the
            uncleaned answer is returned; if the technique itself produced
            nothing, a fixed placeholder string is returned.

        Raises:
            ValueError: If no prompt can be extracted from `task`.
        """
        # Extract prompt from task
        if isinstance(task, dict):
            prompt = task.get("input") or task.get("prompt")
        else:
            prompt = str(task)

        if not prompt:
            raise ValueError("runAgent: No prompt found in task.")

        # Decide which technique to use
        action = self.planAction(prompt)
        if action is None:
            action = self.action_list[1]  # "Consistency"

        # Execute the technique
        answer = self.executeAction(action, prompt)

        if not isinstance(answer, str) or not answer.strip():
            # Nothing to clean up; do not send the placeholder to the model.
            return self._NO_ANSWER

        cleaned = call_model_chat_completions(f"The answer is {answer}", system= "Return ONLY the essential answer no explanation needed")
        cleanedText = (cleaned or {}).get("text")
        if isinstance(cleanedText, str) and cleanedText.strip():
            return cleanedText

        # Clean-up failed: keep the answer we already have rather than None.
        return answer




# if __name__ == "__main__":
#      agent = agent()
#
#      task = {
#          "input": "Is it possible to get killed walking to the Very Large Telescope? Facts: The Very Large Telescope is in the Atacama Desert The Atacama Desert is the driest hot desert in the world."
#     }
#
#      result = agent.runAgent(task)
#      print("Final answer:", result)


# A1 = agent
# Action = A1.planAction("You place an ice cube in a glass of water and mark the water level. "
#             "After the ice melts, does the water level rise, fall, or stay the same? "
#             "Answer with exactly one of: 'rise', 'fall', 'stay the same'.")
# A1.executeAction(Action)



# Final testing: Answer generation

In [304]:
#!/usr/bin/env python3
"""
Generate an answer file that matches the expected auto-grader format.

`build_answers()` runs the agent loop on every question so that each
``output`` field contains the agent's prediction.

Reads the input questions from INPUT_PATH and writes an answers JSON file to
OUTPUT_PATH, where each entry contains a string under the "output" key.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Dict, List


# Question file to read and answer file to write, both relative to the
# current working directory.
INPUT_PATH, OUTPUT_PATH = Path("first50Inputs.json"), Path("first50_Outputs.json")

# INPUT_PATH = Path("question.json")
# OUTPUT_PATH = Path("answers.json")

def load_questions(path: Path) -> List[Dict[str, Any]]:
    """Load the list of question objects from the JSON file at `path`.

    The file is read as UTF-8 regardless of the platform's default encoding.

    Raises:
        ValueError: If the top-level JSON value is not a list.
    """
    with path.open("r", encoding="utf-8") as fp:
        data = json.load(fp)
    if not isinstance(data, list):
        raise ValueError("Input file must contain a list of question objects.")
    return data


def build_answers(questions: List[Dict[str, Any]]) -> List[Dict[str, str]]:
    """Run the agent on every question and collect the answers.

    Args:
        questions: Question objects, each passed unchanged to ``runAgent``.

    Returns:
        One ``{"output": <str>}`` dict per question, in input order. The
        output is always a string: a ``None`` result becomes a fixed
        placeholder and any other non-string result is converted with
        ``str()``, so one failed question cannot fail validation of the run.
    """
    my_agent = agent()   # create a single agent instance
    answers = []
    for idx, question in enumerate(questions, start=1):
        result = my_agent.runAgent(question)
        if result is None:
            result = "NO_ANSWER_GENERATED_DUE_TO_API_OR_CONNECTION_ERROR"
        elif not isinstance(result, str):
            result = str(result)
        answers.append({"output": result})

        # Progress marker every 100 questions.
        if idx % 100 == 0:
            print(idx, "\n")
    return answers


def validate_results(
    questions: List[Dict[str, Any]], answers: List[Dict[str, Any]]
) -> None:
    """Check that `answers` lines up with `questions` and is well formed.

    Raises:
        ValueError: If the two lists differ in length, an answer has no
            "output" key, or an output is 5000 characters or longer.
        TypeError: If an output is not a string.
    """
    n_questions, n_answers = len(questions), len(answers)
    if n_questions != n_answers:
        raise ValueError(
            f"Mismatched lengths: {n_questions} questions vs {n_answers} answers."
        )

    for position, entry in enumerate(answers):
        if "output" not in entry:
            raise ValueError(f"Missing 'output' field for answer index {position}.")

        output = entry["output"]
        if not isinstance(output, str):
            raise TypeError(
                f"Answer at index {position} has non-string output: {type(output)}"
            )

        size = len(output)
        if size >= 5000:
            raise ValueError(
                f"Answer at index {position} exceeds 5000 characters "
                f"({size} chars). Please make sure your answer does not include any intermediate results."
            )


def main() -> None:
    """Answer every question in INPUT_PATH and write the results to OUTPUT_PATH.

    The answers are written as UTF-8 JSON (non-ASCII characters are kept
    as-is), then read back from disk and validated against the questions so
    that the saved file, not just the in-memory list, is known to be valid.
    """
    questions = load_questions(INPUT_PATH)
    answers = build_answers(questions)

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned instead of left to the platform default.
    with OUTPUT_PATH.open("w", encoding="utf-8") as fp:
        json.dump(answers, fp, ensure_ascii=False, indent=2)

    with OUTPUT_PATH.open("r", encoding="utf-8") as fp:
        saved_answers = json.load(fp)
    validate_results(questions, saved_answers)
    print(
        f"Wrote {len(answers)} answers to {OUTPUT_PATH} "
        "and validated format successfully."
    )


# Run the full pipeline when this code is executed directly rather than
# imported as a module.
if __name__ == "__main__":
    main()



Wrote 50 answers to first50_Outputs.json and validated format successfully.
