In [1]:
# Colab-only setup: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import re
import csv
from typing import List, Dict, Tuple, Optional, Any

Mounted at /content/drive


In [4]:
## HELPER FUNCTIONS ##

def convert_value(raw: str) -> Any:
    """
    Turn one raw argument token into a Python value.

    The token is whitespace-trimmed first. ``true`` / ``false`` in any
    letter case become booleans. Anything else is handed to
    ``ast.literal_eval``; if that fails, the token is returned as a string
    with surrounding double quotes, then surrounding single quotes, removed.
    """
    import ast

    token = raw.strip()

    # Case-insensitive boolean keywords are handled before literal parsing,
    # because literal_eval only understands the capitalised Python spelling.
    booleans = {"true": True, "false": False}
    lowered = token.lower()
    if lowered in booleans:
        return booleans[lowered]

    try:
        parsed = ast.literal_eval(token)
    except Exception:
        # Not a valid Python literal: treat it as plain text.
        parsed = token.strip('"').strip("'")
    return parsed

def _split_unquoted_commas(text: str) -> List[str]:
    """Split ``text`` on commas that are not inside a quoted value."""
    fields: List[str] = []
    current: List[str] = []
    quote: Optional[str] = None  # quote character currently open, if any
    escaped = False              # previous char inside the quotes was a backslash
    last_nonspace = ""           # last non-whitespace char of the current field

    for ch in text:
        if quote is not None:
            current.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote:
                quote = None
            continue

        if ch == ",":
            fields.append("".join(current))
            current = []
            last_nonspace = ""
            continue

        # A quote only opens a quoted region at the start of a field or
        # right after '=', so apostrophes inside bare text stay literal.
        if ch in "\"'" and last_nonspace in ("", "="):
            quote = ch
        current.append(ch)
        if not ch.isspace():
            last_nonspace = ch

    fields.append("".join(current))
    return fields


def split_args(argstr: str) -> Dict[str, Any]:
    """
    Parse a comma-separated argument string into a dictionary.

    Each non-empty field is either ``key=value`` (the value is converted
    with ``convert_value``) or a bare flag, which is stored as ``True``.

    Commas inside a quoted value do not separate fields, so
    ``query="Boston, MA", k=3`` yields the keys ``query`` and ``k``.
    ``csv.reader`` cannot be used for this: it only treats a quote
    character as quoting when it is the first character of a field, so a
    quote that follows ``key=`` is taken literally and the value is cut at
    its first comma.

    The quotes are left on the value so that ``convert_value`` can parse
    it as a string literal. If a quote is never closed, the remainder of
    the string is kept as part of that field.
    """
    args: Dict[str, Any] = {}

    for field in _split_unquoted_commas(argstr):
        field = field.strip()
        if not field:
            continue

        # Split on the first '=' only, so values may themselves contain '='.
        if "=" in field:
            key, val = field.split("=", 1)
            args[key.strip()] = convert_value(val)
        else:
            args[field] = True

    return args

In [5]:
## Step 1. ##

def parse_action(line: str) -> Optional[Tuple[str, Dict[str, Any]]]:
    """
    Parse one ``Action: name[key=value, ...]`` line.

    Input: A single line of text. Leading and trailing whitespace is ignored.
    Output: ``(action_name, args_dict)``, or ``None`` when the input is not
    a string, does not start with ``Action:``, has no ``[...]`` pair, or
    has an action name containing anything other than letters and
    underscores.

    The arguments are everything between the first ``[`` and the last
    ``]``; text after the last ``]`` is ignored. An empty argument list
    gives an empty dictionary.
    """
    if not isinstance(line, str):
        return None

    #Must start with 'Action:'
    prefix = "Action:"
    # Strip once and slice the stripped text: slicing the raw line would
    # cut at the wrong offset whenever the line has leading whitespace.
    stripped = line.strip()
    if not stripped.startswith(prefix):
        return None

    s = stripped[len(prefix):].strip()

    #Extract action name
    lb = s.find("[")
    rb = s.rfind("]")

    if lb == -1 or rb == -1 or rb < lb:
        return None

    name = s[:lb].strip()
    if not name or not all(c.isalpha() or c == "_" for c in name):
        return None

    #Extract and parse arguments
    inner = s[lb + 1 : rb].strip()
    args = split_args(inner) if inner else {}

    return name, args

In [6]:
## Step 2. ##
def format_history(trajectory: List[Dict[str, str]]) -> str:
    """
    Render the past steps as text for the prompt.

    Input: List of dictionaries, each with 'thought', 'action' and
           'observation' keys
    Output: One "Thought:", "Action:" and "Observation:" line per step, in
            that order, joined with newlines; an empty string when there
            are no steps
    """
    # (label, key) pairs in the order they are written out for every step.
    parts = (
        ("Thought", "thought"),
        ("Action", "action"),
        ("Observation", "observation"),
    )

    return "\n".join(
        f"{label}: {entry[key]}"
        for entry in trajectory
        for label, key in parts
    )

In [10]:
## Step 3. ##

# Fixed instruction text placed at the top of every prompt built by make_prompt.
# It names the two tool calls (search[...] and finish[...]) and asks the model to
# reply with exactly a "Thought:" line and an "Action:" line, without an Observation.
SYSTEM_PREAMBLE = """You are a helpful rideshare decision agent. You help users choose between Uber and Lyft based on historical Boston rideshare data.

Available tools:
- search[query="<text>", k=<int>] : Searches historical trips and returns top-k similar results based on route, time, and conditions.
- finish[answer="<final answer>"] : Provides the final recommendation to the user.

Follow the exact step format:
Thought: <your reasoning about what to do next>
Action: <one of the tool calls above, or finish[...]>

IMPORTANT: Respond with EXACTLY two lines in this format:
Thought: <one concise sentence>
Action: <either search[...] or finish[answer=...]>

Do NOT include Observation - the system will provide that.""".strip()

def make_prompt(user_query: str, trajectory: List[Dict[str, str]]) -> str:
    """
    Assemble the full prompt sent to the language model.

    Input: The user's question and the list of steps taken so far
    Output: The system preamble, the user question, the formatted history,
            and a trailing "Next step:" / "Thought:" cue for the model to
            continue from
    """
    past_steps = format_history(trajectory)

    header = f"{SYSTEM_PREAMBLE}\n\nUser Question: {user_query}\n\n"
    cue = "Next step:\nThought:"

    return f"{header}{past_steps}\n{cue}"


#Test prompt generation for a first turn: the trajectory is empty, so no
#Thought/Action/Observation history is included in the prompt.
test_query = "I need a ride from Northeastern to Logan Airport on Friday at 3pm. Should I use Uber or Lyft?"
test_trajectory: List[Dict[str, str]] = []

test_prompt = make_prompt(test_query, test_trajectory)

In [11]:
#Test with conversation history: one completed step (thought, action, and the
#observation that came back) is passed in, and the resulting prompt is printed.
test_trajectory_multi = [
    {
        "thought": "I should search for historical trips from Northeastern to Logan Airport on Friday afternoons.",
        "action": "search[query=\"Northeastern Logan Airport Friday afternoon\", k=3]",
        "observation": "{\"results\": [{\"title\": \"Northeastern University → Boston Logan Intl\", \"price\": 28.50, \"cab_type\": \"Uber\", \"surge\": 1.0}]}"
    }
]

# Reuses test_query from the previous cell.
test_prompt_multi = make_prompt(test_query, test_trajectory_multi)

print("Generated Prompt (After First Search):")
print("=" * 80)
print(test_prompt_multi)
print("=" * 80)

Generated Prompt (After First Search):
You are a helpful rideshare decision agent. You help users choose between Uber and Lyft based on historical Boston rideshare data.

Available tools:
- search[query="<text>", k=<int>] : Searches historical trips and returns top-k similar results based on route, time, and conditions.
- finish[answer="<final answer>"] : Provides the final recommendation to the user.

Follow the exact step format:
Thought: <your reasoning about what to do next>
Action: <one of the tool calls above, or finish[...]>

IMPORTANT: Respond with EXACTLY two lines in this format:
Thought: <one concise sentence>
Action: <either search[...] or finish[answer=...]>

Do NOT include Observation - the system will provide that.

User Question: I need a ride from Northeastern to Logan Airport on Friday at 3pm. Should I use Uber or Lyft?

Thought: I should search for historical trips from Northeastern to Logan Airport on Friday afternoons.
Action: search[query="Northeastern Logan Airpo