In [19]:
#Imported libraries
from google.colab import drive
drive.mount('/content/drive')

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import sys
import os
import re
from typing import List, Dict, Any
import warnings
warnings.filterwarnings('ignore')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
#Loading the search, prompt, and language model

src_path = '/content/drive/MyDrive/boston-rideshare-agent/src'
if src_path not in sys.path:
    sys.path.insert(0, src_path)

for module in ['search', 'prompting', 'llm']:
    if module in sys.modules:
        del sys.modules[module]

from search import (
    tokenize, compute_tf, compute_df, compute_idf,
    tfidf_vector, cosine, search_corpus, tool_search
)

from prompting import (
    make_prompt, parse_action, format_history, SYSTEM_PREAMBLE
)

from llm import HF_LLM

In [15]:
# Load dataset
df = pd.read_csv('/content/drive/MyDrive/rideshare_kaggle.csv')

# Add temporal features
df['datetime'] = pd.to_datetime(df['timestamp'], unit='ms')
df['hour'] = df['datetime'].dt.hour
df['day_name'] = df['datetime'].dt.day_name()
df['day_of_week'] = df['datetime'].dt.dayofweek

# Filter to complete records
df_search = df.dropna(subset=['price', 'source', 'destination', 'distance']).copy()



# Create trip documents
def trip_to_document(row):
    """Convert trip to searchable text document."""
    return (
        f"source {row['source']} "
        f"destination {row['destination']} "
        f"hour {row['hour']} "
        f"{row['day_name']} "
        f"distance {row['distance']:.1f} miles "
        f"surge {row['surge_multiplier']} "
        f"{row['cab_type']} "
        f"{row['short_summary']} weather "
        f"price {row['price']:.0f}"
    )

# Sample 5000 diverse trips
CORPUS = []
for idx, row in df_search.sample(5000, random_state=42).iterrows():
    CORPUS.append({
        "id": f"trip{idx}",
        "title": f"{row['source']} → {row['destination']}",
        "text": trip_to_document(row),
        "price": row['price'],
        "cab_type": row['cab_type'],
        "surge_multiplier": row['surge_multiplier'],
        "distance": row['distance'],
        "hour": row['hour'],
        "day": row['day_name']
    })

# Build search index
DOC_TOKENS = [tokenize(d["title"] + " " + d["text"]) for d in CORPUS]
VOCAB = sorted(set(t for doc in DOC_TOKENS for t in doc))
IDF = compute_idf(DOC_TOKENS, VOCAB)
DOC_VECS = [tfidf_vector(tokens, IDF) for tokens in DOC_TOKENS]


In [16]:
# Initialize LLM with optimized settings
LLM = HF_LLM(
    model_name="Qwen/Qwen2.5-0.5B-Instruct",
    max_new_tokens=100,
    generation_kwargs={
        "temperature": 0.1,  # Very deterministic
        "top_p": 0.9,
        "do_sample": True
    }
)

TOOLS = {
    "search": lambda query, k=3: tool_search(query, CORPUS, DOC_VECS, IDF, k=k)
}


In [17]:
#Implementing the RideshareAgent
class RideshareAgent:
    """
    Boston Rideshare Decision Agent.
    """

    def __init__(self, llm, tools, config=None):
        """
        Initialize the agent.

        Args:
            llm: Language model instance (HF_LLM)
            tools: Dictionary of available tools
            config: Agent configuration (max_steps, verbose)
        """
        self.llm = llm
        self.tools = tools
        self.config = config or {}
        self.max_steps = self.config.get('max_steps', 6)
        self.verbose = self.config.get('verbose', True)

        # Initialize trajectory
        self.trajectory = []

    def run(self, user_query: str) -> Dict[str, Any]:
        """
        Run the agent on a user query.

        Args:
            user_query: User's rideshare question

        Returns:
            Dictionary with question, final_answer, and steps
        """
        self.trajectory.clear()

        for step_num in range(self.max_steps):
            if self.verbose:
                print(f"\n{'='*60}")
                print(f"STEP {step_num + 1}")
                print(f"{'='*60}")

            # 1. Format prompt based on current trajectory
            prompt = make_prompt(user_query, self.trajectory)

            # 2. Generate Thought + Action from LLM
            out = self.llm(prompt)

            if self.verbose:
                print(f"\nLLM Output:\n{out}")

            # 3. Parse the output
            lines = out.strip().split('\n')

            # Extract thought
            if len(lines) >= 1 and lines[0].startswith('Thought:'):
                thought = lines[0].replace('Thought:', '').strip()
            else:
                thought = "Processing query"

            # Extract and parse action
            action_line = None
            for line in lines:
                if line.startswith('Action:'):
                    action_line = line
                    break

            if not action_line:
                observation = "Invalid action format. Stopping."
                self.trajectory.append({
                    'thought': thought,
                    'action': action_line or "none",
                    'observation': observation
                })
                break

            parsed = parse_action(action_line)

            if not parsed:
                observation = "Invalid action format. Stopping."
                self.trajectory.append({
                    'thought': thought,
                    'action': action_line,
                    'observation': observation
                })
                break

            name, args = parsed

            if name == "finish":
                #Agent is done
                observation = "done"
                self.trajectory.append({
                    'thought': thought,
                    'action': action_line.replace('Action:', '').strip(),
                    'observation': observation
                })

                if self.verbose:
                    print(f"\n✓ Agent finished!")
                    print(f"Final Answer: {args.get('answer', 'N/A')}")

                break

            elif name not in self.tools:
                observation = f"Action '{name}' not allowed or not found."
                self.trajectory.append({
                    'thought': thought,
                    'action': action_line.replace('Action:', '').strip(),
                    'observation': observation
                })

                if self.verbose:
                    print(f"Unknown action: {name}")

                break

            else:
                try:
                    obs_payload = self.tools[name](**args)
                    observation = json.dumps(obs_payload, ensure_ascii=False)

                    if self.verbose:
                        print(f"\n✓ Executed {name} with args: {args}")
                        print(f"Retrieved {len(obs_payload.get('results', []))} results")

                except Exception as e:
                    observation = f"Tool error: {e}"

                    if self.verbose:
                        print(f"Tool execution failed: {e}")

                #Add step to trajectory
                self.trajectory.append({
                    'thought': thought,
                    'action': action_line.replace('Action:', '').strip(),
                    'observation': observation
                })

        #Build final answer from last finish action
        final_answer = None
        for s in reversed(self.trajectory):
            if s['action'].startswith('finish'):
                #Extract answer from finish action
                m = re.search(r'answer="([^"]*)"', s['action'])
                if m:
                    final_answer = m.group(1)
                break

        return {
            "question": user_query,
            "final_answer": final_answer,
            "steps": [{"thought": s["thought"], "action": s["action"], "observation": s["observation"][:500]} for s in self.trajectory]
        }

In [22]:
## Demo Run ##

#Create agent instance
agent = RideshareAgent(
    llm=LLM,
    tools=TOOLS,
    config={
        'max_steps': 6,
        'verbose': True
    }
)

demo_query_1 = "I need to go from Back Bay to Financial District on Monday morning. Should I use Uber or Lyft?"

print(f"User: {demo_query_1}\n")

result_1 = agent.run(demo_query_1)

print("\n" + "=" * 80)
print("AGENT RESULT")
print("=" * 80)
print(f"Question: {result_1['question']}")
print(f"Final Answer: {result_1['final_answer']}")

for i, step in enumerate(result_1['steps'], 1):
    print(f"\nStep {i}:")
    print(f"  Thought: {step['thought']}")
    print(f"  Action: {step['action']}")
    print(f"  Observation: {step['observation'][:200]}...")

User: I need to go from Back Bay to Financial District on Monday morning. Should I use Uber or Lyft?


STEP 1

LLM Output:
Action: search[query="Back Bay Financial District", k=3]

✓ Executed search with args: {'query': 'Back Bay Financial District', 'k': 3}
Retrieved 3 results

STEP 2

LLM Output:
Action: finish[answer="Lyft is cheaper than Uber. Recommend Lyft."]

✓ Agent finished!
Final Answer: Lyft is cheaper than Uber. Recommend Lyft.

AGENT RESULT
Question: I need to go from Back Bay to Financial District on Monday morning. Should I use Uber or Lyft?
Final Answer: Lyft is cheaper than Uber. Recommend Lyft.

Step 1:
  Thought: Processing query
  Action: search[query="Back Bay Financial District", k=3]
  Observation: {"tool": "search", "query": "Back Bay Financial District", "results": [{"id": "trip278061", "title": "Back Bay → Fenway", "snippet": "source Back Bay destination Fenway hour 21 Sunday distance 1.4 mil...

Step 2:
  Thought: Processing query
  Action: finish[answer="Lyf

In [23]:
# Create src/agent.py
agent_code = '''"""
agent.py - Boston Rideshare Decision Agent
Combines search, prompting, and LLM into unified agent system.
"""

import json
import re
from typing import Dict, Any, List


class RideshareAgent:
    """Boston Rideshare Decision Agent using ReAct pattern."""

    def __init__(self, llm, tools, corpus, doc_vecs, idf, config=None):
        """
        Initialize agent.

        Args:
            llm: HF_LLM instance
            tools: Dictionary of available tools
            corpus: Trip document corpus
            doc_vecs: Pre-computed TF-IDF vectors
            idf: IDF dictionary
            config: Agent configuration
        """
        self.llm = llm
        self.tools = tools
        self.corpus = corpus
        self.doc_vecs = doc_vecs
        self.idf = idf
        self.config = config or {}
        self.max_steps = self.config.get('max_steps', 6)
        self.verbose = self.config.get('verbose', True)
        self.trajectory = []

    def run(self, user_query: str) -> Dict[str, Any]:
        """Run agent on user query following ReAct pattern."""
        from prompting import make_prompt, parse_action

        self.trajectory.clear()

        for step_num in range(self.max_steps):
            if self.verbose:
                print(f"\\n{'='*60}\\nSTEP {step_num + 1}\\n{'='*60}")

            # Format prompt
            prompt = make_prompt(user_query, self.trajectory)

            # Generate from LLM
            out = self.llm(prompt)
            if self.verbose:
                print(f"\\nLLM: {out}")

            # Parse output
            lines = out.strip().split('\\n')
            thought = lines[0].replace('Thought:', '').strip() if lines[0].startswith('Thought:') else "Processing"

            action_line = None
            for line in lines:
                if line.startswith('Action:'):
                    action_line = line
                    break

            if not action_line:
                self.trajectory.append({'thought': thought, 'action': 'none', 'observation': 'Invalid format'})
                break

            parsed = parse_action(action_line)
            if not parsed:
                self.trajectory.append({'thought': thought, 'action': action_line, 'observation': 'Parse failed'})
                break

            name, args = parsed

            # Execute action
            if name == "finish":
                self.trajectory.append({'thought': thought, 'action': action_line.replace('Action:', '').strip(), 'observation': 'done'})
                break
            elif name in self.tools:
                try:
                    obs_payload = self.tools[name](**args)
                    observation = json.dumps(obs_payload, ensure_ascii=False)
                except Exception as e:
                    observation = f"Tool error: {e}"
                self.trajectory.append({'thought': thought, 'action': action_line.replace('Action:', '').strip(), 'observation': observation})
            else:
                self.trajectory.append({'thought': thought, 'action': action_line.replace('Action:', '').strip(), 'observation': f"Unknown action: {name}"})
                break

        # Extract final answer
        final_answer = None
        for s in reversed(self.trajectory):
            if s['action'].startswith('finish'):
                m = re.search(r'answer="([^"]*)"', s['action'])
                if m:
                    final_answer = m.group(1)
                break

        return {
            "question": user_query,
            "final_answer": final_answer,
            "steps": self.trajectory
        }
'''

#Save to file
import os
os.makedirs('/content/drive/MyDrive/boston-rideshare-agent/src', exist_ok=True)

file_path = '/content/drive/MyDrive/boston-rideshare-agent/src/agent.py'
with open(file_path, 'w') as f:
    f.write(agent_code)