In [5]:
"""
Simple LLM wrapper - start with rules, then add real model
"""
import random
from typing import Dict, List

class SimpleLLM:
    """Mock LLM for initial testing"""
    
    def __init__(self, use_real_model=False):
        self.use_real_model = use_real_model
    
    def get_object_prior(self, object_name: str, locations: List[str]) -> Dict[str, float]:
        """Get probability distribution for where object might be"""
        # Hand-coded rules for common objects
        priors = {
            "apple": {"kitchen": 0.5, "fridge": 0.3, "pantry": 0.2},
            "book": {"bedroom": 0.4, "living_room": 0.4, "office": 0.2},
            "keys": {"entrance": 0.4, "bedroom": 0.3, "kitchen": 0.3},
            "plate": {"kitchen": 0.6, "dishwasher": 0.3, "cabinet": 0.1}
        }
        
        if object_name in priors:
            result = {}
            for loc in locations:
                for key in priors[object_name]:
                    if key in loc.lower() or loc.lower() in key:
                        result[loc] = priors[object_name].get(key, 0)
            
            if result:
                total = sum(result.values())
                return {k: v/total for k, v in result.items()}
        
        # Uniform distribution for unknown objects
        uniform_prob = 1.0 / len(locations)
        return {loc: uniform_prob for loc in locations}
    
    def suggest_action(self, current_state: str, goal: str, history: List[str]) -> str:
        """Suggest next action based on state and goal"""
        # Simple heuristic for now
        actions = ["move_to_kitchen", "move_to_bedroom", "search_area", "pick_object"]
        return random.choice(actions)

In [6]:
"""
Simplified MCTS implementation for CPU
"""
import random
import math
from typing import Dict, List, Any, Optional
from dataclasses import dataclass, field

@dataclass
class Node:
    state: Dict
    action: Optional[str]
    parent: Optional['Node']
    children: List['Node'] = field(default_factory=list)
    visits: int = 0
    value: float = 0.0
    
class SimpleMCTS:
    def __init__(self, llm, c_param=1.4, max_simulations=50):
        self.llm = llm
        self.c_param = c_param
        self.max_simulations = max_simulations
    
    def search(self, initial_state: Dict, goal: Dict, available_actions: List[str]) -> str:
        """Run MCTS and return best action"""
        root = Node(state=initial_state, action=None, parent=None)
        
        for sim in range(self.max_simulations):
            # Selection
            node = self._select(root)
            
            # Expansion
            if node.visits > 0 and not self._is_terminal(node.state, goal):
                node = self._expand(node, available_actions)
            
            # Simulation
            reward = self._simulate(node.state, goal)
            
            # Backpropagation
            self._backpropagate(node, reward)
        
        # Return best action
        return self._best_action(root)
    
    def _select(self, node: Node) -> Node:
        """Select node using UCT"""
        while node.children:
            if all(child.visits > 0 for child in node.children):
                node = self._uct_select(node)
            else:
                # Return first unvisited child
                return next(c for c in node.children if c.visits == 0)
        return node
    
    def _uct_select(self, node: Node) -> Node:
        """Select child with highest UCT value"""
        best_value = -float('inf')
        best_node = None
        
        for child in node.children:
            if child.visits == 0:
                return child
            uct_value = (child.value / child.visits + 
                        self.c_param * math.sqrt(math.log(node.visits) / child.visits))
            if uct_value > best_value:
                best_value = uct_value
                best_node = child
        
        return best_node
    
    def _expand(self, node: Node, actions: List[str]) -> Node:
        """Expand node with one child"""
        # Get LLM suggestion for which action to try
        suggested_action = self.llm.suggest_action(
            str(node.state), 
            "goal", 
            []
        )
        
        # If LLM suggestion is not in available actions, pick random
        if suggested_action not in actions:
            suggested_action = random.choice(actions)
        
        # Create child node
        new_state = self._apply_action(node.state.copy(), suggested_action)
        child = Node(
            state=new_state,
            action=suggested_action,
            parent=node
        )
        node.children.append(child)
        return child
    
    def _simulate(self, state: Dict, goal: Dict) -> float:
        """Random rollout from state"""
        current_state = state.copy()
        steps = 0
        max_steps = 20
        
        while steps < max_steps:
            if self._is_terminal(current_state, goal):
                return 1.0 - (steps / max_steps)  # Reward based on speed
            
            # Random action
            action = random.choice(["move", "pick", "search"])
            current_state = self._apply_action(current_state, action)
            steps += 1
        
        return 0.0
    
    def _backpropagate(self, node: Node, reward: float):
        """Update values up the tree"""
        while node:
            node.visits += 1
            node.value += reward
            node = node.parent
    
    def _best_action(self, root: Node) -> str:
        """Select most visited child"""
        if not root.children:
            return "no_action"
        
        best_child = max(root.children, key=lambda c: c.visits)
        return best_child.action if best_child.action else "no_action"
    
    def _is_terminal(self, state: Dict, goal: Dict) -> bool:
        """Check if goal is reached"""
        # Simple check - customize based on your task
        for key, value in goal.items():
            if state.get(key) != value:
                return False
        return True
    
    def _apply_action(self, state: Dict, action: str) -> Dict:
        """Apply action to state (simplified)"""
        new_state = state.copy()
        # This is task-specific - implement based on your domain
        if "move" in action:
            new_state["last_action"] = action
        return new_state

In [7]:
"""
Simple object finding task for testing
"""
import random
from typing import Dict, List, Tuple

class ObjectFindingTask:
    """Simple task: find objects in house"""
    
    def __init__(self):
        self.rooms = ["kitchen", "bedroom", "living_room", "bathroom", "office"]
        self.objects = ["keys", "phone", "wallet", "book", "apple"]
        self.reset()
    
    def reset(self):
        """Reset environment to random state"""
        self.robot_location = "entrance"
        self.object_locations = {
            obj: random.choice(self.rooms) 
            for obj in self.objects
        }
        self.holding = None
        self.steps_taken = 0
        self.found_objects = set()
    
    def get_state(self) -> Dict:
        """Get current partially observable state"""
        state = {
            "robot_location": self.robot_location,
            "holding": self.holding,
            "visible_objects": self._get_visible_objects(),
            "found_objects": list(self.found_objects)
        }
        return state
    
    def _get_visible_objects(self) -> List[str]:
        """Objects visible in current room"""
        visible = []
        for obj, loc in self.object_locations.items():
            if loc == self.robot_location:
                visible.append(obj)
        return visible
    
    def execute_action(self, action: str) -> Tuple[Dict, float, bool]:
        """Execute action and return new state, reward, done"""
        self.steps_taken += 1
        reward = -0.01  # Small penalty for each step
        
        if action.startswith("move_to_"):
            room = action.replace("move_to_", "")
            if room in self.rooms:
                self.robot_location = room
                reward = -0.01  # Movement cost
        
        elif action.startswith("pick_"):
            obj = action.replace("pick_", "")
            if obj in self._get_visible_objects() and not self.holding:
                self.holding = obj
                self.found_objects.add(obj)
                reward = 0.5  # Reward for picking up object
        
        elif action == "search":
            # Just costs time
            reward = -0.02
        
        # Check if task is done
        done = self.steps_taken >= 50 or len(self.found_objects) >= 3
        
        return self.get_state(), reward, done
    
    def get_available_actions(self) -> List[str]:
        """Get list of valid actions in current state"""
        actions = [f"move_to_{room}" for room in self.rooms]
        actions.append("search")
        
        if self._get_visible_objects() and not self.holding:
            for obj in self._get_visible_objects():
                actions.append(f"pick_{obj}")
        
        return actions

In [8]:
"""
Main experiment runner
"""
import random
import time
from simple_llm import SimpleLLM
from simple_mcts import SimpleMCTS
from task_environment import ObjectFindingTask

def run_baseline_random(task, max_steps=50):
    """Random agent baseline"""
    task.reset()
    total_reward = 0
    steps = 0
    
    for _ in range(max_steps):
        actions = task.get_available_actions()
        action = random.choice(actions)
        state, reward, done = task.execute_action(action)
        total_reward += reward
        steps += 1
        if done:
            break
    
    return total_reward, steps

def run_llm_mcts(task, llm, mcts, max_steps=50):
    """Run LLM-MCTS agent"""
    task.reset()
    total_reward = 0
    steps = 0
    
    for _ in range(max_steps):
        state = task.get_state()
        goal = {"find_objects": True}  # Simplified goal
        
        # Get available actions
        actions = task.get_available_actions()
        
        # Run MCTS to get best action
        action = mcts.search(state, goal, actions)
        
        # Execute action
        state, reward, done = task.execute_action(action)
        total_reward += reward
        steps += 1
        
        if done:
            break
    
    return total_reward, steps

def main():
    print("Starting LLM-MCTS Experiment")
    print("-" * 50)
    
    # Initialize components
    llm = SimpleLLM(use_real_model=False)  # Start with mock
    mcts = SimpleMCTS(llm, max_simulations=20)  # Small for CPU
    task = ObjectFindingTask()
    
    # Run experiments
    n_trials = 5
    
    print("\nRunning Random Baseline...")
    print("-" * 30)
    random_rewards = []
    random_steps = []
    for i in range(n_trials):
        reward, steps = run_baseline_random(task)
        random_rewards.append(reward)
        random_steps.append(steps)
        print(f"Trial {i+1}: Reward={reward:.3f}, Steps={steps}")
    
    avg_random_reward = sum(random_rewards) / len(random_rewards)
    avg_random_steps = sum(random_steps) / len(random_steps)
    print(f"Random Average: Reward={avg_random_reward:.3f}, Steps={avg_random_steps:.1f}")
    
    print("\nRunning LLM-MCTS...")
    print("-" * 30)
    mcts_rewards = []
    mcts_steps = []
    for i in range(n_trials):
        reward, steps = run_llm_mcts(task, llm, mcts)
        mcts_rewards.append(reward)
        mcts_steps.append(steps)
        print(f"Trial {i+1}: Reward={reward:.3f}, Steps={steps}")
    
    avg_mcts_reward = sum(mcts_rewards) / len(mcts_rewards)
    avg_mcts_steps = sum(mcts_steps) / len(mcts_steps)
    print(f"LLM-MCTS Average: Reward={avg_mcts_reward:.3f}, Steps={avg_mcts_steps:.1f}")
    
    print("\n" + "=" * 50)
    print("RESULTS SUMMARY")
    print("=" * 50)
    print(f"Random Agent:   Reward={avg_random_reward:.3f}, Steps={avg_random_steps:.1f}")
    print(f"LLM-MCTS Agent: Reward={avg_mcts_reward:.3f}, Steps={avg_mcts_steps:.1f}")
    print(f"Improvement:    {((avg_mcts_reward - avg_random_reward) / abs(avg_random_reward) * 100):.1f}%")

if __name__ == "__main__":
    random.seed(42)  # For reproducibility
    main()

Starting LLM-MCTS Experiment
--------------------------------------------------

Running Random Baseline...
------------------------------
Trial 1: Reward=-0.060, Steps=50
Trial 2: Reward=-0.120, Steps=50
Trial 3: Reward=-0.080, Steps=50
Trial 4: Reward=-0.060, Steps=50
Trial 5: Reward=-0.080, Steps=50
Random Average: Reward=-0.080, Steps=50.0

Running LLM-MCTS...
------------------------------
Trial 1: Reward=-0.030, Steps=50
Trial 2: Reward=-0.050, Steps=50
Trial 3: Reward=-0.050, Steps=50
Trial 4: Reward=-0.040, Steps=50
Trial 5: Reward=-0.020, Steps=50
LLM-MCTS Average: Reward=-0.038, Steps=50.0

RESULTS SUMMARY
Random Agent:   Reward=-0.080, Steps=50.0
LLM-MCTS Agent: Reward=-0.038, Steps=50.0
Improvement:    52.5%
