# Agentic AI

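The code below implements a simple AI agent that moves around a grid world and collects randomly placed items, making probabilistic (and deliberately imperfect) decisions along the way.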

In [2]:
# Some Imports
import random
import time
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
from enum import Enum

In [4]:
class ActionType(Enum):
    """Kinds of action that Environment.execute_action knows how to handle."""
    MOVE = "move"  # step one cell in the direction named by Action.target
    SEARCH = "search"  # report the item (if any) on the agent's current cell
    COLLECT = "collect"  # pick up the item on the agent's current cell
    EXPLORE = "explore"  # step one cell in a randomly chosen direction
    REST = "rest"  # stay in place; the environment state is not changed

@dataclass
class Action:
    """A single action for the environment to execute."""
    type: ActionType  # which kind of action this is
    target: str  # direction name for MOVE; Environment.execute_action ignores it for other types
    parameters: Optional[Dict[str, Any]] = None  # extra data; None is replaced by {} in __post_init__
    confidence: float = 1.0  # How confident the agent is in this action
    
    def __post_init__(self) -> None:
        # Give every instance its own dict instead of sharing one mutable default.
        if self.parameters is None:
            self.parameters = {}

@dataclass
class Observation:
    """What the environment reports back after executing one action."""
    location: str  # string form of the agent's [x, y] position after the action
    items_found: List[str]  # item names seen or collected at that position (may be empty)
    energy_level: float  # Environment fills this with a random value in [0.7, 1.0]
    message: str  # human-readable description of what happened
    success: bool = True  # False when the action could not be carried out

class Environment:
    """Grid world environment with random item placement.

    The grid has ``size`` x ``size`` cells addressed as ``(x, y)``. The origin
    is the top-left cell: "east" increases ``x`` and "south" increases ``y``.
    The agent always starts at ``(0, 0)`` and no item is ever placed there.
    """

    # Item names that can be placed on the grid.
    ITEM_TYPES = ["treasure", "key", "food", "tool", "gem", "coin", "potion", "scroll"]

    # Direction name -> (dx, dy). The insertion order is also the order used
    # when EXPLORE picks a random direction.
    _MOVES = {
        "north": (0, -1),
        "south": (0, 1),
        "east": (1, 0),
        "west": (-1, 0),
    }

    # Emoji used by visualize() for each item type (built once, not per cell).
    _ITEM_EMOJI = {
        "treasure": "💰",
        "key": "🗝️ ",
        "food": "🍎",
        "tool": "🔧",
        "gem": "💎",
        "coin": "🪙",
        "potion": "🧪",
        "scroll": "📜",
    }

    def __init__(self, size: int = 5, num_items: int = 4, seed: Optional[int] = None):
        """Create the grid and scatter items on it.

        Args:
            size: Side length of the square grid.
            num_items: Requested number of items; clamped to the number of
                free cells (and to zero if negative).
            seed: If given, seeds the module-level ``random`` generator so the
                layout is reproducible.
        """
        if seed is not None:
            random.seed(seed)

        self.size = size
        self.agent_pos = [0, 0]  # Agent always starts at (0,0)
        self.collected_items = []

        # Generate random items
        self.items = self._generate_random_items(num_items)

        print(f"🌍 Generated {len(self.items)} items randomly on {size}x{size} grid")
        print(f"📦 Item locations: {dict(self.items)}")

    def _generate_random_items(self, num_items: int) -> Dict[Tuple[int, int], str]:
        """Return a mapping of random, distinct positions to item names.

        Item names are distinct as long as ``num_items`` does not exceed the
        number of item types; beyond that, names are drawn with repetition
        instead of raising ``ValueError``.
        """
        # Every cell except the agent's start position is a candidate.
        available_positions = [
            (x, y)
            for x in range(self.size)
            for y in range(self.size)
            if (x, y) != (0, 0)
        ]

        # Never ask for more items than free cells, nor for a negative count.
        num_items = max(0, min(num_items, len(available_positions)))

        selected_positions = random.sample(available_positions, num_items)

        if num_items <= len(self.ITEM_TYPES):
            selected_items = random.sample(self.ITEM_TYPES, num_items)
        else:
            # More items than distinct types: names have to repeat.
            selected_items = random.choices(self.ITEM_TYPES, k=num_items)

        return dict(zip(selected_positions, selected_items))

    def _try_move(self, direction: str) -> bool:
        """Move the agent one cell in ``direction`` if that stays on the grid.

        Returns True when the agent moved, False for an unknown direction or
        a move that would leave the grid. ``agent_pos`` is updated in place.
        """
        delta = self._MOVES.get(direction)
        if delta is None:
            return False

        new_x = self.agent_pos[0] + delta[0]
        new_y = self.agent_pos[1] + delta[1]
        if not (0 <= new_x < self.size and 0 <= new_y < self.size):
            return False

        self.agent_pos[0] = new_x
        self.agent_pos[1] = new_y
        return True

    def execute_action(self, action: "Action") -> "Observation":
        """Execute an action and return the resulting observation.

        ``Observation.success`` is False when a move is rejected or there is
        nothing to collect; the environment state is unchanged in those cases.
        """
        items_here = []  # Stays empty unless SEARCH/COLLECT finds something
        success = True

        if action.type == ActionType.MOVE:
            direction = action.target
            old_pos = self.agent_pos.copy()
            success = self._try_move(direction)

            if success:
                message = f"Moved {direction} from {old_pos} to {self.agent_pos}"
            elif direction in self._MOVES:
                message = f"Cannot move {direction} from {old_pos} - blocked by boundary"
            else:
                message = f"Cannot move {direction} from {old_pos} - unknown direction"

        elif action.type == ActionType.SEARCH:
            current_pos = tuple(self.agent_pos)
            if current_pos in self.items:
                items_here.append(self.items[current_pos])
            message = f"Searched location {current_pos}"

        elif action.type == ActionType.COLLECT:
            current_pos = tuple(self.agent_pos)
            if current_pos in self.items:
                item = self.items.pop(current_pos)
                self.collected_items.append(item)
                items_here.append(item)
                message = f"Collected {item} at {current_pos}"
            else:
                success = False
                message = f"Nothing to collect at {current_pos}"

        elif action.type == ActionType.REST:
            message = f"Agent rested at {self.agent_pos}"

        elif action.type == ActionType.EXPLORE:
            # Random exploration move
            random_direction = random.choice(list(self._MOVES))
            old_pos = self.agent_pos.copy()
            success = self._try_move(random_direction)

            if success:
                message = f"Explored {random_direction} from {old_pos} to {self.agent_pos}"
            else:
                message = "Exploration blocked - hit boundary"

        else:
            message = f"Executed {action.type.value} action"

        return Observation(
            location=f"{self.agent_pos}",
            items_found=items_here,
            energy_level=random.uniform(0.7, 1.0),
            message=message,
            success=success
        )

    def get_state(self) -> Dict[str, Any]:
        """Return a snapshot of the current environment state.

        The lists in the result are copies, so callers cannot accidentally
        move the agent or edit the collected items by mutating the snapshot.
        """
        return {
            "agent_position": list(self.agent_pos),
            "remaining_items": list(self.items.keys()),
            "collected_items": list(self.collected_items),
            "total_items": len(self.items) + len(self.collected_items)
        }

    def visualize(self):
        """Print a simple text map of the grid followed by item counts."""
        print("🗺️  Environment Map:")
        agent_cell = tuple(self.agent_pos)
        for y in range(self.size):
            cells = []
            for x in range(self.size):
                pos = (x, y)
                if pos == agent_cell:
                    cells.append("🤖")
                elif pos in self.items:
                    # Show different emojis for different items
                    cells.append(self._ITEM_EMOJI.get(self.items[pos], "📦"))
                else:
                    cells.append("⬜")
            # Every cell is followed by one space, including the last.
            print("".join(cell + " " for cell in cells))
        print(f"Items remaining: {len(self.items)}, Collected: {len(self.collected_items)}")
        print()

class EnhancedAgent:
    """An AI agent with probabilistic decision-making.

    Each turn the agent scores a handful of candidate actions, picks one with
    some randomness, expands it into a plan (a list of ``Action`` objects) and
    hands the plan out one action at a time. Several deliberate imperfections
    are simulated: noisy perception, occasional suboptimal plans, abandoned
    plans and execution errors.
    """

    # Energy spent on every non-rest action, and regained by a REST action.
    ENERGY_COST_PER_ACTION = 0.05
    REST_ENERGY_GAIN = 0.3

    def __init__(self, goal: str = "collect_all_items", personality: str = "balanced"):
        """Create an agent.

        Args:
            goal: Free-form goal label; stored but not otherwise interpreted.
            personality: One of "cautious", "aggressive", "balanced" or
                "explorer". Unknown values fall back to "balanced".
        """
        self.goal = goal
        self.memory = []  # every Observation passed to perceive()
        self.knowledge = {}  # location string -> list of item names seen there
        self.plan = []  # pending actions, executed front to back
        self.step_count = 0  # number of actions handed out so far
        self.energy = 1.0
        self.personality = personality  # "cautious", "aggressive", "balanced", "explorer"
        self._last_action = None  # last action returned by act_with_uncertainty()

        # Decision-making parameters based on personality
        self.decision_params = self._set_personality_params()

    def _set_personality_params(self) -> Dict[str, float]:
        """Return the decision-making parameters for ``self.personality``."""
        params = {
            "cautious": {
                "exploration_rate": 0.1,
                "risk_tolerance": 0.2,
                "planning_horizon": 5,
                "energy_threshold": 0.6
            },
            "aggressive": {
                "exploration_rate": 0.05,
                "risk_tolerance": 0.8,
                "planning_horizon": 2,
                "energy_threshold": 0.2
            },
            "balanced": {
                "exploration_rate": 0.15,
                "risk_tolerance": 0.5,
                "planning_horizon": 3,
                "energy_threshold": 0.4
            },
            "explorer": {
                "exploration_rate": 0.4,
                "risk_tolerance": 0.6,
                "planning_horizon": 1,
                "energy_threshold": 0.3
            }
        }
        return params.get(self.personality, params["balanced"])

    @staticmethod
    def _manhattan(a, b) -> int:
        """Return the Manhattan distance between two ``(x, y)`` positions."""
        return abs(a[0] - b[0]) + abs(a[1] - b[1])

    @staticmethod
    def _parse_target(target: str) -> Optional[Tuple[int, int]]:
        """Parse a ``"towards_(x, y)"`` target string into ``(x, y)``.

        Only two comma-separated integers are accepted; anything else yields
        None. No code is evaluated, unlike ``eval``.
        """
        parts = target.replace("towards_", "").strip().strip("()").split(",")
        if len(parts) != 2:
            return None
        try:
            return int(parts[0]), int(parts[1])
        except ValueError:
            return None

    def _resolve_target(self, action: "Action") -> Optional[Tuple[int, int]]:
        """Return the grid position a MOVE option points at, or None."""
        position = action.parameters.get("position") if action.parameters else None
        if position is not None:
            return position
        # Options built without a "position" parameter: fall back to the label.
        return self._parse_target(action.target)

    def perceive(self, observation: "Observation") -> None:
        """Record an observation, update energy and (usually) item knowledge."""
        self.memory.append(observation)

        # A REST handed out by act_with_uncertainty() restores energy; every
        # other action costs energy.
        last_action, self._last_action = self._last_action, None
        if last_action is not None and last_action.type == ActionType.REST:
            self.energy = min(1.0, self.energy + self.REST_ENERGY_GAIN)
        else:
            self.energy = max(0, self.energy - self.ENERGY_COST_PER_ACTION)

        # Sometimes miss information due to "sensor noise"
        if random.random() > 0.1:  # 90% accuracy in perception
            if observation.items_found:
                known_here = self.knowledge.setdefault(observation.location, [])
                for item in observation.items_found:
                    if item not in known_here:
                        known_here.append(item)

        print(f"📍 Agent observed: {observation.message} (Energy: {self.energy:.2f})")
        if observation.items_found:
            print(f"🔍 Items found: {observation.items_found}")

    def evaluate_options(self, env_state: Dict[str, Any]) -> List[Tuple["Action", float]]:
        """Return candidate actions paired with scores (higher is better).

        Args:
            env_state: Mapping with at least "remaining_items" (list of
                ``(x, y)`` positions) and "agent_position".

        Returns:
            A list of ``(Action, score)`` pairs; empty when no items remain.
        """
        remaining_items = env_state["remaining_items"]
        agent_pos = env_state["agent_position"]
        options = []

        if not remaining_items:
            return []

        # Option 1: Go to nearest item (greedy)
        nearest_item = min(remaining_items, key=lambda pos: self._manhattan(pos, agent_pos))
        distance = self._manhattan(nearest_item, agent_pos)
        greedy_score = 1.0 / (distance + 1)  # Higher score for closer items
        options.append((
            Action(ActionType.MOVE, f"towards_{nearest_item}",
                   parameters={"position": nearest_item}),
            greedy_score,
        ))

        # Option 2: Go to most valuable item (if we have value estimates)
        if len(remaining_items) > 1:
            # Simulate item values (in real systems, this might come from experience)
            valued_items = [(item, random.uniform(0.3, 1.0)) for item in remaining_items]
            best_item, best_value = max(valued_items, key=lambda x: x[1])
            value_score = best_value * 0.8  # Slightly lower than greedy to create tension
            options.append((
                Action(ActionType.MOVE, f"towards_{best_item}",
                       parameters={"position": best_item}),
                value_score,
            ))

        # Option 3: Explore unknown areas
        if random.random() < self.decision_params["exploration_rate"]:
            explore_score = 0.3 + random.uniform(0, 0.4)
            options.append((Action(ActionType.EXPLORE, "random_direction"), explore_score))

        # Option 4: Rest if energy is low
        if self.energy < self.decision_params["energy_threshold"]:
            rest_score = (self.decision_params["energy_threshold"] - self.energy) * 2
            options.append((Action(ActionType.REST, "recover_energy"), rest_score))

        return options

    def make_decision(self, options: List[Tuple["Action", float]]) -> Optional["Action"]:
        """Pick one action from scored options, with deliberate randomness.

        Returns None when ``options`` is empty.
        """
        if not options:
            return None

        actions, scores = zip(*options)

        # Explore: ignore the scores entirely and choose uniformly at random.
        if random.random() < self.decision_params["exploration_rate"]:
            chosen_action = random.choice(actions)
            print(f"🎲 Exploring: chose {chosen_action.type.value} randomly")
            return chosen_action

        # Exploit: choose based on scores, with noise added to create uncertainty.
        noisy_scores = [score + random.uniform(-0.2, 0.2) for score in scores]
        if max(noisy_scores) <= 0:
            return random.choice(actions)

        # Raise the clipped scores to 1/temperature so higher scores are more
        # likely but not guaranteed (higher temperature = more random).
        temperature = 0.5
        weights = [pow(max(0, score), 1 / temperature) for score in noisy_scores]
        total = sum(weights)
        if total <= 0:
            # Only reachable if every positive weight underflowed to zero.
            return random.choice(actions)

        probabilities = [weight / total for weight in weights]
        chosen_action = random.choices(actions, weights=probabilities)[0]
        print(f"🧠 Decided: {chosen_action.type.value} (prob-based)")
        return chosen_action

    def plan_with_uncertainty(self, env_state: Dict[str, Any]) -> List["Action"]:
        """Build a plan (list of actions) for the current state.

        Returns an empty list when there is nothing left to do.
        """
        options = self.evaluate_options(env_state)

        if not options:
            return []

        chosen_action = self.make_decision(options)
        agent_pos = env_state["agent_position"]
        remaining_items = env_state["remaining_items"]

        # Sometimes make suboptimal plans due to "bounded rationality"
        if random.random() < 0.15:  # 15% chance of suboptimal planning
            print("🤔 Making suboptimal plan due to cognitive limitations")
            if remaining_items:
                # Choose a random item instead of optimal
                random_item = random.choice(remaining_items)
                return self._create_path_to_item(random_item, agent_pos)

        # Normal planning
        if chosen_action is None:
            return []

        if chosen_action.type == ActionType.MOVE:
            target = self._resolve_target(chosen_action)
            if target is None:
                # Target could not be determined: fall back to the nearest item.
                if not remaining_items:
                    return []
                target = min(remaining_items,
                             key=lambda pos: self._manhattan(pos, agent_pos))
            return self._create_path_to_item(target, agent_pos)

        if chosen_action.type == ActionType.EXPLORE:
            # Random exploration
            directions = ["north", "south", "east", "west"]
            random_dir = random.choice(directions)
            return [Action(ActionType.MOVE, random_dir)]

        if chosen_action.type == ActionType.REST:
            return [Action(ActionType.REST, "recover")]

        return []

    @staticmethod
    def _axis_moves(current: int, target: int, decrease: str, increase: str) -> List["Action"]:
        """Return the MOVE actions that take one coordinate from current to target."""
        offset = target - current
        direction = increase if offset > 0 else decrease
        return [Action(ActionType.MOVE, direction) for _ in range(abs(offset))]

    def _create_path_to_item(self, target_item: Tuple[int, int], agent_pos: List[int]) -> List["Action"]:
        """Return moves to ``target_item`` followed by SEARCH and COLLECT.

        The path is axis-aligned: usually horizontal first, but 30% of the
        time vertical first.
        """
        target_x, target_y = target_item
        current_x, current_y = agent_pos

        # Sometimes choose suboptimal path ordering
        vertical_first = random.random() < 0.3

        horizontal = self._axis_moves(current_x, target_x, "west", "east")
        vertical = self._axis_moves(current_y, target_y, "north", "south")
        plan = vertical + horizontal if vertical_first else horizontal + vertical

        # Add search and collect actions
        plan.append(Action(ActionType.SEARCH, str(target_item)))
        plan.append(Action(ActionType.COLLECT, str(target_item)))

        return plan

    def act_with_uncertainty(self, environment) -> Optional["Action"]:
        """Return the next action to execute, or None when nothing is left to do.

        Args:
            environment: Object providing ``get_state()`` in the shape that
                ``evaluate_options`` expects.
        """
        # Sometimes forget the current plan (represents distraction/changing mind)
        if self.plan and random.random() < 0.1:  # 10% chance to abandon current plan
            print("🔄 Agent changed mind, abandoning current plan")
            self.plan = []

        # If no plan, create new one
        if not self.plan:
            self.plan = self.plan_with_uncertainty(environment.get_state())

            if not self.plan:
                print("🎯 Goal achieved! No more actions needed.")
                return None

        action = self.plan.pop(0)

        # Sometimes make execution errors: the planned action is lost and a
        # move in a non-existent direction is issued instead, which
        # Environment.execute_action reports as a failed move.
        if random.random() < 0.05:  # 5% chance of execution error
            print("⚠️ Execution error: action failed")
            action = Action(ActionType.MOVE, "invalid")
        else:
            print(f"🤖 Agent action: {action.type.value} {action.target}")

        self._last_action = action
        self.step_count += 1
        return action

# Demo function to show random environments
def demo_random_environments():
    """Print three random 6x6 layouts, then run one agent on a 5x5 grid.

    The agent run is capped at 30 steps, pauses briefly after each step and
    prints the map on every fifth step and once all items are collected.
    """
    print("🎲 Random Environment Generation Demo")
    print("=" * 50)

    # Three independent random layouts with a few statistics each
    for env_number in range(1, 4):
        print(f"\n🌍 Random Environment #{env_number}:")
        world = Environment(size=6, num_items=5)
        world.visualize()

        # Manhattan distance of every item from the agent's start at (0, 0)
        start_distances = [abs(x) + abs(y) for x, y in world.items]
        if start_distances:
            mean_distance = sum(start_distances) / len(start_distances)
        else:
            mean_distance = 0

        print(f"📊 Average distance from start: {mean_distance:.1f}")
        print(f"📦 Item types: {list(world.items.values())}")

    print("\n🔬 Comparing Agent Performance on Random vs Fixed Environments:")

    # One agent run on a fresh random environment
    print("\n🎯 Agent on Random Environment:")
    grid = Environment(size=5, num_items=4)
    grid.visualize()

    collector = EnhancedAgent(personality="balanced")
    step_limit = 30

    for turn in range(step_limit):
        chosen = collector.act_with_uncertainty(grid)
        if chosen is None:
            break

        collector.perceive(grid.execute_action(chosen))

        # Show progress every few steps, and always when the grid is cleared
        all_collected = not grid.items
        if all_collected or turn % 5 == 0:
            grid.visualize()

        if all_collected:
            print(f"🎉 Success! Completed in {turn + 1} steps")
            break

        time.sleep(0.2)

    summary = grid.get_state()
    print("\n📈 Final Results:")
    print(f"   Items collected: {len(summary['collected_items'])}")
    print(f"   Items remaining: {len(summary['remaining_items'])}")

def create_custom_environment():
    """Print the customization options and build a few example environments.

    Each example is created and visualized; if that raises, the error is
    reported and the next example is tried.
    """
    intro_lines = (
        "\n🛠️ Custom Environment Creation:",
        "You can customize:",
        "• Grid size (default: 5x5)",
        "• Number of items (default: 4)",
        "• Random seed for reproducibility",
        # Example custom environments
        "\n📋 Example Configurations:",
    )
    for line in intro_lines:
        print(line)

    configs = [
        {"size": 3, "num_items": 2, "name": "Small & Simple"},
        {"size": 8, "num_items": 10, "name": "Large & Crowded"},
        {"size": 5, "num_items": 1, "name": "Needle in Haystack"},
        {"size": 4, "num_items": 8, "name": "Item Dense"}
    ]

    for settings in configs:
        label = settings["name"]
        print(f"\n🏷️ {label}:")
        try:
            example = Environment(size=settings["size"], num_items=settings["num_items"])
            example.visualize()
        except Exception as error:
            print(f"   ⚠️ Configuration failed: {error}")

# Run both demos when this file is executed as a script (not when imported).
if __name__ == "__main__":
    demo_random_environments()
    create_custom_environment()

🎲 Random Environment Generation Demo

🌍 Random Environment #1:
🌍 Generated 5 items randomly on 6x6 grid
📦 Item locations: {(1, 5): 'treasure', (1, 4): 'coin', (4, 4): 'key', (1, 1): 'tool', (3, 3): 'potion'}
🗺️  Environment Map:
🤖 ⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ 🔧 ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ 🧪 ⬜ ⬜ 
⬜ 🪙 ⬜ ⬜ 🗝️  ⬜ 
⬜ 💰 ⬜ ⬜ ⬜ ⬜ 
Items remaining: 5, Collected: 0

📊 Average distance from start: 5.4
📦 Item types: ['treasure', 'coin', 'key', 'tool', 'potion']

🌍 Random Environment #2:
🌍 Generated 5 items randomly on 6x6 grid
📦 Item locations: {(2, 3): 'food', (3, 0): 'potion', (1, 5): 'tool', (2, 2): 'treasure', (3, 1): 'gem'}
🗺️  Environment Map:
🤖 ⬜ ⬜ 🧪 ⬜ ⬜ 
⬜ ⬜ ⬜ 💎 ⬜ ⬜ 
⬜ ⬜ 💰 ⬜ ⬜ ⬜ 
⬜ ⬜ 🍎 ⬜ ⬜ ⬜ 
⬜ ⬜ ⬜ ⬜ ⬜ ⬜ 
⬜ 🔧 ⬜ ⬜ ⬜ ⬜ 
Items remaining: 5, Collected: 0

📊 Average distance from start: 4.4
📦 Item types: ['food', 'potion', 'tool', 'treasure', 'gem']

🌍 Random Environment #3:
🌍 Generated 5 items randomly on 6x6 grid
📦 Item locations: {(1, 3): 'scroll', (3, 2): 'tool', (0, 2): 'potion', (1, 0): 'coin', (4, 3): '