# ⚽ Multi-Agent Soccer with Improved Physics (20-second episodes)
## ボールが挟まらない改良版物理エンジン

ボールがプレイヤー間に挟まって動かなくなる問題を解決した改良版です。

In [None]:
# Notebook shell magic: installs the third-party packages used by the cells below (-q keeps pip quiet).
!pip install numpy matplotlib pygame pettingzoo torch pillow opencv-python-headless imageio tqdm -q
print('✅ Dependencies installed')

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.animation import FuncAnimation
import pygame
import torch
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict, deque
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Any, Union
from enum import Enum
import random
import math
from abc import ABC, abstractmethod
import warnings
from PIL import Image
import cv2
import imageio
from tqdm import tqdm
import os
from IPython.display import Video, display, HTML
warnings.filterwarnings('ignore')

print('✅ Imports complete')

### ⚙️ Improved Physics Configuration

In [None]:
def _default_team_colors() -> List[str]:
    """Return a fresh list of team colors (index 0 = first team, 1 = second)."""
    return ['blue', 'red']


@dataclass
class ImprovedSoccerConfig:
    """Tunable constants for the soccer simulation.

    The defaults are chosen so the ball is lively (low friction, bouncy,
    strong kicks) and so that a ball trapped between players gets freed by
    the anti-stuck settings. Field order is part of the positional
    constructor signature and must not be rearranged.
    """

    # --- Pitch geometry (pixels) ---
    FIELD_SIZE: Tuple[int, int] = (800, 600)  # (width, height)
    GOAL_SIZE: Tuple[int, int] = (50, 150)  # (depth, mouth height)

    # --- Match setup ---
    MAX_STEPS: int = 600  # 20 seconds of play at 30 FPS
    TEAM_SIZE: int = 2  # players per team

    # --- Players ---
    PLAYER_RADIUS: float = 20.0
    PLAYER_SPEED: float = 4.5  # a bit quick, for more dynamic play
    PLAYER_MASS: float = 1.0

    # --- Ball (tuned relative to an earlier, stickier parameter set) ---
    BALL_RADIUS: float = 10.0
    BALL_MASS: float = 0.4  # light ball, easy to move
    BALL_FRICTION: float = 0.96  # per-step velocity multiplier; closer to 1 = less friction (was 0.93)
    BALL_RESTITUTION: float = 0.85  # wall bounce factor (was 0.7)
    BALL_SPEED_MULTIPLIER: float = 1.8  # ball speed scaling (was 1.3)
    MIN_BALL_SPEED: float = 0.5  # speeds below this count as "too slow"

    # --- Anti-stuck behaviour ---
    STUCK_DETECTION_FRAMES: int = 15  # length of the speed window used for detection
    STUCK_VELOCITY_THRESHOLD: float = 0.8  # mean speed below this = stuck
    ESCAPE_FORCE: float = 8.0  # velocity kick given to a stuck ball
    PLAYER_SEPARATION_FORCE: float = 3.0  # distance each pinning player is pushed away

    # --- Collision response ---
    COLLISION_ELASTICITY: float = 0.9  # was 0.6
    COLLISION_DAMPING: float = 0.85  # post-collision velocity multiplier

    # --- Kicking ---
    KICK_FORCE: float = 15.0  # was 10
    KICK_RANGE: float = 30.0  # reach of a kick

    # --- Rendering colors ---
    TEAM_COLORS: List[str] = field(default_factory=_default_team_colors)
    BALL_COLOR: str = 'white'
    FIELD_COLOR: str = 'green'

# Build the notebook-wide default configuration and echo the values that
# matter most for the anti-stuck behaviour.
config = ImprovedSoccerConfig()
print("✅ Improved physics configuration created:")
for _summary_line in (
    f"   Ball friction: {config.BALL_FRICTION} (less friction)",
    f"   Ball restitution: {config.BALL_RESTITUTION} (more bounce)",
    f"   Ball speed multiplier: {config.BALL_SPEED_MULTIPLIER}",
    f"   Anti-stuck escape force: {config.ESCAPE_FORCE}",
    f"   Collision elasticity: {config.COLLISION_ELASTICITY}",
):
    print(_summary_line)

### 🔧 Enhanced Physics Engine with Anti-Stuck Mechanics

In [None]:
class ImprovedPhysicsEngine:
    """Ball physics with wall bounces, player collisions and stuck recovery.

    The engine keeps a sliding window of recent ball speeds. When the mean
    speed over a full window is low and at least two players are touching
    (or nearly touching) the ball, the ball is considered stuck and is given
    an escape impulse while the two nearest players are nudged apart.

    Attributes:
        config: Object exposing the ``ImprovedSoccerConfig`` constants.
        stuck_frames: Number of consecutive updates in which the ball was
            judged stuck (0 when it is free).
        last_ball_position: Copy of the ball position after the most recent
            update, or ``None`` before the first update.
        ball_velocity_history: Deque of the last
            ``STUCK_DETECTION_FRAMES`` ball speeds.
    """

    def __init__(self, config: 'ImprovedSoccerConfig'):
        self.config = config
        self.stuck_frames = 0
        self.last_ball_position = None
        self.ball_velocity_history = deque(maxlen=config.STUCK_DETECTION_FRAMES)

    def update_ball(self, ball: 'Ball', players: List['Player'], delta_time: float = 1.0):
        """Advance the ball by one tick, mutating ``ball`` and ``players`` in place.

        Args:
            ball: Object with float ndarray ``position`` and ``velocity``.
            players: Objects with ``position``, ``velocity`` and ``is_kicking``.
            delta_time: Multiplier applied to the velocity when integrating
                the position.
        """
        # The speed is sampled *before* any force is applied this tick.
        ball_speed = np.linalg.norm(ball.velocity)
        self.ball_velocity_history.append(ball_speed)

        if self._is_ball_stuck(ball, players):
            self._apply_escape_force(ball, players)
            self.stuck_frames += 1
        else:
            self.stuck_frames = 0

        # Friction is a per-tick velocity multiplier.
        ball.velocity *= self.config.BALL_FRICTION

        # A tiny random nudge breaks perfectly symmetric stand-offs.
        if ball_speed < self.config.MIN_BALL_SPEED:
            ball.velocity += np.random.randn(2) * 0.1

        ball.position += ball.velocity * delta_time

        self._handle_wall_collision(ball)
        for player in players:
            self._handle_ball_player_collision(ball, player)

        self.last_ball_position = ball.position.copy()

    def _is_ball_stuck(self, ball: 'Ball', players: List['Player']) -> bool:
        """Return True when the ball is slow and pinned by two or more players."""
        # Wait for a full window so a ball that has just been placed is not flagged.
        if len(self.ball_velocity_history) < self.config.STUCK_DETECTION_FRAMES:
            return False

        if np.mean(list(self.ball_velocity_history)) > self.config.STUCK_VELOCITY_THRESHOLD:
            return False

        # "Near" means touching distance plus a 5-unit margin.
        reach = self.config.PLAYER_RADIUS + self.config.BALL_RADIUS + 5
        nearby_players = sum(
            1 for player in players
            if np.linalg.norm(ball.position - player.position) < reach
        )
        return nearby_players >= 2

    def _apply_escape_force(self, ball: 'Ball', players: List['Player']):
        """Kick the ball sideways out of the gap between its two nearest players.

        Does nothing when fewer than two players are supplied.
        """
        if len(players) < 2:
            return

        nearest = sorted(
            players, key=lambda p: np.linalg.norm(ball.position - p.position)
        )
        player1, player2 = nearest[0], nearest[1]

        gap = player2.position - player1.position
        gap_length = np.linalg.norm(gap)

        if gap_length > 0:
            axis = gap / gap_length
            # Escape perpendicular to the line joining the players; the side
            # is chosen at random.
            escape_dir = np.array([-axis[1], axis[0]])
            if random.random() > 0.5:
                escape_dir = -escape_dir
        else:
            # Players coincide: no meaningful axis, so pick any direction.
            axis = None
            angle = random.uniform(0, 2 * math.pi)
            escape_dir = np.array([math.cos(angle), math.sin(angle)])

        ball.velocity += escape_dir * self.config.ESCAPE_FORCE

        # Open the gap a little so the ball has somewhere to go.
        if axis is not None:
            push = axis * self.config.PLAYER_SEPARATION_FORCE
            player1.position -= push
            player2.position += push

    def _handle_wall_collision(self, ball: 'Ball'):
        """Clamp the ball inside the field and reflect it off any wall it hit.

        Each bounce scales the normal velocity by ``BALL_RESTITUTION`` and
        adds a small random perturbation. After the perturbation the normal
        component is forced to point back into the field, so the noise can
        never send the ball straight back into the wall it just bounced off.
        """
        radius = self.config.BALL_RADIUS
        restitution = self.config.BALL_RESTITUTION

        bounced = {}  # axis index -> +1.0 / -1.0 sign pointing into the field
        for axis, limit in enumerate(self.config.FIELD_SIZE):
            if ball.position[axis] - radius <= 0:
                ball.position[axis] = radius
                inward = 1.0
            elif ball.position[axis] + radius >= limit:
                ball.position[axis] = limit - radius
                inward = -1.0
            else:
                continue

            ball.velocity[axis] = inward * abs(ball.velocity[axis]) * restitution
            ball.velocity += np.random.randn(2) * 0.5  # Add randomness
            bounced[axis] = inward

        # Re-apply the inward sign last: in a corner the second bounce's noise
        # may have flipped the first axis as well.
        for axis, inward in bounced.items():
            ball.velocity[axis] = inward * abs(ball.velocity[axis])

    def _handle_ball_player_collision(self, ball: 'Ball', player: 'Player'):
        """Separate an overlapping ball/player pair and exchange momentum.

        The ball takes 70% of the positional correction and the player 30%.
        An impulse is applied only when the two are approaching each other;
        if the player is kicking, ``KICK_FORCE`` plus a random sideways
        component is added to the ball.
        """
        dist_vec = ball.position - player.position
        dist = np.linalg.norm(dist_vec)
        min_dist = self.config.BALL_RADIUS + self.config.PLAYER_RADIUS

        if dist >= min_dist:
            return

        if dist > 0:
            collision_normal = dist_vec / dist
        else:
            # Centres coincide, so there is no direction to normalise. Pick a
            # random one; otherwise the ball would stay inside the player.
            angle = random.uniform(0, 2 * math.pi)
            collision_normal = np.array([math.cos(angle), math.sin(angle)])

        overlap = min_dist - dist
        ball.position += collision_normal * (overlap * 0.7)  # Ball moves more
        player.position -= collision_normal * (overlap * 0.3)  # Player moves less

        relative_velocity = ball.velocity - player.velocity
        velocity_along_normal = np.dot(relative_velocity, collision_normal)

        # A non-negative value means they are already moving apart.
        if velocity_along_normal >= 0:
            return

        inverse_mass_sum = 1 / self.config.BALL_MASS + 1 / self.config.PLAYER_MASS
        impulse = 2 * velocity_along_normal / inverse_mass_sum
        impulse_vector = impulse * collision_normal * self.config.COLLISION_ELASTICITY

        ball.velocity -= impulse_vector / self.config.BALL_MASS
        player.velocity += impulse_vector / self.config.PLAYER_MASS

        if player.is_kicking:
            ball.velocity += collision_normal * self.config.KICK_FORCE
            # Random sideways component so kicks are not perfectly straight.
            perpendicular = np.array([-collision_normal[1], collision_normal[0]])
            ball.velocity += perpendicular * random.uniform(-2, 2)

        ball.velocity *= self.config.COLLISION_DAMPING

        # Never let a collision leave the ball almost stationary.
        if np.linalg.norm(ball.velocity) < self.config.MIN_BALL_SPEED:
            ball.velocity = collision_normal * self.config.MIN_BALL_SPEED * 2

print('✅ Improved physics engine created with anti-stuck mechanics')

### 🎮 Game Entities

In [None]:
class Ball:
    """The match ball: a position, a velocity and a stuck counter."""

    def __init__(self, position: np.ndarray):
        # Construction and reset produce exactly the same state.
        self.reset(position)

    def reset(self, position: np.ndarray):
        """Place the ball at ``position`` (copied as float), at rest."""
        self.position = position.astype(float)
        self.velocity = np.zeros(2, dtype=float)
        self.stuck_counter = 0


class Player:
    """A single footballer identified by ``id`` and ``team``, able to kick."""

    def __init__(self, player_id: int, team: int, position: np.ndarray):
        self.id = player_id
        self.team = team
        # All mutable state starts out exactly as it does after a reset.
        self.reset(position)

    def reset(self, position: np.ndarray):
        """Move the player to ``position`` (copied as float) and clear motion state."""
        self.position = position.astype(float)
        self.velocity = np.zeros(2, dtype=float)
        self.is_kicking = False
        self.stamina = 1.0

print('✅ Game entities defined')

### 🏟️ Soccer Environment with Improved Physics

In [None]:
# Prefer PettingZoo's AgentSelector; fall back to a minimal local version when
# the installed PettingZoo does not export that name.
try:
    from pettingzoo.utils.agent_selector import AgentSelector
except ImportError:
    class AgentSelector:
        """Minimal round-robin selector used when PettingZoo's is unavailable."""

        def __init__(self, agents):
            self.agents = agents
            self._current_agent_idx = 0
            self.selected_agent = self.agents[0] if agents else None

        def next(self):
            """Return the next agent in cyclic order (None if there are none)."""
            if not self.agents:
                return None
            self.selected_agent = self.agents[self._current_agent_idx]
            self._current_agent_idx = (self._current_agent_idx + 1) % len(self.agents)
            return self.selected_agent

        def is_last(self):
            """Return True when the most recently selected agent closed a cycle."""
            return self._current_agent_idx == 0

        def reset(self):
            """Restart the cycle from the first agent."""
            self._current_agent_idx = 0
            self.selected_agent = self.agents[0] if self.agents else None

from gymnasium import spaces
from pettingzoo import AECEnv


class ImprovedSoccerEnvironment(AECEnv):
    """Turn-based (AEC) two-team soccer environment with anti-stuck physics.

    Agents act one after another; the ball physics, goal check and rewards
    are processed once per full round of agent turns.

    Action (5 floats in [-1, 1]): indices 0-1 are the movement direction,
    index 2 requests a kick when > 0.5, indices 3-4 are unused.

    Observation (``4 * n_agents + 8`` floats): own position and velocity,
    ball position and velocity, position and velocity of every other agent
    (in ``possible_agents`` order), both scores, elapsed-time fraction and
    own stamina.
    """

    metadata = {
        'name': 'improved_soccer_v0',
        'render_modes': ['human', 'rgb_array'],
        'render_fps': 30,
    }

    def __init__(self, config: Optional['ImprovedSoccerConfig'] = None,
                 render_mode: Optional[str] = None):
        super().__init__()
        self.config = config or ImprovedSoccerConfig()
        self.render_mode = render_mode

        self.physics = ImprovedPhysicsEngine(self.config)

        self.possible_agents = [f'player_{i}' for i in range(self.config.TEAM_SIZE * 2)]
        self.agents = self.possible_agents[:]
        self.agent_name_mapping = {agent: i for i, agent in enumerate(self.agents)}

        # The observation length is derived from what observe() emits.
        # Velocities are signed, so the space is left unbounded.
        obs_dim = 4 * len(self.possible_agents) + 8
        self._action_spaces = {
            agent: spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)
            for agent in self.possible_agents
        }
        self._observation_spaces = {
            agent: spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)
            for agent in self.possible_agents
        }

        self.ball = None
        self.players = {}
        self.reset()

    def observation_space(self, agent: str):
        """Return the observation space of ``agent``."""
        return self._observation_spaces[agent]

    def action_space(self, agent: str):
        """Return the action space of ``agent``."""
        return self._action_spaces[agent]

    def _home_position(self, index: int, jitter: float = 0.0) -> np.ndarray:
        """Return the kick-off position of the ``index``-th agent.

        Team 0 lines up at 25% of the field width and team 1 at 75%.
        ``jitter`` is the half-width of a uniform random offset applied to
        both coordinates (0 disables it).
        """
        width, height = self.config.FIELD_SIZE
        team = index // self.config.TEAM_SIZE
        team_position = index % self.config.TEAM_SIZE

        x = width * (0.25 if team == 0 else 0.75)
        y = height * (0.35 + team_position * 0.3)
        if jitter:
            x += random.uniform(-jitter, jitter)
            y += random.uniform(-jitter, jitter)
        return np.array([x, y])

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Start a new episode.

        Returns:
            ``(observation, info)`` for the first agent to act.
        """
        if seed is not None:
            np.random.seed(seed)
            random.seed(seed)

        self.agents = self.possible_agents[:]
        self.rewards = {agent: 0 for agent in self.agents}
        self._cumulative_rewards = {agent: 0 for agent in self.agents}
        self.terminations = {agent: False for agent in self.agents}
        self.truncations = {agent: False for agent in self.agents}
        self.infos = {agent: {} for agent in self.agents}

        # Fresh physics state so stuck detection does not carry over from the
        # previous episode.
        self.physics = ImprovedPhysicsEngine(self.config)

        # Ball starts near the centre; the offset avoids a symmetric kick-off.
        center = np.array(self.config.FIELD_SIZE) / 2
        offset = np.random.randn(2) * 10
        self.ball = Ball(center + offset)

        self.players = {}
        for i, agent in enumerate(self.agents):
            team = i // self.config.TEAM_SIZE
            self.players[agent] = Player(i, team, self._home_position(i, jitter=20))

        self.current_step = 0
        self.score = [0, 0]
        self._last_scoring_team = None  # team that scored in the latest update, if any

        self._agent_selector = AgentSelector(self.agents)
        self.agent_selection = self._agent_selector.next()

        return self.observe(self.agent_selection), self.infos[self.agent_selection]

    def step(self, action: np.ndarray):
        """Apply ``action`` for the currently selected agent and advance the turn."""
        if self.terminations[self.agent_selection] or self.truncations[self.agent_selection]:
            self._was_dead_step(action)
            return

        player = self.players[self.agent_selection]
        self._apply_action(player, action)

        # The shared world state advances once per full round of turns.
        if self._agent_selector.is_last():
            self._update_game_state()

        self.agent_selection = self._agent_selector.next()

    def _apply_action(self, player: 'Player', action: np.ndarray):
        """Move ``player`` according to ``action`` and record its kick intent."""
        player.velocity = action[:2] * self.config.PLAYER_SPEED
        player.is_kicking = action[2] > 0.5
        player.position += player.velocity

        # Keep the whole player disc inside the field.
        radius = self.config.PLAYER_RADIUS
        width, height = self.config.FIELD_SIZE
        player.position[0] = np.clip(player.position[0], radius, width - radius)
        player.position[1] = np.clip(player.position[1], radius, height - radius)

    def _update_game_state(self):
        """Run ball physics, goal detection, rewards and the step counter."""
        self.physics.update_ball(self.ball, list(self.players.values()))
        self._last_scoring_team = self._check_goals()
        self._calculate_rewards()

        self.current_step += 1
        if self.current_step >= self.config.MAX_STEPS:
            for agent in self.agents:
                self.truncations[agent] = True

    def _check_goals(self) -> Optional[int]:
        """Score and reset positions if the ball is inside a goal.

        Returns:
            The index of the team that scored, or ``None`` when no goal was
            scored.
        """
        ball_x, ball_y = self.ball.position
        width, height = self.config.FIELD_SIZE
        goal_depth, goal_height = self.config.GOAL_SIZE
        goal_top = (height - goal_height) / 2
        goal_bottom = goal_top + goal_height

        if not (goal_top <= ball_y <= goal_bottom):
            return None

        if ball_x <= goal_depth:
            # Ball in the left goal: team 1 (red) scores.
            self.score[1] += 1
            self._reset_after_goal()
            print(f"⚽ GOAL! Red team scores! Score: {self.score}")
            return 1

        if ball_x >= width - goal_depth:
            # Ball in the right goal: team 0 (blue) scores.
            self.score[0] += 1
            self._reset_after_goal()
            print(f"⚽ GOAL! Blue team scores! Score: {self.score}")
            return 0

        return None

    def _reset_after_goal(self):
        """Put the ball back at the centre and players on their home spots."""
        center = np.array(self.config.FIELD_SIZE) / 2
        self.ball.position = center.copy()

        # A random initial velocity stops everyone converging on a static ball.
        angle = random.uniform(0, 2 * math.pi)
        speed = random.uniform(2, 4)
        self.ball.velocity = np.array([math.cos(angle), math.sin(angle)]) * speed

        for i, agent in enumerate(self.agents):
            self.players[agent].position = self._home_position(i)
            self.players[agent].velocity = np.zeros(2)

    def _calculate_rewards(self):
        """Compute this update's reward for every live agent.

        The +/-10 goal reward is granted only in the update in which the goal
        was scored, not on every later step.
        """
        ball_speed = np.linalg.norm(self.ball.velocity)

        for agent in self.agents:
            player = self.players[agent]
            reward = 0.0

            # Shaping: being close to the ball.
            dist_to_ball = np.linalg.norm(self.ball.position - player.position)
            reward += max(0, 1.0 - dist_to_ball / 200) * 0.1

            # Shaping: keeping the ball moving while near it.
            if dist_to_ball < 50:
                reward += ball_speed * 0.05

            if self._last_scoring_team is not None:
                reward += 10.0 if player.team == self._last_scoring_team else -10.0

            self.rewards[agent] = reward
            self._cumulative_rewards[agent] += reward

    def observe(self, agent: str) -> np.ndarray:
        """Return the observation vector for ``agent`` (see the class docstring)."""
        player = self.players[agent]
        width, height = self.config.FIELD_SIZE
        obs = []

        obs.extend(player.position / [width, height])
        obs.extend(player.velocity / 10.0)

        obs.extend(self.ball.position / [width, height])
        obs.extend(self.ball.velocity / 20.0)

        # Iterate possible_agents (not agents) so the vector length stays
        # fixed while finished agents are being removed.
        for other_agent in self.possible_agents:
            if other_agent != agent:
                other = self.players[other_agent]
                obs.extend(other.position / [width, height])
                obs.extend(other.velocity / 10.0)

        obs.extend([self.score[0] / 10, self.score[1] / 10])
        obs.append(self.current_step / self.config.MAX_STEPS)
        obs.append(player.stamina)

        return np.array(obs, dtype=np.float32)

    def render(self):
        """Render the current state.

        Returns an RGB array in ``'rgb_array'`` mode; in ``'human'`` mode the
        frame is shown with matplotlib and nothing is returned.
        """
        if self.render_mode == 'rgb_array':
            return self._render_frame()
        elif self.render_mode == 'human':
            frame = self._render_frame()
            plt.imshow(frame)
            plt.axis('off')
            plt.show()

    def _render_frame(self) -> np.ndarray:
        """Draw the field with matplotlib and return it as an (H, W, 3) uint8 array."""
        width, height = self.config.FIELD_SIZE
        goal_depth, goal_height = self.config.GOAL_SIZE

        fig, ax = plt.subplots(figsize=(10, 7))
        try:
            # Field
            ax.add_patch(patches.Rectangle(
                (0, 0), width, height,
                linewidth=2, edgecolor='white', facecolor=self.config.FIELD_COLOR))

            # Goals
            goal_y = (height - goal_height) / 2
            for goal_x in (0, width - goal_depth):
                ax.add_patch(patches.Rectangle(
                    (goal_x, goal_y), goal_depth, goal_height,
                    facecolor='white', alpha=0.3))

            # Players, labelled with their id
            for player in self.players.values():
                ax.add_patch(patches.Circle(
                    player.position, self.config.PLAYER_RADIUS,
                    facecolor=self.config.TEAM_COLORS[player.team],
                    edgecolor='white', linewidth=2))
                ax.text(player.position[0], player.position[1], str(player.id),
                        ha='center', va='center', color='white',
                        fontsize=10, fontweight='bold')

            # Ball
            ax.add_patch(patches.Circle(
                self.ball.position, self.config.BALL_RADIUS,
                facecolor=self.config.BALL_COLOR, edgecolor='black', linewidth=2))

            # Ball velocity indicator
            if np.linalg.norm(self.ball.velocity) > 0.1:
                vel_scale = 5
                ax.arrow(self.ball.position[0], self.ball.position[1],
                         self.ball.velocity[0] * vel_scale,
                         self.ball.velocity[1] * vel_scale,
                         color='yellow', width=2, alpha=0.7)

            # Score and remaining time (30 steps per second)
            time_remaining = (self.config.MAX_STEPS - self.current_step) / 30
            ax.text(width / 2, 20, f'Blue {self.score[0]} - {self.score[1]} Red',
                    ha='center', fontsize=16, fontweight='bold', color='white')
            ax.text(width / 2, height - 20, f'Time: {time_remaining:.1f}s',
                    ha='center', fontsize=12, color='white')

            # Stuck indicator
            if self.physics.stuck_frames > 0:
                ax.text(self.ball.position[0], self.ball.position[1] - 30,
                        '⚠️ STUCK', ha='center', color='red',
                        fontsize=10, fontweight='bold')

            ax.set_xlim(0, width)
            ax.set_ylim(0, height)
            ax.set_aspect('equal')
            ax.axis('off')

            # tostring_rgb() is gone from recent Matplotlib; read the RGBA
            # buffer and drop the alpha channel instead.
            fig.canvas.draw()
            frame = np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
        finally:
            # Always release the figure, even if drawing failed.
            plt.close(fig)

        return frame


print('✅ Improved Soccer Environment created')

### 🤖 Smart Expert Agent

In [None]:
class SmartExpertAgent:
    """Rule-based player that backs off when the ball appears stuck.

    Agents with an even ``agent_id`` play as attackers, odd ones as
    defenders. Actions are 5-element arrays: indices 0-1 are the movement
    direction, index 2 is the kick flag, indices 3-4 are left at zero.
    """

    def __init__(self, agent_id: int, team: int, config: 'ImprovedSoccerConfig'):
        self.agent_id = agent_id
        self.team = team
        self.config = config
        self.role = 'attacker' if agent_id % 2 == 0 else 'defender'
        self.last_ball_pos = None
        self.stuck_counter = 0

    @staticmethod
    def _unit(vector: np.ndarray) -> np.ndarray:
        """Scale ``vector`` to (almost) unit length; safe for the zero vector."""
        return vector / (np.linalg.norm(vector) + 1e-6)

    def select_action(self, observation: np.ndarray) -> np.ndarray:
        """Choose an action from a normalised observation.

        Only the first six entries are interpreted: own position at indices
        0-1 and ball position at indices 4-5, both as fractions of the field
        size.
        """
        width, height = self.config.FIELD_SIZE
        self_pos = observation[0:2] * [width, height]
        ball_pos = observation[4:6] * [width, height]

        # Count consecutive calls in which the ball has barely moved.
        if self.last_ball_pos is not None:
            if np.linalg.norm(ball_pos - self.last_ball_pos) < 2.0:
                self.stuck_counter += 1
            else:
                self.stuck_counter = 0
        self.last_ball_pos = ball_pos.copy()

        if self.stuck_counter > 5:
            return self._unstuck_strategy(self_pos, ball_pos)

        action = np.zeros(5)
        dist_to_ball = np.linalg.norm(ball_pos - self_pos)

        if self.role == 'attacker':
            if dist_to_ball >= 100:
                # Far away: run straight at the ball.
                action[0:2] = self._unit(ball_pos - self_pos)
            else:
                # Attack the opponent's goal: right for team 0, left for team 1.
                goal_x = width - 50 if self.team == 0 else 50
                direction_to_goal = self._unit(np.array([goal_x, height / 2]) - ball_pos)

                if dist_to_ball < 40:
                    # In range: kick and follow through towards the goal.
                    action[2] = 1.0
                    action[0:2] = direction_to_goal
                else:
                    # Line up 30 units behind the ball on the ball-goal line.
                    ideal_pos = ball_pos - direction_to_goal * 30
                    action[0:2] = self._unit(ideal_pos - self_pos)
        else:
            # Defend our own goal: left for team 0, right for team 1.
            goal_x = 50 if self.team == 0 else width - 50
            goal_pos = np.array([goal_x, height / 2])

            if dist_to_ball < 50:
                # Clear the ball directly away from our goal.
                action[0:2] = self._unit(ball_pos - goal_pos)
                action[2] = 1.0
            else:
                # Hold a spot 40% of the way from the goal to the ball.
                ideal_pos = goal_pos + (ball_pos - goal_pos) * 0.4
                action[0:2] = self._unit(ideal_pos - self_pos)

        # Small noise keeps mirrored agents from behaving identically.
        action[0:2] += np.random.randn(2) * 0.1

        return np.clip(action, -1, 1)

    def _unstuck_strategy(self, self_pos: np.ndarray, ball_pos: np.ndarray) -> np.ndarray:
        """Return a movement-only action intended to free a stuck ball.

        Within 50 units of the ball the agent retreats; otherwise it
        approaches at a random +/-45 degree offset. No kick is requested in
        this mode: the original kick check (distance < 40) sat inside the
        "distance >= 50" branch and could never fire, so it has been removed.
        """
        action = np.zeros(5)

        direction_away = self_pos - ball_pos
        if np.linalg.norm(direction_away) < 50:
            # Too close: give the ball some room.
            action[0:2] = self._unit(direction_away)
        else:
            # Come in from a different angle than the direct line.
            angle_offset = np.pi / 4 if random.random() > 0.5 else -np.pi / 4
            direction = ball_pos - self_pos
            angle = np.arctan2(direction[1], direction[0]) + angle_offset
            action[0:2] = [np.cos(angle), np.sin(angle)]

        # Occasionally drop back to the normal strategy.
        if random.random() < 0.1:
            self.stuck_counter = 0

        return np.clip(action, -1, 1)

print('✅ Smart Expert Agent created')

### 🏃 Training with Improved Physics

In [None]:
def train_with_improved_physics(num_episodes: int = 10):
    """Play ``num_episodes`` matches between scripted expert agents.

    For every episode the final score, the share of game steps in which the
    physics engine reported a stuck ball, and a goal timeline are recorded.
    A video is written for the first and the last episode.

    Args:
        num_episodes: Number of matches to play.

    Returns:
        A list with one statistics dict per episode (keys: ``episode``,
        ``blue_goals``, ``red_goals``, ``total_goals``,
        ``stuck_percentage``, ``goals_timeline``).
    """

    def _mean_total_goals(stats):
        # Plain-Python mean; NaN for an empty slice without a NumPy warning.
        if not stats:
            return float('nan')
        return sum(s['total_goals'] for s in stats) / len(stats)

    config = ImprovedSoccerConfig()
    env = ImprovedSoccerEnvironment(config, render_mode='rgb_array')

    # One expert per agent; the team is derived from the agent's index.
    agents = {
        agent_name: SmartExpertAgent(i, i // config.TEAM_SIZE, config)
        for i, agent_name in enumerate(env.possible_agents)
    }

    episode_stats = []

    for episode in range(num_episodes):
        obs, info = env.reset()

        episode_frames = [env.render()]  # kick-off frame
        goals_timeline = [(env.current_step, sum(env.score))]
        ball_stuck_time = 0
        last_step = env.current_step

        print(f"\n📺 Episode {episode + 1}/{num_episodes}")

        while env.agents:
            current_agent = env.agent_selection

            # A finished agent must be stepped with None; PettingZoo's dead
            # step rejects any other action.
            if env.terminations[current_agent] or env.truncations[current_agent]:
                env.step(None)
                continue

            action = agents[current_agent].select_action(obs)
            env.step(action)
            obs = env.observe(env.agent_selection)

            # Everything below is per *game* step, i.e. once per full round
            # of agent turns, so that percentages relate to MAX_STEPS.
            if env.current_step == last_step:
                continue
            last_step = env.current_step

            # Record a frame every 10 game steps
            if last_step % 10 == 0:
                episode_frames.append(env.render())

            if env.physics.stuck_frames > 0:
                ball_stuck_time += 1

            current_score = sum(env.score)
            if current_score > goals_timeline[-1][1]:
                goals_timeline.append((last_step, current_score))

        final_score = env.score
        total_goals = sum(final_score)
        stuck_percentage = (ball_stuck_time / config.MAX_STEPS) * 100

        episode_stats.append({
            'episode': episode + 1,
            'blue_goals': final_score[0],
            'red_goals': final_score[1],
            'total_goals': total_goals,
            'stuck_percentage': stuck_percentage,
            'goals_timeline': goals_timeline
        })

        print(f"   Final Score: Blue {final_score[0]} - {final_score[1]} Red")
        print(f"   Total Goals: {total_goals}")
        print(f"   Ball Stuck Time: {stuck_percentage:.1f}%")

        # Save video for first and last episodes
        if episode == 0 or episode == num_episodes - 1:
            video_name = f'match_episode_{episode + 1}.mp4'
            save_video(episode_frames, video_name)
            print(f"   Video saved: {video_name}")

    env.close()

    print("\n" + "="*50)
    print("📊 TRAINING SUMMARY")
    print("="*50)

    if not episode_stats:
        # Nothing was played, so there is nothing to average.
        print("No episodes were run")
        return episode_stats

    avg_goals = _mean_total_goals(episode_stats)
    avg_stuck = sum(s['stuck_percentage'] for s in episode_stats) / len(episode_stats)

    print(f"Average goals per episode: {avg_goals:.2f}")
    print(f"Average stuck time: {avg_stuck:.1f}%")

    # Compare the first and second half of the run.
    half = num_episodes // 2
    first_half_goals = _mean_total_goals(episode_stats[:half])
    second_half_goals = _mean_total_goals(episode_stats[half:])

    print(f"\nFirst half average goals: {first_half_goals:.2f}")
    print(f"Second half average goals: {second_half_goals:.2f}")

    if avg_stuck < 5:
        print("\n✅ SUCCESS: Ball stuck problem is resolved!")
    else:
        print(f"\n⚠️ Ball still gets stuck {avg_stuck:.1f}% of the time")

    return episode_stats

print('✅ Training function ready')

### 🎥 Video Generation

In [None]:
def save_video(frames: List[np.ndarray], filename: str, fps: int = 15):
    """Write ``frames`` to ``filename`` as a video using imageio.

    Args:
        frames: Image arrays, appended in order.
        filename: Output path handed to ``imageio.get_writer``.
        fps: Frames per second of the resulting video.

    Prints a notice and writes nothing when ``frames`` is empty.
    """
    if len(frames) == 0:
        print("No frames to save")
        return

    # The context manager closes the writer even if appending a frame fails.
    with imageio.get_writer(filename, fps=fps) as writer:
        for frame in frames:
            writer.append_data(frame)
    print(f"✅ Video saved: {filename}")

print('✅ Video generation function ready')

### 🚀 Run Training with Improved Physics

In [None]:
# Kick off a short five-episode run and keep the per-episode statistics.
print(
    "🏃 Starting training with improved physics...",
    "This will prevent the ball from getting stuck between players.\n",
    sep="\n",
)

stats = train_with_improved_physics(num_episodes=5)

print(
    "\n✅ Training complete!",
    "The improved physics should prevent the ball from getting stuck.",
    sep="\n",
)

In [None]:
# Show the recording of the final episode inline, if the run produced one.
from IPython.display import Video

video_file = 'match_episode_5.mp4'
if not os.path.exists(video_file):
    print("Video file not found. Please run the training first.")
else:
    print("🎬 Displaying match video with improved physics:")
    display(Video(video_file, embed=True, width=800))