In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Football AI Agent - Quick Start\n",
    "\n",
    "This notebook provides a quick introduction to training and evaluating football AI agents.\n",
    "\n",
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import torch\n",
    "\n",
    "# Add project root to path\n",
    "sys.path.append('..')\n",
    "\n",
    "from src.agents.dqn_agent import DQNAgent\n",
    "from src.agents.ppo_agent import PPOAgent\n",
    "from src.environment.wrappers import create_football_env\n",
    "from src.utils.metrics import PerformanceTracker\n",
    "\n",
    "print(f\"PyTorch version: {torch.__version__}\")\n",
    "print(f\"CUDA available: {torch.cuda.is_available()}\")\n",
    "\n",
    "# Set style\n",
    "sns.set_style('whitegrid')\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Create Environment\n",
    "\n",
    "Let's create a simple football environment to start with."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the environment options in one place, then build the environment\n",
    "env_options = dict(\n",
    "    scenario='academy_empty_goal',\n",
    "    frame_skip=4,\n",
    "    reward_shaping=True,\n",
    "    render=False,\n",
    ")\n",
    "env = create_football_env(**env_options)\n",
    "\n",
    "# Report the first observation dimension and the number of discrete actions\n",
    "obs_size = env.observation_space.shape[0]\n",
    "num_actions = env.action_space.n\n",
    "print(f\"State dimension: {obs_size}\")\n",
    "print(f\"Action dimension: {num_actions}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Create and Train Agent\n",
    "\n",
    "We'll train a PPO agent for a few episodes as a demonstration."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyperparameters handed to the PPO agent; CUDA is enabled only when available\n",
    "config = dict(\n",
    "    learning_rate=3e-4,\n",
    "    gamma=0.99,\n",
    "    gae_lambda=0.95,\n",
    "    clip_epsilon=0.2,\n",
    "    value_coef=0.5,\n",
    "    entropy_coef=0.01,\n",
    "    rollout_length=2048,\n",
    "    batch_size=64,\n",
    "    update_epochs=4,\n",
    "    use_cuda=torch.cuda.is_available(),\n",
    ")\n",
    "\n",
    "# Size the agent from the environment's observation and action spaces\n",
    "observation_shape = env.observation_space.shape\n",
    "agent = PPOAgent(\n",
    "    state_dim=observation_shape[0],\n",
    "    action_dim=env.action_space.n,\n",
    "    config=config,\n",
    ")\n",
    "\n",
    "print(\"Agent created successfully!\")\n",
    "print(f\"Device: {agent.device}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simplified training loop\n",
    "num_episodes = 100  # Short demo\n",
    "log_every = 20  # episodes between progress lines; also the averaging window\n",
    "tracker = PerformanceTracker()\n",
    "episode_rewards = []\n",
    "\n",
    "print(f\"Training for {num_episodes} episodes...\")\n",
    "\n",
    "for episode in range(num_episodes):\n",
    "    state = env.reset()\n",
    "    total_reward, step_count = 0, 0\n",
    "\n",
    "    while True:\n",
    "        # Act in the environment and record the resulting transition\n",
    "        action = agent.select_action(state, training=True)\n",
    "        next_state, reward, done, info = env.step(action)\n",
    "        agent.store_transition(state, action, reward, next_state, done)\n",
    "\n",
    "        # Run an update once the buffer holds at least a full rollout\n",
    "        if len(agent.buffer) >= agent.rollout_length:\n",
    "            agent.update()\n",
    "\n",
    "        state = next_state\n",
    "        total_reward += reward\n",
    "        step_count += 1\n",
    "        if done:\n",
    "            break\n",
    "\n",
    "    # Record the finished episode (info is from the episode's last step)\n",
    "    episode_rewards.append(total_reward)\n",
    "    tracker.add_episode(total_reward, step_count, info)\n",
    "\n",
    "    # Periodically report the mean reward over the most recent episodes\n",
    "    if (episode + 1) % log_every == 0:\n",
    "        recent_mean = np.mean(episode_rewards[-log_every:])\n",
    "        print(f\"Episode {episode + 1}/{num_episodes} - Recent Mean Reward: {recent_mean:.2f}\")\n",
    "\n",
    "print(\"\\nTraining completed!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Visualize Training Progress"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two panels side by side: reward curve (left) and reward histogram (right)\n",
    "fig, (curve_ax, hist_ax) = plt.subplots(1, 2, figsize=(12, 5))\n",
    "\n",
    "# Left panel: raw per-episode rewards with a moving average on top\n",
    "curve_ax.plot(episode_rewards, alpha=0.6, label='Episode Reward')\n",
    "window = 20\n",
    "if len(episode_rewards) >= window:\n",
    "    kernel = np.ones(window) / window\n",
    "    moving_avg = np.convolve(episode_rewards, kernel, mode='valid')\n",
    "    # 'valid' mode yields len - window + 1 points, so x starts at window - 1\n",
    "    avg_x = range(window - 1, len(episode_rewards))\n",
    "    curve_ax.plot(avg_x, moving_avg,\n",
    "                  linewidth=2, label=f'{window}-Episode Moving Average')\n",
    "curve_ax.set_xlabel('Episode')\n",
    "curve_ax.set_ylabel('Reward')\n",
    "curve_ax.set_title('Training Progress')\n",
    "curve_ax.legend()\n",
    "curve_ax.grid(True, alpha=0.3)\n",
    "\n",
    "# Right panel: distribution of episode rewards\n",
    "hist_ax.hist(episode_rewards, bins=20, edgecolor='black', alpha=0.7)\n",
    "hist_ax.set_xlabel('Episode Reward')\n",
    "hist_ax.set_ylabel('Frequency')\n",
    "hist_ax.set_title('Reward Distribution')\n",
    "hist_ax.grid(True, alpha=0.3)\n",
    "\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Evaluate Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the agent with training=False and collect per-episode results\n",
    "num_eval_episodes = 20\n",
    "eval_rewards = []\n",
    "eval_scores = []\n",
    "\n",
    "print(\"Evaluating agent...\")\n",
    "\n",
    "for episode in range(num_eval_episodes):\n",
    "    state = env.reset()\n",
    "    episode_reward = 0\n",
    "    done = False\n",
    "\n",
    "    while not done:\n",
    "        action = agent.select_action(state, training=False)\n",
    "        state, reward, done, info = env.step(action)\n",
    "        episode_reward += reward\n",
    "\n",
    "    eval_rewards.append(episode_reward)\n",
    "    # Taken from the last step's info dict; defaults to 0 when the key is absent\n",
    "    eval_scores.append(info.get('score_reward', 0))\n",
    "\n",
    "# Print statistics (an episode counts as a win when its recorded score is > 0)\n",
    "print(f\"\\nEvaluation Results ({len(eval_rewards)} episodes):\")\n",
    "print(f\"  Mean Reward: {np.mean(eval_rewards):.2f} ± {np.std(eval_rewards):.2f}\")\n",
    "print(f\"  Win Rate: {np.mean(np.array(eval_scores) > 0):.2%}\")\n",
    "print(f\"  Mean Score: {np.mean(eval_scores):.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Save Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save trained agent\n",
    "save_path = '../checkpoints/notebook_demo_agent.pth'\n",
    "\n",
    "# Create the target directory first so saving works on a fresh checkout.\n",
    "# NOTE(review): assumes agent.save() does not create missing parent\n",
    "# directories itself - confirm; exist_ok=True makes this a no-op otherwise.\n",
    "os.makedirs(os.path.dirname(save_path), exist_ok=True)\n",
    "\n",
    "agent.save(save_path)\n",
    "print(f\"Agent saved to {save_path}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Next Steps\n",
    "\n",
    "1. Train for more episodes for better performance\n",
    "2. Try different scenarios (e.g., '11_vs_11_stochastic')\n",
    "3. Experiment with hybrid agents (PPO + LightGBM)\n",
    "4. Compare DQN vs PPO performance\n",
    "5. Benchmark CPU vs GPU training speed\n",
    "\n",
    "See the other notebooks for more advanced examples!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}