In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Enhanced RL Trading System - Quick Start Guide\n",
    "\n",
    "This notebook provides a quick introduction to using the Enhanced RL Trading System for Indian equity markets."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Setup and Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "sys.path.append('..')\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "import yaml\n",
    "\n",
    "from src.environment import EnhancedTradingEnvironmentV2\n",
    "from src.agent import AttentionTradingAgent\n",
    "from src.trainer import ImprovedPPOTrainer\n",
    "from src.utils import set_seeds, evaluate_enhanced_agent, plot_results\n",
    "\n",
    "# Fix the random seed through the project's set_seeds helper so reruns are comparable\n",
    "SEED = 42\n",
    "set_seeds(SEED)\n",
    "\n",
    "# Report the runtime backing this session (label -> value, printed in order)\n",
    "runtime_info = {\n",
    "    \"PyTorch version:\": torch.__version__,\n",
    "    \"CUDA available:\": torch.cuda.is_available(),\n",
    "}\n",
    "for label, detail in runtime_info.items():\n",
    "    print(label, detail)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Load Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load configuration.\n",
    "# The file is decoded as UTF-8 explicitly: the platform default encoding\n",
    "# (e.g. cp1252 on Windows) can mis-decode or reject non-ASCII YAML content.\n",
    "with open('../config.yaml', 'r', encoding='utf-8') as f:\n",
    "    config = yaml.safe_load(f)\n",
    "\n",
    "# Display key configuration parameters\n",
    "env_cfg = config['environment']\n",
    "print(\"Configuration Overview:\")\n",
    "print(f\"  Initial Cash: ₹{env_cfg['initial_cash']:,}\")\n",
    "print(f\"  Episode Length: {env_cfg['episode_length']} days\")\n",
    "print(f\"  Assets: {config['assets']['stocks']}\")\n",
    "# The limit is multiplied by 100 here to display it as a percentage\n",
    "print(f\"  Max Drawdown Limit: {env_cfg['max_drawdown_limit']*100:.0f}%\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Create Trading Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the trading environment from the loaded configuration\n",
    "env = EnhancedTradingEnvironmentV2(config)\n",
    "\n",
    "# Summarise the attributes the rest of the notebook relies on\n",
    "env_summary = [\n",
    "    \"Environment created successfully!\",\n",
    "    f\"  Number of assets: {env.n_assets}\",\n",
    "    f\"  State dimension: {env.state_dim}\",\n",
    "    f\"  Data points loaded: {env.data_length}\",\n",
    "]\n",
    "print(\"\\n\".join(env_summary))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Visualize Market Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One stacked panel of closing prices per symbol\n",
    "n_symbols = len(env.symbols)\n",
    "\n",
    "# squeeze=False always returns a 2-D array of Axes, so the single-symbol\n",
    "# case needs no special handling\n",
    "fig, axes = plt.subplots(n_symbols, 1, figsize=(12, 3 * n_symbols), squeeze=False)\n",
    "\n",
    "for ax, symbol in zip(axes[:, 0], env.symbols):\n",
    "    ax.plot(env.data[symbol]['close'])\n",
    "    ax.set(\n",
    "        title=f'{symbol} - Closing Prices',\n",
    "        xlabel='Days',\n",
    "        ylabel='Price (₹)',\n",
    "    )\n",
    "    ax.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Create and Initialize Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate the agent with dimensions taken from the environment and config\n",
    "agent = AttentionTradingAgent(\n",
    "    state_dim=env.state_dim,\n",
    "    n_assets=env.n_assets,\n",
    "    hidden_dim=config['agent']['hidden_dim']\n",
    ")\n",
    "\n",
    "# Tally parameter counts in a single pass over agent.parameters()\n",
    "total_params = 0\n",
    "trainable_params = 0\n",
    "for param in agent.parameters():\n",
    "    n_elements = param.numel()\n",
    "    total_params += n_elements\n",
    "    if param.requires_grad:\n",
    "        trainable_params += n_elements\n",
    "\n",
    "print(\"Agent Architecture:\")\n",
    "print(f\"  Total parameters: {total_params:,}\")\n",
    "print(f\"  Trainable parameters: {trainable_params:,}\")\n",
    "print(f\"  Hidden dimension: {agent.hidden_dim}\")\n",
    "# NOTE(review): the head count below is a literal, not read from the agent or\n",
    "# the config - confirm it matches the model definition\n",
    "print(\"  Number of attention heads: 8\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Test Environment Step"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Smoke-test a single environment step with the freshly created agent\n",
    "state = env.reset()\n",
    "print(f\"Initial state shape: {state.shape}\")\n",
    "print(f\"Initial portfolio value: ₹{env._calculate_portfolio_value():,.2f}\")\n",
    "\n",
    "# Convert the state to a float32 tensor and add a leading batch dimension.\n",
    "# torch.as_tensor(..., dtype=...) replaces the legacy torch.FloatTensor constructor.\n",
    "state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)\n",
    "\n",
    "# Get an action from the agent. This is inference only, so no_grad keeps\n",
    "# autograd from recording a graph for the call.\n",
    "with torch.no_grad():\n",
    "    actions, confidences, value = agent.get_action(state_tensor, epsilon=0.1)\n",
    "\n",
    "# Labels indexed by action id, as used in the printout below\n",
    "ACTION_NAMES = ['SELL', 'HOLD', 'BUY']\n",
    "\n",
    "print(\"\\nAgent outputs:\")\n",
    "print(f\"  Actions: {actions} ({[ACTION_NAMES[a] for a in actions]})\")\n",
    "print(f\"  Confidences: {[f'{c:.3f}' for c in confidences]}\")\n",
    "print(f\"  Value estimate: {value.item():.3f}\")\n",
    "\n",
    "# Execute step\n",
    "next_state, reward, done, info = env.step(actions, confidences)\n",
    "\n",
    "print(\"\\nStep results:\")\n",
    "print(f\"  Reward: {reward:.3f}\")\n",
    "print(f\"  Done: {done}\")\n",
    "print(f\"  Trades executed: {info['trades']}\")\n",
    "print(f\"  New portfolio value: ₹{info['portfolio_value']:,.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Quick Training Demo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a trainer for a quick demo\n",
    "# Note: This is just for demonstration. Real training takes much longer!\n",
    "\n",
    "# Single source of truth for the episode count, so the progress message\n",
    "# and the train() call cannot drift apart\n",
    "N_DEMO_EPISODES = 10\n",
    "\n",
    "# Build a separate dict so the demo overrides do not modify config['training']\n",
    "demo_config = {\n",
    "    **config['training'],\n",
    "    'n_epochs': 3,   # Reduced for demo\n",
    "    'n_steps': 256,  # Reduced for demo\n",
    "}\n",
    "\n",
    "trainer = ImprovedPPOTrainer(\n",
    "    env=env,\n",
    "    agent=agent,\n",
    "    config=demo_config\n",
    ")\n",
    "\n",
    "print(f\"Training for {N_DEMO_EPISODES} episodes (demo only)...\")\n",
    "trainer.train(n_episodes=N_DEMO_EPISODES)\n",
    "\n",
    "print(\"\\nDemo training completed!\")\n",
    "print(f\"Episodes completed: {len(trainer.episode_rewards)}\")\n",
    "print(f\"Average reward: {np.mean(trainer.episode_rewards):.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Evaluate Agent Performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate the agent (even though it's barely trained)\n",
    "print(\"Evaluating agent performance...\")\n",
    "\n",
    "# Just a few episodes for demo\n",
    "eval_results = evaluate_enhanced_agent(env=env, agent=agent, n_episodes=5, epsilon=0.05)\n",
    "\n",
    "# Per-metric means across the evaluation episodes, computed once\n",
    "metric_means = {\n",
    "    metric: eval_results[metric].mean()\n",
    "    for metric in ('net_return', 'sharpe_ratio', 'win_rate', 'trades')\n",
    "}\n",
    "\n",
    "# Display results\n",
    "print(\"\\nEvaluation Results:\")\n",
    "print(f\"  Average Return: {metric_means['net_return']:.2f}%\")\n",
    "print(f\"  Average Sharpe: {metric_means['sharpe_ratio']:.2f}\")\n",
    "print(f\"  Average Win Rate: {metric_means['win_rate']:.1%}\")\n",
    "print(f\"  Average Trades: {metric_means['trades']:.1f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Visualize Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 dashboard of the evaluation metrics\n",
    "fig, axes = plt.subplots(2, 2, figsize=(12, 8))\n",
    "(ax_returns, ax_sharpe), (ax_winrate, ax_confidence) = axes\n",
    "\n",
    "net_returns = eval_results['net_return']\n",
    "sharpe_ratios = eval_results['sharpe_ratio']\n",
    "\n",
    "# Top-left: returns histogram with the mean marked by a dashed red line\n",
    "ax_returns.hist(net_returns, bins=20, alpha=0.7, color='blue')\n",
    "ax_returns.axvline(net_returns.mean(), color='red', linestyle='--')\n",
    "ax_returns.set(title='Returns Distribution', xlabel='Return (%)')\n",
    "\n",
    "# Top-right: Sharpe ratio per episode with the mean marked by a dashed red line\n",
    "ax_sharpe.plot(sharpe_ratios, marker='o')\n",
    "ax_sharpe.axhline(sharpe_ratios.mean(), color='red', linestyle='--')\n",
    "ax_sharpe.set(title='Sharpe Ratio by Episode', xlabel='Episode')\n",
    "\n",
    "# Bottom-left: win rate (scaled by 100 for the percent axis) against returns\n",
    "ax_winrate.scatter(eval_results['win_rate']*100, net_returns)\n",
    "ax_winrate.set(title='Win Rate vs Returns', xlabel='Win Rate (%)', ylabel='Return (%)')\n",
    "\n",
    "# Bottom-right: histogram of the average confidence values\n",
    "ax_confidence.hist(eval_results['avg_confidence'], bins=20, alpha=0.7, color='green')\n",
    "ax_confidence.set(title='Average Confidence Distribution', xlabel='Confidence')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. Next Steps\n",
    "\n",
    "This was just a quick introduction! For real trading performance:\n",
    "\n",
    "1. **Full Training**: Train for 1500+ episodes (see `train_agent.py`)\n",
    "2. **Hyperparameter Tuning**: Adjust learning rates, network architecture\n",
    "3. **Data Enhancement**: Add more stocks, longer history\n",
    "4. **Advanced Features**: Implement market regime detection, correlation analysis\n",
    "5. **Backtesting**: Run comprehensive backtests on out-of-sample data\n",
    "\n",
    "Check the full documentation and examples for more details!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}