In [None]:
# 🔧 Setup: Run this cell first!
# Check GPU availability and install dependencies

import torch
import sys

# Check GPU
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(f"✅ GPU available: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    device = torch.device('cpu')
    print("⚠️ No GPU detected. Some cells may run slowly.")
    print("   Go to Runtime → Change runtime type → GPU")

print(f"\n📦 Python {sys.version.split()[0]}")
print(f"🔥 PyTorch {torch.__version__}")

# Set random seeds for reproducibility
import random
import numpy as np

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

print(f"🎲 Random seed set to {SEED}")

%matplotlib inline

# Policy Gradient Methods — Vizuara Notebook Series

Welcome to the Vizuara notebook series on **Policy Gradient Methods**. This series takes you from the foundations of parameterized policies to Actor-Critic methods, building everything from scratch in PyTorch.

## Learning Path

In [None]:
# Series outline: each entry below is one output line, printed in order.
# Empty strings produce the blank separator lines between sections.
outline_lines = [
    "=" * 60,
    "  POLICY GRADIENT METHODS — NOTEBOOK SERIES",
    "=" * 60,
    "",
    "  Notebook 1: Policy Gradient Foundations",
    "  ├── Softmax policy parameterization",
    "  ├── Performance measure J(theta)",
    "  ├── Policy gradient theorem derivation",
    "  └── Estimated time: 45 minutes",
    "",
    "  Notebook 2: REINFORCE from Scratch",
    "  ├── REINFORCE algorithm implementation",
    "  ├── Variance problem diagnosis",
    "  ├── Baseline variance reduction",
    "  └── Estimated time: 50 minutes",
    "",
    "  Notebook 3: Actor-Critic Methods",
    "  ├── Actor and Critic networks",
    "  ├── Advantage-based learning",
    "  ├── Head-to-head comparison of all methods",
    "  └── Estimated time: 55 minutes",
    "",
    "  Total estimated time: ~2.5 hours",
    "=" * 60,
]

# A single write; print() appends the final newline after the closing rule
print("\n".join(outline_lines))

## Prerequisites

- Basic Python and PyTorch
- Understanding of neural networks (linear layers, backpropagation)
- Familiarity with reinforcement learning concepts (states, actions, rewards)

## Environment Setup

In [None]:
!pip install -q gymnasium torch numpy matplotlib
import gymnasium as gym
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"Gymnasium version: {gym.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print("\nAll dependencies installed. You are ready to begin!")

## How to Use These Notebooks

1. Run each notebook from top to bottom in Google Colab
2. Complete the TODO exercises — they are designed to test your understanding
3. Experiment with the optional challenges at the end of each notebook
4. Each notebook is self-contained — you can start from any notebook

Open each notebook in Colab by clicking the links below.