# Deep Q-Learning Galaxian - Google Colab Setup

This notebook sets up and runs the DQN training experiments on Google Colab.

This notebook uses a GPU. Make sure to enable GPU acceleration before running any cells:
- Go to Runtime → Change runtime type
- Select GPU as hardware accelerator

## Cell 1: Clone Repository

In [None]:
# Clone the GitHub repository
!git clone https://github.com/YOUR_USERNAME/deep-q-learning-galaxian.git
%cd deep-q-learning-galaxian
!pwd

## Cell 2: Install Dependencies

In [None]:
# Install required packages
!pip install -q gymnasium[atari]
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q ale-py numpy matplotlib pandas jupyter

print("All dependencies installed!")

## Cell 3: Verify Installation

In [None]:
# Check installations
import sys
# Make the cloned repository importable regardless of the current directory.
sys.path.insert(0, '/content/deep-q-learning-galaxian')

import torch
import gymnasium as gym
import numpy as np
# Importing ale_py is what registers the "ALE/..." environment ids with
# Gymnasium. Gymnasium >= 1.0 no longer discovers them automatically, so
# without this import gym.make() below fails on an unpinned install.
import ale_py  # noqa: F401  (imported for its registration side effect)

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}")

# Test environment: build it, reset once, and report the observation/action
# spaces. The try/finally guarantees the emulator is released even if reset()
# or one of the prints raises.
env = gym.make("ALE/Galaxian-v5")
try:
    state, info = env.reset()
    print(f"\nGymnasium Galaxian environment loaded!")
    print(f"State shape: {state.shape}")
    print(f"Action space: {env.action_space.n} actions")
finally:
    env.close()

## Cell 4: Run Baseline Training

In [None]:
# Run baseline training
# Launches experiments/baseline.py as a separate `python` process; the path is
# relative to the current working directory, so the repo root must be the cwd.
# NOTE(review): presumably this writes results/baseline/metrics.csv, which the
# analysis cells read later — confirm against the script.
!python experiments/baseline.py

## Cell 5: Run Bellman Experiments

In [None]:
# Run Bellman parameter experiments
# Launches experiments/bellman_exp.py as a separate `python` process; the path
# is relative to the current working directory (expected: the repo root).
# NOTE(review): presumably this writes results/bellman/summary.csv, which the
# comparison cell looks for — confirm against the script.
!python experiments/bellman_exp.py

## Cell 6: Run Policy Experiments

In [None]:
# Run policy exploration experiments
# Launches experiments/policy_exp.py as a separate `python` process; the path
# is relative to the current working directory (expected: the repo root).
# NOTE(review): presumably this writes results/policy/summary.csv, which the
# comparison cell looks for — confirm against the script.
!python experiments/policy_exp.py

## Cell 7: Run Decay Experiments

In [None]:
# Run exploration decay experiments
# Launches experiments/decay_exp.py as a separate `python` process; the path
# is relative to the current working directory (expected: the repo root).
# NOTE(review): presumably this writes results/decay/summary.csv, which the
# comparison cell looks for — confirm against the script.
!python experiments/decay_exp.py

## Cell 8: Verify Results

In [None]:
import os
import pandas as pd

# Output directories to inspect, one per experiment family.
results_dirs = [
    'results/baseline',
    'results/bellman',
    'results/policy',
    'results/decay'
]

print("Results saved:")
print("=" * 50)

for results_dir in results_dirs:
    # isdir (rather than exists) so that a stray *file* with this name is
    # reported as missing instead of crashing os.listdir below.
    if not os.path.isdir(results_dir):
        print(f"\n{results_dir}: NOT FOUND")
        continue

    print(f"\n{results_dir}:")
    # os.listdir returns entries in arbitrary order; sort for a stable listing.
    for f in sorted(os.listdir(results_dir)):
        print(f"  - {f}")

## Cell 9: Load and Analyze Baseline

In [None]:
# Read the per-episode metrics CSV for the baseline run.
# Relies on `pd` having been imported by an earlier cell.
baseline_metrics = pd.read_csv('results/baseline/metrics.csv')

print("Baseline Training Results")
print("=" * 50)
print(f"Total episodes: {len(baseline_metrics)}")

# Aggregate statistics over the whole 'reward' column.
print("\nReward Statistics:")
for label, stat in (("Mean", "mean"), ("Max", "max"), ("Min", "min")):
    print(f"  {label} reward: {getattr(baseline_metrics['reward'], stat)():.2f}")

# The last row of each moving-average column is the value at the end of training.
print("\nFinal 100-episode average:")
for label, column in (("reward", "moving_avg_reward"), ("length", "moving_avg_length")):
    print(f"  Avg {label}: {baseline_metrics[column].iloc[-1]:.2f}")

print("\nExploration:")
print(f"  Final epsilon: {baseline_metrics['epsilon'].iloc[-1]:.6f}")

# Preview of the first rows
print("\nFirst 5 episodes:")
print(baseline_metrics.head())

## Cell 10: Plot Training Curves

In [None]:
import matplotlib.pyplot as plt

# 2x2 dashboard for the baseline run; unpack the grid into named panels.
# Relies on `baseline_metrics` having been loaded by an earlier cell.
fig, axes = plt.subplots(2, 2, figsize=(14, 8))
(reward_ax, length_ax), (epsilon_ax, hist_ax) = axes
episodes = baseline_metrics['episode']

# Top-left: raw per-episode reward with its moving average drawn on top
reward_ax.plot(episodes, baseline_metrics['reward'],
               alpha=0.5, label='Episode Reward')
reward_ax.plot(episodes, baseline_metrics['moving_avg_reward'],
               'r-', linewidth=2, label='Moving Average (100 ep)')
reward_ax.set(xlabel='Episode', ylabel='Reward', title='Reward Over Training')
reward_ax.legend()
reward_ax.grid(True, alpha=0.3)

# Top-right: episode length with its moving average drawn on top
length_ax.plot(episodes, baseline_metrics['length'],
               alpha=0.5, label='Episode Length')
length_ax.plot(episodes, baseline_metrics['moving_avg_length'],
               'r-', linewidth=2, label='Moving Average')
length_ax.set(xlabel='Episode', ylabel='Steps', title='Episode Length Over Training')
length_ax.legend()
length_ax.grid(True, alpha=0.3)

# Bottom-left: exploration rate per episode
epsilon_ax.plot(episodes, baseline_metrics['epsilon'], 'g-', linewidth=2)
epsilon_ax.set(xlabel='Episode', ylabel='Epsilon', title='Exploration Rate Decay')
epsilon_ax.grid(True, alpha=0.3)

# Bottom-right: histogram of per-episode rewards (horizontal grid lines only)
hist_ax.hist(baseline_metrics['reward'], bins=30, alpha=0.7,
             edgecolor='black', color='blue')
hist_ax.set(xlabel='Episode Reward', ylabel='Frequency', title='Reward Distribution')
hist_ax.grid(True, alpha=0.3, axis='y')

# Lay out, write the PNG next to the metrics, then render inline.
fig.tight_layout()
fig.savefig('results/baseline/training_curves.png', dpi=150, bbox_inches='tight')
plt.show()

print("Training curves saved to results/baseline/training_curves.png")

## Cell 11: Compare Experiment Results

In [None]:
# Load summary results from experiments
# Relies on `os` and `pd` having been imported by an earlier cell.
def print_experiment_summary(csv_path, heading):
    """Print one experiment's summary table if its CSV file exists.

    Args:
        csv_path: Path to the summary CSV to load.
        heading: Title printed above the table (a colon is appended).

    Returns:
        The loaded DataFrame, or None when ``csv_path`` does not exist
        (in which case nothing is printed).
    """
    if not os.path.exists(csv_path):
        return None
    summary = pd.read_csv(csv_path)
    print(f"\n{heading}:")
    # to_string(index=False) prints every row without the numeric index column.
    print(summary.to_string(index=False))
    return summary


print("\nEXPERIMENT RESULTS SUMMARY")
print("=" * 70)

# Each name holds the loaded table, or None if that experiment has no summary.
bellman_summary = print_experiment_summary(
    'results/bellman/summary.csv', "Bellman Parameter Experiments")
policy_summary = print_experiment_summary(
    'results/policy/summary.csv', "Policy Exploration Experiments")
decay_summary = print_experiment_summary(
    'results/decay/summary.csv', "Exploration Decay Experiments")

## Cell 12: Download Results

In [None]:
# Bundle the results directory and hand it to the browser
import shutil

# Writes dqn_results.zip into the working directory, containing the contents
# of results/
shutil.make_archive(base_name='dqn_results', format='zip', root_dir='results')

# Colab-only helper that triggers a browser download of the archive
from google.colab import files
files.download('dqn_results.zip')

print("Results zipped and ready for download!", "  File: dqn_results.zip", sep="\n")

## Cell 13: Commit Results to GitHub

In [None]:
# Push results back to GitHub
!git config user.name "NavishaShetty"
!git config user.email "shetty.navi@northeastern.edu"

# Add results
!git add results/

# Commit
!git commit -m "Add training results from Google Colab"

# Push to GitHub (requires authentication)
# !git push origin main

print("✓ Results committed (not pushed - requires authentication)")