# 🧠 DQN Language Learning - Model Training Notebook

Bu notebook'ta DQN modelini offline olarak eğiteceğiz ve sonuçları analiz edeceğiz.

In [None]:
import sys
sys.path.append('../backend')

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from dqn_agent import DQNAgent
from rl_environment import LanguageLearningEnv
import pandas as pd

# Visualization settings.
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*".
# Fall back to the pre-3.6 name so this cell also runs on older installs.
_plot_style = 'seaborn-v0_8-darkgrid'
if _plot_style not in plt.style.available:
    _plot_style = 'seaborn-darkgrid'
plt.style.use(_plot_style)
sns.set_palette("husl")

print("✅ Imports successful!")

## 1. Sample Word Pool Oluşturma

In [None]:
# Sample word pool: 50 synthetic entries. Word i gets difficulty (i % 5) + 1,
# so each of the five difficulty levels appears exactly 10 times.
word_pool = [
    {"id": f"{i}", "word": f"word_{i}", "translation": f"kelime_{i}",
     "difficulty": (i % 5) + 1, "language": "en"}
    for i in range(1, 51)
]

# Difficulty distribution: number of words per level, ordered by level
difficulty_dist = pd.Series([w['difficulty'] for w in word_pool]).value_counts().sort_index()
print("📊 Zorluk Dağılımı:")
print(difficulty_dist)

# Visualization: one bar per difficulty level
plt.figure(figsize=(8, 5))
difficulty_dist.plot(kind='bar', color='skyblue')
plt.title('Word Pool - Difficulty Distribution')
plt.xlabel('Difficulty Level')
plt.ylabel('Count')
plt.xticks(rotation=0)  # keep the level labels horizontal
plt.show()

## 2. Environment ve Agent Oluşturma

In [None]:
   "source": [
    "# Create environment\n",
    "env = LanguageLearningEnv(word_pool=word_pool)\n",
    "\n",
    "print(\"ðŸ“¦ Environment:\")\n",
    "print(f\"  State Space: {env.observation_space.shape}\")\n",
    "print(f\"  Action Space: {env.action_space.n}\")\n",
    "\n",
    "# Create DQN agent (PyTorch)\n",
    "agent = DQNAgent(\n",
    "    state_size=12,\n",
    "    action_size=5,\n",
    "    learning_rate=0.001,\n",
    "    epsilon=1.0,\n",
    "    epsilon_decay=0.995,\n",
    "    batch_size=32\n",
    ")\n",
    "\n",
    "print(\"\\nðŸ¤– DQN Agent (PyTorch):\")\n",
    "print(f\"  Device: {agent.device}\")\n",
    "print(f\"  Initial Epsilon: {agent.epsilon}\")\n",
    "print(f\"  Learning Rate: {agent.learning_rate}\")\n",
    "print(f\"  Batch Size: {agent.batch_size}\")\n",
    "print(f\"  Model Parameters: {sum(p.numel() for p in agent.model.parameters()):,}\")"
   ]

## 3. Model Architecture

In [None]:
# Model summary.
# The agent's model is used as a PyTorch module elsewhere in this notebook
# (.parameters() / .numel()), and torch.nn.Module has no Keras-style
# .summary(). Call summary() only if the model provides one; otherwise print
# the module, whose repr lists its layers.
_summarize = getattr(agent.model, "summary", None)
if callable(_summarize):
    _summarize()
else:
    print(agent.model)

## 4. Training Loop (100 Episodes)

In [None]:
# Training parameters
EPISODES = 100
UPDATE_TARGET_FREQ = 10  # sync the target network every N episodes

# Metrics collected during training (plotted and summarized in later cells)
episode_rewards = []   # total reward per episode
episode_lengths = []   # number of environment steps per episode
epsilon_values = []    # agent.epsilon recorded at the end of each episode
losses = []            # positive loss values returned by agent.replay()

print("🚀 Starting training...\n")

for episode in range(EPISODES):
    state, _ = env.reset()
    episode_reward = 0
    episode_length = 0
    done = False

    while not done:
        # Select action
        action = agent.act(state, training=True)

        # Take step; an episode ends on either termination or truncation
        next_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        # Store the transition in the agent's memory
        agent.remember(state, action, reward, next_state, done)

        # Learn from stored transitions.
        # NOTE(review): replay() presumably returns 0 (or possibly None) while
        # there are too few stored transitions to train on - confirm. The None
        # check keeps `loss > 0` from raising TypeError in that case.
        loss = agent.replay()
        if loss is not None and loss > 0:
            losses.append(loss)

        state = next_state
        episode_reward += reward
        episode_length += 1

    # Update target network
    if (episode + 1) % UPDATE_TARGET_FREQ == 0:
        agent.update_target_model()

    # Record metrics
    episode_rewards.append(episode_reward)
    episode_lengths.append(episode_length)
    epsilon_values.append(agent.epsilon)

    # Print progress every 10 episodes, with the mean of the last 10 rewards
    if (episode + 1) % 10 == 0:
        avg_reward = np.mean(episode_rewards[-10:])
        print(f"Episode {episode + 1}/{EPISODES} | "
              f"Reward: {episode_reward:.2f} | "
              f"Avg (10): {avg_reward:.2f} | "
              f"Epsilon: {agent.epsilon:.3f}")

print("\n✅ Training completed!")

## 5. Results Visualization

In [None]:
# Lay out the four training diagnostics on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
reward_ax, epsilon_ax = axes[0, 0], axes[0, 1]
length_ax, loss_ax = axes[1, 0], axes[1, 1]
title_style = dict(fontsize=14, fontweight='bold')

# Top-left: raw per-episode reward plus its 10-episode rolling mean
smoothed_rewards = pd.Series(episode_rewards).rolling(10).mean()
reward_ax.plot(episode_rewards, alpha=0.6, label='Episode Reward')
reward_ax.plot(smoothed_rewards, linewidth=2, label='Moving Avg (10)')
reward_ax.set_title('Episode Rewards', **title_style)
reward_ax.set_xlabel('Episode')
reward_ax.set_ylabel('Total Reward')
reward_ax.legend()
reward_ax.grid(True, alpha=0.3)

# Top-right: exploration rate recorded at the end of each episode
epsilon_ax.plot(epsilon_values, color='orange', linewidth=2)
epsilon_ax.set_title('Epsilon Decay (Exploration Rate)', **title_style)
epsilon_ax.set_xlabel('Episode')
epsilon_ax.set_ylabel('Epsilon')
epsilon_ax.grid(True, alpha=0.3)

# Bottom-left: number of environment steps per episode
length_ax.plot(episode_lengths, color='green', alpha=0.6)
length_ax.set_title('Episode Lengths', **title_style)
length_ax.set_xlabel('Episode')
length_ax.set_ylabel('Steps')
length_ax.grid(True, alpha=0.3)

# Bottom-right: per-step loss plus its 50-step rolling mean.
# The panel is left empty when no loss value was recorded.
if losses:
    smoothed_losses = pd.Series(losses).rolling(50).mean()
    loss_ax.plot(losses, color='red', alpha=0.4)
    loss_ax.plot(smoothed_losses, linewidth=2, label='Moving Avg (50)')
    loss_ax.set_title('Training Loss', **title_style)
    loss_ax.set_xlabel('Training Step')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()
    loss_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Performance Statistics

In [None]:
# Summary statistics over the metrics collected by the training loop
stats = {
    'Total Episodes': EPISODES,
    'Avg Reward (All)': np.mean(episode_rewards),
    'Avg Reward (Last 20)': np.mean(episode_rewards[-20:]),
    'Max Reward': np.max(episode_rewards),
    'Min Reward': np.min(episode_rewards),
    'Final Epsilon': epsilon_values[-1],
    'Avg Episode Length': np.mean(episode_lengths)
}

print("\n📊 Training Statistics:")
print("=" * 50)
for key, value in stats.items():
    # Plain ints (the episode count) are printed without decimals; every
    # other metric keeps four decimal places.
    formatted = f"{value:d}" if isinstance(value, int) else f"{value:.4f}"
    # Label is left-aligned and dot-padded to 30 columns
    print(f"{key:.<30} {formatted}")
print("=" * 50)

## 7. Test Agent (Inference Mode)

In [None]:
# Test with epsilon=0 (pure exploitation).
# NOTE: epsilon is left at 0.0 afterwards, so later cells see the agent in
# this state.
agent.epsilon = 0.0

test_episodes = 5
test_rewards = []

print("🧪 Testing agent (greedy policy)...\n")

for episode in range(test_episodes):
    state, _ = env.reset()
    episode_reward = 0
    done = False
    step = 0

    print(f"Test Episode {episode + 1}:")

    # Each demo episode is capped at 10 steps, so "Total Reward" below covers
    # at most the first 10 steps of an episode.
    while not done and step < 10:
        action = agent.act(state, training=False)
        q_values = agent.get_q_values(state)

        print(f"  Step {step + 1}: Action={action} (Difficulty Level), Q-values={q_values}")

        next_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

        # info is expected to carry the presented word and answer correctness
        print(f"    → Reward: {reward:.2f}, Word: {info['word']}, Correct: {info['is_correct']}")

        state = next_state
        episode_reward += reward
        step += 1

    test_rewards.append(episode_reward)
    print(f"  Total Reward: {episode_reward:.2f}\n")

print("\n📊 Test Results:")
print(f"  Avg Test Reward: {np.mean(test_rewards):.2f}")
print(f"  Max Test Reward: {np.max(test_rewards):.2f}")

## 8. Save Model

In [None]:
import os

# Save trained model
save_path = "../backend/models/dqn_notebook"

# Make sure the parent directory exists before saving; on a fresh checkout
# "../backend/models" may be missing. exist_ok makes this a no-op otherwise.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
agent.save(save_path)

print(f"✅ Model saved to: {save_path}")

## 9. Action Distribution Analysis

In [None]:
# Analyze which action the agent picks on freshly reset environments.
# Action index i is labelled with difficulty_names[i].
difficulty_names = ['Beginner', 'Elementary', 'Intermediate', 'Advanced', 'Expert']
NUM_SAMPLES = 100  # number of reset states to sample

# One counter per action, sized from the label list so the two cannot drift
action_counts = [0] * len(difficulty_names)

for _ in range(NUM_SAMPLES):
    state, _ = env.reset()
    action = agent.act(state, training=False)
    action_counts[action] += 1

# Visualization
plt.figure(figsize=(10, 6))
plt.bar(difficulty_names, action_counts, color=['#3498db', '#2ecc71', '#f39c12', '#e74c3c', '#9b59b6'])
plt.title(f'Action Distribution ({NUM_SAMPLES} Random States)', fontsize=14, fontweight='bold')
plt.xlabel('Difficulty Level')
plt.ylabel('Selection Count')
plt.grid(True, alpha=0.3, axis='y')
plt.show()

print("\n📊 Action Distribution:")
for name, count in zip(difficulty_names, action_counts):
    # Share of samples in which this action was chosen, as a percentage
    print(f"  {name:.<20} {count:>3} ({count / NUM_SAMPLES * 100:.1f}%)")

## 🎉 Notebook Complete!

Bu notebook'ta:
- ✅ DQN agent'i başarıyla eğittik
- ✅ Training metriklerini görselleştirdik
- ✅ Model'i test ettik
- ✅ Action distribution analizi yaptık

**Sonraki Adımlar:**
1. Modeli backend API'ye yükle
2. Mobil uygulamada test et
3. Dashboard'da visualize et