## Kiểm tra quá trình training với TensorBoard

python training.py --action TRAIN --train_episodes 1000 --output_dir checkpoints

## B. Vẽ biểu đồ Training Curves
Nếu code lưu metrics, vẽ biểu đồ:

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# Plot the training curves from the per-episode metrics log.
# The log is assumed to live at checkpoints/training_log.csv and to contain the
# columns used below (episode, reward, actor_loss, critic_loss,
# avg_replenishment, avg_sales) -- TODO confirm against training.py.
log_path = 'checkpoints/training_log.csv'
try:
    df = pd.read_csv(log_path)
except FileNotFoundError:
    # Print an actionable hint, then re-raise the original error unchanged so
    # the failure is not hidden.
    print(f"Training log not found at '{log_path}'. "
          "Run the training step first and make sure it writes its metrics there.")
    raise

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Reward
axes[0, 0].plot(df['episode'], df['reward'])
axes[0, 0].set_title('Episode Reward')
axes[0, 0].set_xlabel('Episode')
axes[0, 0].set_ylabel('Reward')
axes[0, 0].grid(True)

# Actor Loss
axes[0, 1].plot(df['episode'], df['actor_loss'], color='green')
axes[0, 1].set_title('Actor Loss')
axes[0, 1].set_xlabel('Episode')
axes[0, 1].set_ylabel('Loss')
axes[0, 1].grid(True)

# Critic Loss
axes[1, 0].plot(df['episode'], df['critic_loss'], color='red')
axes[1, 0].set_title('Critic Loss')
axes[1, 0].set_xlabel('Episode')
axes[1, 0].set_ylabel('Loss')
axes[1, 0].grid(True)

# Avg Replenishment vs Sales
axes[1, 1].plot(df['episode'], df['avg_replenishment'], label='Replenishment')
axes[1, 1].plot(df['episode'], df['avg_sales'], label='Sales')
axes[1, 1].set_title('Replenishment vs Sales')
axes[1, 1].set_xlabel('Episode')
axes[1, 1].set_ylabel('Value')
axes[1, 1].legend()
axes[1, 1].grid(True)

plt.tight_layout()
plt.savefig('training_curves.png')
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'checkpoints/training_log.csv'

## Chạy Prediction trên Test Set
- python training.py --action PREDICT --output_dir checkpoints --output_file output.csv

## Phân tích kết quả dự đoán

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Parse file output.csv
def parse_output_csv(filepath):
    """Parse a ``key:v1,v2,...`` log file into per-timestep summary statistics.

    Every line containing ``:`` is split at the first colon into a key and a
    comma-separated list of floats. The n-th occurrence of a key is treated as
    that key's values at timestep n. Lines without a colon, and lines with no
    values after the colon, are skipped.

    Args:
        filepath: Path of the text file to read.

    Returns:
        pandas.DataFrame with a ``timestep`` column (0..n-1) followed, for each
        key in order of first appearance, by ``<key>_mean``, ``<key>_max`` and
        ``<key>_min`` columns computed over that line's values.

    Raises:
        ValueError: If a field is not a valid float, or if the keys do not all
            occur the same number of times.
    """
    rows_by_key = {}

    with open(filepath, 'r') as f:
        for line in f:
            key, sep, values = line.strip().partition(':')
            if not sep:
                continue
            # Drop empty fields so a trailing comma or a value-less line does
            # not crash float('').
            row = [float(tok) for tok in values.split(',') if tok.strip()]
            if not row:
                continue
            # Strip the key so "stock : 1,2" is grouped with "stock:1,2".
            rows_by_key.setdefault(key.strip(), []).append(row)

    counts = {key: len(rows) for key, rows in rows_by_key.items()}
    if len(set(counts.values())) > 1:
        # Columns of unequal length cannot be aligned to timesteps; fail with
        # a message that names the offending keys.
        raise ValueError(f"Keys occur a different number of times: {counts}")

    # Number the timesteps from the shared row count instead of counting only
    # 'stock' lines, so files without a 'stock' key still load.
    n_steps = next(iter(counts.values()), 0)
    result = {'timestep': list(range(n_steps))}
    for key, rows in rows_by_key.items():
        result[f'{key}_mean'] = [np.mean(row) for row in rows]
        result[f'{key}_max'] = [np.max(row) for row in rows]
        result[f'{key}_min'] = [np.min(row) for row in rows]

    return pd.DataFrame(result)

# Load the per-timestep statistics of the prediction run
df = parse_output_csv('output.csv')

# 2x2 evaluation dashboard; give each panel a readable name
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
(ax_stock, ax_action), (ax_overstock, ax_stockout) = axes
steps = df['timestep']

# 1. Stock level: mean line plus a min-max band
ax_stock.plot(steps, df['stock_mean'], label='Mean Stock', color='blue')
ax_stock.fill_between(steps, df['stock_min'], df['stock_max'],
                      alpha=0.3, label='Min-Max Range')
ax_stock.set_title('Inventory Level Over Time')
ax_stock.set_xlabel('Timestep')
ax_stock.set_ylabel('Stock Level')

# 2. Replenishment actions: mean line plus a min-max band
ax_action.plot(steps, df['action_mean'], label='Mean Action', color='green')
ax_action.fill_between(steps, df['action_min'], df['action_max'],
                       alpha=0.3, color='green')
ax_action.set_title('Replenishment Actions')
ax_action.set_xlabel('Timestep')
ax_action.set_ylabel('Action (% capacity)')

# 3. Overstock: mean only
ax_overstock.plot(steps, df['overstock_mean'], label='Mean Overstock', color='red')
ax_overstock.set_title('Overstock (Waste)')
ax_overstock.set_xlabel('Timestep')
ax_overstock.set_ylabel('Overstock')

# Legend and grid are identical for the three panels that are always drawn
for panel in (ax_stock, ax_action, ax_overstock):
    panel.legend()
    panel.grid(True)

# 4. Stockout is drawn only when the log contains that key
if 'stockout_mean' in df:
    ax_stockout.plot(steps, df['stockout_mean'], label='Mean Stockout', color='orange')
    ax_stockout.set_title('Stockout')
    ax_stockout.set_xlabel('Timestep')
    ax_stockout.set_ylabel('Stockout')
    ax_stockout.legend()
    ax_stockout.grid(True)

plt.tight_layout()
plt.savefig('prediction_analysis.png', dpi=300)
plt.show()

## Tính Metrics đánh giá mô hình

In [None]:
import numpy as np

# Re-parse the prediction log into per-timestep statistics
df = parse_output_csv('output.csv')
n_rows = len(df)

stock_avg = df['stock_mean']
overstock_avg = df['overstock_mean']

# KPIs computed from the per-timestep means; rates are the share of timesteps
# whose mean is strictly positive
metrics = {}
metrics['Average Stock Level'] = stock_avg.mean()
metrics['Stock Std Dev'] = stock_avg.std()
metrics['Total Overstock'] = overstock_avg.sum()
metrics['Overstock Rate (%)'] = overstock_avg.gt(0).sum() / n_rows * 100
metrics['Average Replenishment'] = df['action_mean'].mean()

# Stockout KPIs exist only when the log contains that key
if 'stockout_mean' in df:
    stockout_avg = df['stockout_mean']
    metrics.update({
        'Total Stockout': stockout_avg.sum(),
        'Stockout Rate (%)': stockout_avg.gt(0).sum() / n_rows * 100,
    })

# Report
banner = "=" * 50
print(banner)
print("MODEL EVALUATION METRICS")
print(banner)
for name, number in metrics.items():
    print(f"{name:30s}: {number:10.4f}")
print(banner)

# Rule-of-thumb targets printed for the reader
print("\nGOOD MODEL IF:")
for rule in (
    "  - Stockout Rate < 5%",
    "  - Overstock Rate < 10%",
    "  - Stock Std Dev < 0.2 (stable inventory)",
    "  - Average Stock Level: 0.3 - 0.5 (30-50% capacity)",
):
    print(rule)

## Phân tích chi tiết từng sản phẩm

In [None]:
# Parse raw data (keep per-product info)
def parse_product_data(filepath, product_id=0):
    """Extract the stock, action and overstock time series of one product.

    Only lines whose key (the text before the first ``:``) is ``stock``,
    ``action`` or ``overstock`` are parsed; every other line is skipped, so
    unrelated or non-numeric lines cannot break the parse.

    Args:
        filepath: Path of the text file to read.
        product_id: Index into each line's comma-separated values. Negative
            indices count from the end, as with ordinary list indexing.

    Returns:
        Tuple ``(stock, action, overstock)`` of lists of floats, one entry per
        matching line in file order.

    Raises:
        ValueError: If a field on a tracked line is not a valid float.
        IndexError: If a tracked line has no value at ``product_id``.
    """
    series = {'stock': [], 'action': [], 'overstock': []}

    with open(filepath, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            key, sep, values = line.strip().partition(':')
            # Strip the key so "stock : 1,2" is recognised as "stock".
            key = key.strip()
            if not sep or key not in series:
                continue
            # Drop empty fields so a trailing comma does not crash float('').
            row = [float(tok) for tok in values.split(',') if tok.strip()]
            try:
                series[key].append(row[product_id])
            except IndexError:
                # Same exception type as plain indexing, but say where and why.
                raise IndexError(
                    f"line {line_no}: '{key}' has {len(row)} values, "
                    f"no entry for product_id={product_id}"
                ) from None

    return series['stock'], series['action'], series['overstock']

# Plot the time series of one specific product
product_id = 0  # Change this to inspect another product
stock, action, overstock = parse_product_data('output.csv', product_id)

timesteps = range(len(stock))
plt.figure(figsize=(14, 6))

# Left panel: stock level against the replenishment decision
left = plt.subplot(1, 2, 1)
left.plot(timesteps, stock, label='Stock Level', color='blue')
left.plot(timesteps, action, label='Replenishment', color='green', linestyle='--')
left.set_title(f'Product {product_id}: Stock vs Replenishment')
left.set_xlabel('Timestep')
left.set_ylabel('Value')
left.legend()
left.grid(True)

# Right panel: overstock of the same product
right = plt.subplot(1, 2, 2)
right.plot(timesteps, overstock, label='Overstock', color='red')
right.set_title(f'Product {product_id}: Overstock')
right.set_xlabel('Timestep')
right.set_ylabel('Overstock')
right.legend()
right.grid(True)

plt.tight_layout()
plt.savefig(f'product_{product_id}_analysis.png', dpi=300)
plt.show()

## So sánh với Baseline

In [None]:
def baseline_policy(stock, sales, capacity, *, reorder_point=0.3, order_quantity=0.5):
    """Simple reorder-point policy on a stock level expressed as a fraction.

    When ``stock`` is below ``reorder_point``, order ``order_quantity``, capped
    so that stock plus order does not exceed 1.0; otherwise order nothing.

    Args:
        stock: Current stock level, where 1.0 is the upper bound used for the
            cap (the defaults are described as fractions of capacity).
        sales: Not used by this policy; kept so the signature stays unchanged.
        capacity: Not used by this policy; kept so the signature stays
            unchanged.
        reorder_point: Stock level below which an order is placed.
        order_quantity: Amount ordered when an order is placed.

    Returns:
        The order amount as a float; 0.0 when no order is placed.
    """
    if stock < reorder_point:
        # Cap at the remaining headroom; clamp at zero so an unusual
        # reorder_point above 1.0 can never yield a negative order.
        return max(0.0, min(order_quantity, 1.0 - stock))
    return 0.0

# Simulate baseline trên cùng test data
# (code tương tự như trong training.py nhưng dùng baseline_policy)