# DBS-torchRL 학습 시각화
TensorBoard 로그(`./ppo_tensorboard/`)를 읽어 학습 진행 상황을 시각화합니다.

In [None]:
import os
import glob
from collections import defaultdict

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

%matplotlib inline
# Global Matplotlib defaults for every figure drawn in this notebook.
plt.rcParams['figure.dpi'] = 120
# Render the minus sign as an ASCII hyphen instead of the Unicode minus;
# presumably because the Korean font selected below may lack that glyph.
plt.rcParams['axes.unicode_minus'] = False

# Korean font: use NanumGothic when a matching font file is found on the
# system; otherwise Matplotlib's default font stays in effect.
try:
    import matplotlib.font_manager as fm
    # Font file paths whose path contains "NanumGothic".
    nanum = [f for f in fm.findSystemFonts() if 'NanumGothic' in f]
    if nanum:
        # Resolve the family name from the first matching file.
        plt.rcParams['font.family'] = fm.FontProperties(fname=nanum[0]).get_name()
except Exception:
    # Best effort only: any failure during font lookup is ignored on purpose
    # so that plotting still works with the default font.
    pass

## 1. 로그 로딩

In [None]:
def load_scalars(logdir: str, max_scalars: int = 0) -> dict:
    """Read every scalar series from a TensorBoard log directory.

    Args:
        logdir: Path handed to ``EventAccumulator`` (a run directory that
            contains TensorBoard event files).
        max_scalars: Maximum number of events kept per scalar tag. ``0``
            (the default) keeps every event. A positive value lets
            ``EventAccumulator`` down-sample longer series to that many
            points, which bounds memory use for very long runs.

    Returns:
        A dict mapping each scalar tag to
        ``{'steps': [...], 'values': [...]}``. The dict is empty when the
        log contains no scalar tags.
    """
    # Without an explicit size guidance EventAccumulator keeps only a sample
    # of long scalar series (its default cap is 10,000 events per tag), which
    # would silently distort episode counts and min/avg/max statistics.
    ea = EventAccumulator(logdir, size_guidance={'scalars': max_scalars})
    ea.Reload()

    data = {}
    for tag in ea.Tags().get('scalars', []):
        events = ea.Scalars(tag)
        data[tag] = {
            'steps': [e.step for e in events],
            'values': [e.value for e in events],
        }
    return data


def smooth(values: list, weight: float = 0.6) -> list:
    """Return the exponential moving average (EMA) of ``values``.

    The running average is seeded with the first sample, so the first
    output equals the first input (up to floating-point arithmetic).

    Args:
        values: Sequence of numbers to smooth.
        weight: Share of the previous smoothed value carried into the next
            one; the current sample contributes ``1 - weight``.

    Returns:
        A new list with one smoothed value per input sample. An empty input
        yields an empty list.
    """
    if not values:
        return []

    running = values[0]
    result = []
    for sample in values:
        running = running * weight + (1 - weight) * sample
        result.append(running)
    return result


# ── Discover log directories ──
BASE_DIR = "./ppo_tensorboard"

# Every sub-directory of BASE_DIR is treated as one run; listed in name order.
subdirs = sorted(d for d in glob.glob(os.path.join(BASE_DIR, "*")) if os.path.isdir(d))
print(f"발견된 run: {len(subdirs)}개")
for d in subdirs:
    print(f"  {d}")

# Use the most recent run (to analyse a specific run, set LOGDIR directly).
# Name order is not a reliable proxy for recency ("run_10" sorts before
# "run_9"), so the run is chosen by modification time; the name only breaks
# ties. Without any sub-directory, BASE_DIR itself is read.
LOGDIR = max(subdirs, key=lambda d: (os.path.getmtime(d), d)) if subdirs else BASE_DIR
print(f"\n사용할 로그: {LOGDIR}")

data = load_scalars(LOGDIR)
print(f"메트릭 수: {len(data)}개")
print(f"메트릭 목록: {list(data.keys())}")

## 2. 학습 요약 통계

In [None]:
def print_summary(data, logdir=None):
    """Print a text summary of one training run to stdout.

    Args:
        data: Mapping of scalar tag to ``{'steps': [...], 'values': [...]}``,
            as returned by ``load_scalars``.
        logdir: Path of the run being summarised; only its last path
            component is shown in the header. Defaults to the notebook-level
            ``LOGDIR`` when omitted.

    Tags that are missing from ``data`` or that carry no values are skipped
    instead of raising.
    """
    if logdir is None:
        # Fall back to the run selected in the log-loading cell.
        logdir = LOGDIR

    rule = "=" * 60
    print(rule)
    # normpath drops a trailing separator so basename is never empty.
    print(f"  학습 요약: {os.path.basename(os.path.normpath(logdir))}")
    print(rule)

    def _values(tag):
        """Return the value list logged for ``tag`` (empty if absent)."""
        return data.get(tag, {}).get('values', [])

    def _stat(tag, label, fmt=".2f"):
        """Print min/avg/max of ``tag`` using format spec ``fmt``."""
        v = _values(tag)
        if v:  # min()/max() and the average are undefined for empty series
            print(f"  {label}: min={min(v):{fmt}}  avg={sum(v)/len(v):{fmt}}  max={max(v):{fmt}}")

    if 'episode/reward' in data:
        print(f"  에피소드 수: {len(_values('episode/reward'))}")
    _stat('episode/reward', '보상')
    _stat('episode/psnr_diff', 'PSNR 향상 (dB)', '.4f')
    _stat('episode/final_psnr', '최종 PSNR (dB)', '.4f')
    _stat('episode/success_ratio', '성공률', '.4f')
    _stat('episode/flip_count', '플립 수', '.0f')
    _stat('episode/length', '에피소드 길이', '.0f')

    timesteps = _values('timesteps/total')
    if timesteps:
        # The last logged value is reported as the total.
        print(f"  총 타임스텝: {timesteps[-1]:,.0f}")
    total_loss = _values('loss/total_loss')
    if total_loss:
        print(f"  최종 Total Loss: {total_loss[-1]:.4f} (최저: {min(total_loss):.4f})")
    print(rule)

# Print the summary for the scalar data loaded from LOGDIR.
print_summary(data)

## 3. Loss 그래프

In [None]:
# Scalar tag, line colour and panel title of each loss term (parallel lists).
loss_keys = [
    'loss/policy_loss',
    'loss/value_loss',
    'loss/entropy_loss',
    'loss/total_loss',
]
loss_colors = ['#e74c3c', '#3498db', '#2ecc71', '#9b59b6']
loss_labels = ['Policy Loss', 'Value Loss', 'Entropy Loss', 'Total Loss']

# One panel per loss term: faint raw curve with an EMA-smoothed overlay.
fig, axes = plt.subplots(1, 4, figsize=(20, 4))
for panel, tag, line_color, title in zip(axes, loss_keys, loss_colors, loss_labels):
    series = data.get(tag)
    if series is not None:
        steps, values = series['steps'], series['values']
        panel.plot(steps, values, alpha=0.3, color=line_color, linewidth=0.8)
        # The smoothed curve is only drawn for series longer than 3 points.
        if len(values) > 3:
            panel.plot(steps, smooth(values), color=line_color, linewidth=2)
    # Panels are decorated even when the tag is missing from the log.
    panel.set_title(title, fontweight='bold')
    panel.set_xlabel('Global Step')
    panel.grid(True, alpha=0.3)
    panel.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

# Combined overlay: all loss curves on a single axes.
fig, ax = plt.subplots(figsize=(12, 4))
for key, color, label in zip(loss_keys, loss_colors, loss_labels):
    if key not in data:
        continue
    v = data[key]
    # Series of 3 points or fewer are drawn raw; longer ones are EMA-smoothed.
    y = smooth(v['values'], 0.8) if len(v['values']) > 3 else v['values']
    ax.plot(v['steps'], y, color=color, linewidth=1.8, label=label)
ax.set_title('All Losses (smoothed)', fontweight='bold')
ax.set_xlabel('Global Step')
# Build the legend only when at least one curve was drawn: calling legend()
# on an axes without labelled artists makes Matplotlib emit a warning.
if ax.get_legend_handles_labels()[0]:
    ax.legend(loc='best')
ax.grid(True, alpha=0.3)
ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()

## 4. Episode 핵심 메트릭 (Reward, PSNR)

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Reward: faint raw curve with an EMA-smoothed overlay for longer series.
ax = axes[0]
if 'episode/reward' in data:
    v = data['episode/reward']
    ax.plot(v['steps'], v['values'], alpha=0.3, color='#e67e22', linewidth=0.8)
    if len(v['values']) > 3:
        ax.plot(v['steps'], smooth(v['values']), color='#e67e22', linewidth=2)
ax.set_title('Episode Reward', fontweight='bold')
ax.set_xlabel('Episode')
ax.grid(True, alpha=0.3)
ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)

# PSNR Diff
ax = axes[1]
if 'episode/psnr_diff' in data:
    v = data['episode/psnr_diff']
    ax.plot(v['steps'], v['values'], 'o-', markersize=3, color='#27ae60', linewidth=1.5)
ax.set_title('PSNR Improvement (dB)', fontweight='bold')
ax.set_xlabel('Episode')
ax.grid(True, alpha=0.3)

# Initial vs Final PSNR
ax = axes[2]
if 'episode/initial_psnr' in data and 'episode/final_psnr' in data:
    ini = data['episode/initial_psnr']
    fin = data['episode/final_psnr']
    ax.plot(ini['steps'], ini['values'], 's-', markersize=3, color='#95a5a6', linewidth=1.2, label='Initial')
    ax.plot(fin['steps'], fin['values'], 'o-', markersize=3, color='#2980b9', linewidth=1.5, label='Final')
    # fill_between needs both curves at the same x positions. The two tags are
    # stored independently, so their step lists may differ in length or
    # content; shade only over the steps present in both series.
    ini_by_step = dict(zip(ini['steps'], ini['values']))
    fin_by_step = dict(zip(fin['steps'], fin['values']))
    shared_steps = sorted(ini_by_step.keys() & fin_by_step.keys())
    if shared_steps:
        ax.fill_between(
            shared_steps,
            [ini_by_step[s] for s in shared_steps],
            [fin_by_step[s] for s in shared_steps],
            alpha=0.15,
            color='#2980b9',
        )
    ax.legend(loc='best')
ax.set_title('PSNR (Initial → Final)', fontweight='bold')
ax.set_xlabel('Episode')
ax.set_ylabel('PSNR (dB)')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Episode 보조 메트릭 (Length, Flip, Success Ratio)

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Episode length and flip count share one bar-chart layout, so both panels
# are drawn from a small (tag, colour, title) table. zip() stops after the
# two table rows, leaving the third axes for the success ratio below.
bar_panels = (
    ('episode/length', '#1abc9c', 'Episode Length (steps)'),
    ('episode/flip_count', '#f39c12', 'Flip Count (successful)'),
)
for ax, (tag, bar_color, title) in zip(axes, bar_panels):
    if tag in data:
        v = data[tag]
        ax.bar(v['steps'], v['values'], color=bar_color, alpha=0.7, width=0.8)
    ax.set_title(title, fontweight='bold')
    ax.set_xlabel('Episode')
    # Thousands separators on the y axis; a fresh formatter per axes.
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
    ax.grid(True, alpha=0.3, axis='y')

# Success ratio, shown as a percentage.
ax = axes[2]
v = data.get('episode/success_ratio')
if v is not None:
    ratios = v['values']
    ax.plot(v['steps'], ratios, 'o-', markersize=3, color='#8e44ad', linewidth=1.5)
    # Upper limit: 10% headroom above the peak, but never below 0.5;
    # 1.0 when the series is empty.
    if ratios:
        upper = max(max(ratios) * 1.1, 0.5)
    else:
        upper = 1.0
    ax.set_ylim(0, upper)
ax.set_title('Flip Success Ratio', fontweight='bold')
ax.set_xlabel('Episode')
ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1.0))
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. System (GPU Memory, Timesteps)

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 4))
gpu_ax, steps_ax = axes

# Left panel: GPU memory (GB).
gpu = data.get('system/gpu_memory_gb')
if gpu is not None:
    mem = gpu['values']
    gpu_ax.plot(gpu['steps'], mem, 's-', markersize=4, color='#c0392b', linewidth=1.5)
    # 30% headroom above the peak; 24 is the fallback for an empty series.
    if mem:
        top = max(mem) * 1.3
    else:
        top = 24
    gpu_ax.set_ylim(0, top)
gpu_ax.set_title('GPU Memory (GB)', fontweight='bold')
gpu_ax.set_xlabel('Episode')
gpu_ax.grid(True, alpha=0.3)

# Right panel: cumulative timesteps.
total = data.get('timesteps/total')
if total is not None:
    steps_ax.plot(total['steps'], total['values'], 'o-', markersize=3, color='#2c3e50', linewidth=1.5)
    # Thousands separators on the y axis, applied only when the tag exists.
    steps_ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
steps_ax.set_title('Cumulative Timesteps', fontweight='bold')
steps_ax.set_xlabel('Episode')
steps_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 7. 여러 Run 비교 (선택)
아래 셀을 실행하면 `ppo_tensorboard/` 내 모든 run을 한 그래프에서 비교합니다.

In [None]:
# Load every run; runs without any scalar data are left out.
all_data = {}
for d in subdirs:
    rd = load_scalars(d)
    if rd:
        all_data[os.path.basename(d)] = rd

if len(all_data) <= 1:
    print("비교할 run이 1개 이하입니다. 이 셀은 건너뛰어도 됩니다.")
else:
    # (scalar tag, panel title) for each of the 2x3 comparison panels.
    compare_tags = [
        ('episode/reward', 'Episode Reward'),
        ('episode/psnr_diff', 'PSNR Improvement'),
        ('episode/final_psnr', 'Final PSNR'),
        ('episode/success_ratio', 'Success Ratio'),
        ('loss/total_loss', 'Total Loss'),
        ('loss/policy_loss', 'Policy Loss'),
    ]
    colors = plt.cm.Set1.colors

    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    fig.suptitle('Run Comparison', fontsize=16, fontweight='bold')

    # axes.flat walks the grid row by row, matching the order of compare_tags.
    for ax, (tag, title) in zip(axes.flat, compare_tags):
        # The colour index follows the run's position in all_data, so a run
        # keeps the same colour in every panel even if it lacks some tags.
        for ri, (name, rd) in enumerate(all_data.items()):
            if tag not in rd:
                continue
            v = rd[tag]
            c = colors[ri % len(colors)]
            ax.plot(v['steps'], v['values'], alpha=0.25, color=c, linewidth=0.5)
            # Series of 3 points or fewer are drawn raw; longer ones smoothed.
            y = smooth(v['values']) if len(v['values']) > 3 else v['values']
            ax.plot(v['steps'], y, color=c, linewidth=2, label=name)
        ax.set_title(title, fontweight='bold')
        # Build the legend only when some run has this tag: calling legend()
        # on an axes without labelled artists makes Matplotlib emit a warning.
        if ax.get_legend_handles_labels()[0]:
            ax.legend(loc='best', fontsize=8)
        ax.grid(True, alpha=0.3)

    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()