# Анализ эксперимента с train_from_csv

In [None]:
import matplotlib.pyplot as plt

from pathlib import Path
from nn_laser_stabilizer.config.config import load_config

# Select the run to analyse. The timestamp literal looks like a placeholder
# that has to be replaced with the name of a real run directory.
EXPERIMENT_DATE_TIME = "YYYY-MM-DD_HH-MM-SS"
EXPERIMENT_NAME = "train_from_csv"

# Run directory, relative to the notebook's working directory.
EXPERIMENT_DIR_PATH = Path("../experiments") / EXPERIMENT_NAME / EXPERIMENT_DATE_TIME

# Configuration stored inside the run directory.
CONFIG_PATH = EXPERIMENT_DIR_PATH.joinpath("config.yaml")
config = load_config(CONFIG_PATH)

# The run configuration names a base configuration, which is read from
# ../configs/<base_config>.yaml.
base_config_path = Path(f"../configs/{config.base_config}.yaml")
base_config = load_config(base_config_path)

print(f"Эксперимент: {config.experiment_name}")

## Анализ процесса обучения

In [None]:
import re
import numpy as np
import pandas as pd


def parse_train_logs(file_path):
    """Parse ``[TRAIN] step:`` records from a training log into a DataFrame.

    Each line is stripped and matched from its start against the layout::

        [TRAIN] step: [actor_loss=<num>] buffer_size=<int> loss_q1=<num>
        loss_q2=<num> step=<int> time=<num>

    ``<num>`` accepts plain decimals, integers, scientific notation
    (``1e-05``) and ``nan``/``inf``, so records are not silently dropped
    when a loss is printed in one of those forms. Lines that do not match
    are ignored. The ``time`` field must be present for a line to match,
    but it is not included in the result.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A ``pandas.DataFrame`` with the columns ``step``, ``loss_q1``,
        ``loss_q2``, ``actor_loss`` and ``buffer_size``, one row per
        matched line in file order. ``actor_loss`` is NaN for records
        that do not carry it. The columns are present even when no line
        matched, so column access on an empty result does not raise.
    """
    columns = ['step', 'loss_q1', 'loss_q2', 'actor_loss', 'buffer_size']

    # Any textual form that float() can parse and a logger may emit.
    number = (
        r"(?:[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"
        r"|[-+]?(?:nan|inf))"
    )
    step_pattern = re.compile(
        r"\[TRAIN\]\s+"
        r"step:\s+"
        rf"(?:actor_loss=(?P<actor_loss>{number})\s+)?"
        r"buffer_size=(?P<buffer_size>\d+)\s+"
        rf"loss_q1=(?P<loss_q1>{number})\s+"
        rf"loss_q2=(?P<loss_q2>{number})\s+"
        r"step=(?P<step>\d+)\s+"
        rf"time=(?P<time>{number})"
    )

    rows = []
    # The matched fields are ASCII; undecodable bytes elsewhere in the log
    # are replaced instead of aborting the whole parse.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            match = step_pattern.match(line.strip())
            if match is None:
                continue
            actor_loss = match.group('actor_loss')
            rows.append({
                'step': int(match.group('step')),
                'loss_q1': float(match.group('loss_q1')),
                'loss_q2': float(match.group('loss_q2')),
                'actor_loss': float(actor_loss) if actor_loss is not None else np.nan,
                'buffer_size': int(match.group('buffer_size')),
            })

    return pd.DataFrame(rows, columns=columns)

In [None]:
# Parse the training log of the selected run into a per-step DataFrame.
TRAIN_LOG_PATH = EXPERIMENT_DIR_PATH / "train_logs" / "train.log"
loss_df = parse_train_logs(TRAIN_LOG_PATH)
print(f"Загружено {len(loss_df)} записей из логов обучения")

# Stop here with an explicit message when nothing was parsed: the step
# range below and every later plot would otherwise fail or be meaningless.
if loss_df.empty:
    raise ValueError(f"No [TRAIN] step records found in {TRAIN_LOG_PATH}")

print(f"Диапазон шагов обучения: {loss_df['step'].min()} - {loss_df['step'].max()}")

In [None]:
# Critic losses over training steps: stacked panels for Q1, Q2 and their
# sum, all sharing the step axis.
fig, axes = plt.subplots(3, 1, figsize=(12, 10), sharex=True)

# (values, line format, caption); the caption is used both as the legend
# label and as the panel title.
critic_panels = (
    (loss_df['loss_q1'], 'b-', 'Q1 Loss'),
    (loss_df['loss_q2'], 'g-', 'Q2 Loss'),
    (loss_df['loss_q1'] + loss_df['loss_q2'], 'r--', 'Sum (Q1 + Q2)'),
)
for panel, (values, line_fmt, caption) in zip(axes, critic_panels):
    panel.plot(loss_df['step'], values, line_fmt, alpha=0.7, label=caption)
    panel.set_title(caption)
    panel.set_ylabel('Loss')
    panel.legend()
    panel.grid(True, alpha=0.3)

# Only the bottom panel is given an x label.
axes[2].set_xlabel('Step')

plt.tight_layout()
plt.show()

In [None]:
# Keep only the records that carry an actor loss (it is NaN elsewhere).
actor_loss_df = loss_df.dropna(subset=['actor_loss'])

# One stand-alone figure per curve: (steps, values, line format, title, y label).
single_curves = (
    (actor_loss_df['step'], actor_loss_df['actor_loss'], 'r-', 'Actor Loss', 'Loss'),
    (loss_df['step'], loss_df['buffer_size'], 'm-', 'Buffer Size', 'Size'),
)
for steps, values, line_fmt, heading, y_label in single_curves:
    plt.figure(figsize=(12, 5))
    plt.plot(steps, values, line_fmt, alpha=0.7)
    plt.title(heading)
    plt.xlabel('Step')
    plt.ylabel(y_label)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## Анализ работы обученной модели

In [None]:
import torch

from nn_laser_stabilizer.data.replay_buffer import ReplayBuffer
from nn_laser_stabilizer.model.actor import load_actor_from_path
from nn_laser_stabilizer.config.types import NetworkType

# Artifacts expected inside the run directory.
ACTOR_PATH = EXPERIMENT_DIR_PATH.joinpath("models", "actor.pth")
BUFFER_PATH = EXPERIMENT_DIR_PATH.joinpath("data", "replay_buffer.pth")

buffer = ReplayBuffer.load(BUFFER_PATH)
print(f"ReplayBuffer loaded. Size: {len(buffer)} / capacity={buffer.capacity}")

# Resolve to an absolute path so the messages below are unambiguous, and
# fail early if the checkpoint is missing.
actor_path = ACTOR_PATH.resolve()
if not actor_path.exists():
    raise FileNotFoundError(f"Actor model not found: {actor_path}")

print(f"\nLoading actor from: {actor_path}")

# The network type is read from the base config and converted to the enum
# that the loader takes.
network_type_str = base_config.network.type
network_type = NetworkType(network_type_str)

actor = load_actor_from_path(actor_path, network_type)
actor.eval()

print(f"Actor loaded successfully (type: {network_type_str})")

# Work only with the first len(buffer) entries of each storage array.
buffer_size = len(buffer)
observations, true_actions, rewards = (
    stored[:buffer_size]
    for stored in (buffer.observations, buffer.actions, buffer.rewards)
)

In [None]:
print(f"Analyzing {buffer_size} transitions...")

predicted_actions = []

# Inference only: gradient tracking is switched off for the whole pass.
with torch.no_grad():
    if network_type != NetworkType.LSTM:
        # Non-LSTM actor: push the observations through in fixed-size
        # batches and concatenate the per-batch outputs.
        batch_size = 1024
        for start in range(0, buffer_size, batch_size):
            stop = min(start + batch_size, buffer_size)
            batch_actions, _ = actor.act(observations[start:stop])
            predicted_actions.append(batch_actions)

        predicted_actions = torch.cat(predicted_actions, dim=0)
    else:
        # LSTM actor: feed the observations one by one in stored order,
        # passing the hidden state returned by each call into the next.
        hidden_state = None
        for step_idx in range(buffer_size):
            action, options = actor.act(
                observations[step_idx], {'hidden_state': hidden_state}
            )
            hidden_state = options.get('hidden_state')
            predicted_actions.append(action)

        predicted_actions = torch.stack(predicted_actions, dim=0)

print(f"Predictions completed. Shape: {predicted_actions.shape}")

In [None]:
# Overall error between the actor's outputs and the actions stored in the buffer.
mse = (predicted_actions - true_actions).pow(2).mean().item()
mae = (predicted_actions - true_actions).abs().mean().item()

# The same two metrics, computed separately for every action column.
action_dim = true_actions.shape[1]
mse_per_dim = []
mae_per_dim = []
for dim in range(action_dim):
    dim_diff = predicted_actions[:, dim] - true_actions[:, dim]
    mse_per_dim.append(dim_diff.pow(2).mean().item())
    mae_per_dim.append(dim_diff.abs().mean().item())

mean_reward = rewards.mean().item()

separator = "=" * 60
print(separator)
print("Analysis Results:")
print(separator)
print(f"Total transitions analyzed: {buffer_size}")
print(f"Mean reward: {mean_reward:.6f}")
print("")
print("Action prediction metrics:")
print(f"  MSE (Mean Squared Error): {mse:.6f}")
print(f"  MAE (Mean Absolute Error): {mae:.6f}")

# The per-dimension breakdown is only printed for multi-dimensional actions.
if action_dim > 1:
    print("  Per-dimension metrics:")
    for dim in range(action_dim):
        print(f"    Dim {dim}: MSE={mse_per_dim[dim]:.6f}, MAE={mae_per_dim[dim]:.6f}")

print(separator)

# NumPy views of the columns used for plotting. Going by the axis labels
# below, observation column 0 is presumably the control error and column 1
# the normalized control output -- confirm against the environment.
obs_error = observations[:, 0].numpy()
obs_control_norm = observations[:, 1].numpy()
true_actions_np = true_actions[:, 0].numpy()
predicted_actions_np = predicted_actions[:, 0].numpy()
rewards_np = rewards[:, 0].numpy()
action_errors = predicted_actions_np - true_actions_np

# Figure 1: 2x2 overview of prediction quality and reward.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(parity_ax, error_ax), (hist_ax, reward_ax) = axes

# Predicted vs. true action with the y = x reference line.
action_span = [true_actions_np.min(), true_actions_np.max()]
parity_ax.scatter(true_actions_np, predicted_actions_np, alpha=0.5, s=1)
parity_ax.plot(action_span, action_span, 'r--', lw=2, label='Perfect prediction')
parity_ax.set(
    xlabel='True Action',
    ylabel='Predicted Action',
    title='True vs Predicted Actions',
)
parity_ax.legend()
parity_ax.grid(True, alpha=0.3)

# Prediction error by transition index.
error_ax.plot(action_errors, alpha=0.7, linewidth=0.5)
error_ax.axhline(y=0, color='r', linestyle='--', linewidth=1)
error_ax.set(
    xlabel='Transition Index',
    ylabel='Action Error (Predicted - True)',
    title='Action Prediction Error Over Time',
)
error_ax.grid(True, alpha=0.3)

# Histogram of the prediction error.
hist_ax.hist(action_errors, bins=50, alpha=0.7, edgecolor='black')
hist_ax.axvline(x=0, color='r', linestyle='--', linewidth=1)
hist_ax.set(
    xlabel='Action Error',
    ylabel='Frequency',
    title='Distribution of Action Errors',
)
hist_ax.grid(True, alpha=0.3)

# Reward by transition index with its mean marked.
reward_ax.plot(rewards_np, alpha=0.7, linewidth=0.5)
reward_ax.axhline(
    y=mean_reward, color='r', linestyle='--', linewidth=2,
    label=f'Mean reward: {mean_reward:.4f}',
)
reward_ax.set(
    xlabel='Transition Index',
    ylabel='Reward',
    title='Reward Over Time',
)
reward_ax.legend()
reward_ax.grid(True, alpha=0.3)

plt.tight_layout()

# Figure 2: prediction error against each of the two observation columns.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

error_panels = (
    (obs_error,
     'Observation Error (setpoint - process_variable)',
     'Action Error vs Observation Error'),
    (obs_control_norm,
     'Observation Control Output (normalized)',
     'Action Error vs Control Output in Observation'),
)
for panel, (x_values, x_label, heading) in zip(axes, error_panels):
    panel.scatter(x_values, action_errors, alpha=0.5, s=1)
    panel.axhline(y=0, color='r', linestyle='--', linewidth=1)
    panel.set_xlabel(x_label)
    panel.set_ylabel('Action Prediction Error')
    panel.set_title(heading)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Per-transition table combining observations, actions, prediction error
# and reward.
result_columns = {
    'observation_error': obs_error,
    'observation_control_output_norm': obs_control_norm,
    'true_action': true_actions_np,
    'predicted_action': predicted_actions_np,
    'action_error': action_errors,
    'reward': rewards_np,
}
results_df = pd.DataFrame(result_columns)

# The CSV is written relative to the current working directory.
output_path = Path("analysis_results.csv")
results_df.to_csv(output_path, index=False)
print(f"\nDetailed results saved to: {output_path}")

print("\nStatistics:")
summary_stats = results_df.describe()
print(summary_stats)

In [None]:
# Overlay of the stored and the predicted action (first action column) by
# transition index.
fig, ax = plt.subplots(figsize=(15, 6))

# sample_size currently spans every transition.
sample_size = len(true_actions_np)
indices = np.arange(sample_size)

traces = (
    (true_actions_np, 'True Action', 'blue'),
    (predicted_actions_np, 'Predicted Action (Model)', 'red'),
)
for series, legend_label, line_color in traces:
    ax.plot(
        indices,
        series[:sample_size],
        label=legend_label,
        alpha=0.7,
        linewidth=0.8,
        color=line_color,
    )

ax.set_xlabel('Transition Index (Time)', fontsize=12)
ax.set_ylabel('Action Value', fontsize=12)
ax.set_title('True Action vs Predicted Action Over Time', fontsize=14)
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)

fig.tight_layout()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Take the filled part of the buffer again; the shapes noted here are the
# original author's annotations and are not checked by this cell.
observations = buffer.observations[:buffer_size]  # [N, obs_dim]
actions = buffer.actions[:buffer_size]  # [N, act_dim]
rewards = buffer.rewards[:buffer_size]  # [N, 1]

# Assumption: observation = [error, control_output_norm]
obs_error = observations[:, 0].numpy()
obs_control_norm = observations[:, 1].numpy()
actions_np = actions[:, 0].numpy()
rewards_np = rewards[:, 0].numpy()

# One row per transition, first column of each quantity.
df = pd.DataFrame(
    dict(
        error=obs_error,
        control_norm=obs_control_norm,
        action=actions_np,
        reward=rewards_np,
    )
)

for caption, table in (
    ("\nDataFrame head:", df.head()),
    ("\nDataFrame describe:", df.describe()),
):
    print(caption)
    display(table)

# Distribution of every DataFrame column, one histogram per panel.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# (column, title, extra hist kwargs); the first panel keeps the default color.
hist_panels = (
    ("error", "Error distribution", {}),
    ("control_norm", "Control (in observation) distribution", {"color": "orange"}),
    ("action", "Action distribution", {"color": "green"}),
    ("reward", "Reward distribution", {"color": "purple"}),
)
# axes.flat walks the grid row by row, matching the order of hist_panels.
for panel, (column, heading, extra) in zip(axes.flat, hist_panels):
    panel.hist(df[column], bins=100, alpha=0.7, edgecolor="black", **extra)
    panel.set_title(heading)
    panel.set_xlabel(column)
    panel.set_ylabel("count")
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Pairwise scatter plots between error, action and reward.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# (x column, y column, title, extra scatter kwargs); the column names are
# also used as the axis labels.
scatter_panels = (
    ("error", "action", "Action vs Error", {}),
    ("error", "reward", "Reward vs Error", {"color": "red"}),
    ("action", "reward", "Reward vs Action", {"color": "green"}),
)
for panel, (x_col, y_col, heading, extra) in zip(axes, scatter_panels):
    panel.scatter(df[x_col], df[y_col], s=1, alpha=0.3, **extra)
    panel.set_xlabel(x_col)
    panel.set_ylabel(y_col)
    panel.set_title(heading)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Every DataFrame column by transition index; max_points currently spans
# the whole buffer.
max_points = buffer_size
idx = np.arange(max_points)

fig, axes = plt.subplots(4, 1, figsize=(14, 10), sharex=True)

# (column, title, extra plot kwargs); the column name doubles as the y label.
series_panels = (
    ("error", "Error over time", {}),
    ("control_norm", "Control (in observation) over time", {"color": "orange"}),
    ("action", "Action over time", {"color": "green"}),
    ("reward", "Reward over time", {"color": "purple"}),
)
for panel, (column, heading, extra) in zip(axes, series_panels):
    panel.plot(idx, df[column].values[:max_points], linewidth=0.5, **extra)
    panel.set_ylabel(column)
    panel.set_title(heading)
    panel.grid(True, alpha=0.3)

# Only the bottom panel is given an x label.
axes[3].set_xlabel("transition index")

plt.tight_layout()
plt.show()

# Correlation matrix of the numeric columns, drawn as an annotated heatmap
# with the color scale fixed to [-1, 1].
plt.figure(figsize=(6, 5))
corr = df.corr(numeric_only=True)
heatmap_options = dict(annot=True, fmt=".2f", cmap="coolwarm", vmin=-1, vmax=1)
sns.heatmap(corr, **heatmap_options)
plt.title("Correlation matrix (error, control_norm, action, reward)")
plt.tight_layout()
plt.show()