In [None]:
# 时间感知GRU-TD3深度强化学习训练
# Time-Aware GRU-TD3 Deep Reinforcement Learning Training
# 
# 本notebook实现基于时间感知GRU的TD3算法用于电磁式阻尼器控制
# This notebook implements time-aware GRU-based TD3 algorithm for electromagnetic damper control

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import random
import os
import logging
from datetime import datetime

# 导入自定义模块
from env import ElectromagneticDamperEnv
from TD3 import GruTD3Agent
from train import train_gru_td3
from af import plot_training_results, save_checkpoint, load_checkpoint
from fx import reward_function_combined

# Startup banner: system title followed by the PyTorch / CUDA runtime summary.
banner_rule = "=" * 80
for banner_line in (
    banner_rule,
    "时间感知GRU-TD3电磁式阻尼器控制训练系统",
    "Time-Aware GRU-TD3 Electromagnetic Damper Control Training System",
    banner_rule,
):
    print(banner_line)

print("PyTorch版本:", torch.__version__)
cuda_available = torch.cuda.is_available()
print("CUDA可用:", cuda_available)
if cuda_available:
    # Only query the device name when a CUDA device is actually present.
    print("CUDA设备:", torch.cuda.get_device_name())
print(banner_rule)

In [None]:
# System parameter configuration
print("配置训练参数...")

# Seed every random number generator used in this notebook
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    # Seed all visible CUDA devices rather than only the current one
    torch.cuda.manual_seed_all(SEED)

# Training hyperparameters
TOTAL_EPISODES = 2000
MAX_STEPS = 1000
BATCH_SIZE = 128
LEARNING_RATE_ACTOR = 1e-4
LEARNING_RATE_CRITIC = 1e-3
GAMMA = 0.99
TAU = 0.001
NOISE_STD = 0.2
NOISE_CLIP = 0.5
POLICY_DELAY = 2
MEMORY_SIZE = 100000

# GRU-specific parameters
SEQUENCE_LENGTH = 50
HIDDEN_SIZE = 128
NUM_LAYERS = 2
USE_TIME_INPUT = True  # key switch: enables the time-aware input

# Environment parameters
INITIAL_DISPLACEMENT = 0.001  # initial displacement
TIME_NOISE_STD = 0.1  # standard deviation of the time noise
VARIABLE_TIMESTEP = True  # enable a variable time step

# Output locations; the timestamp makes each run's log file name unique
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_dir = "gru_savedata"
checkpoint_dir = f"{log_dir}/gru_checkpoints"
plot_dir = f"{log_dir}/plots"

# Create the output directories (no error if they already exist)
os.makedirs(log_dir, exist_ok=True)
os.makedirs(checkpoint_dir, exist_ok=True)
os.makedirs(plot_dir, exist_ok=True)

# Logging: every record goes both to the run's log file and to the console
log_file = f"{log_dir}/training_log_{timestamp}.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # The log messages contain Chinese text; pin the file encoding to
        # UTF-8 so the result does not depend on the platform default.
        logging.FileHandler(log_file, encoding='utf-8'),
        logging.StreamHandler()
    ]
)

print(f"训练参数配置完成:")
print(f"  总训练轮次: {TOTAL_EPISODES}")
print(f"  序列长度: {SEQUENCE_LENGTH}")
print(f"  隐藏单元数: {HIDDEN_SIZE}")
print(f"  时间感知模式: {USE_TIME_INPUT}")
print(f"  时间噪声标准差: {TIME_NOISE_STD}")
print(f"  可变时间步长: {VARIABLE_TIMESTEP}")
print(f"  检查点保存路径: {checkpoint_dir}")
print(f"  训练日志: {log_file}")

logging.info(f"开始时间感知GRU-TD3训练 - 种子: {SEED}, 轮次: {TOTAL_EPISODES}")

In [None]:
# Disturbance and reward function definitions
print("定义扰动和奖励函数...")

def time_aware_disturbance_function(t, amplitude_scale=1.0, time_noise_std=0.1):
    """
    Multi-frequency disturbance evaluated at a randomly jittered time.

    Args:
        t: Time at which the disturbance is evaluated.
        amplitude_scale: Factor applied to the whole disturbance signal.
        time_noise_std: Standard deviation of the Gaussian jitter added to ``t``.

    Returns:
        ``amplitude_scale`` times the sum of three weighted sinusoids
        (1.0, 2.5 and 4.0 Hz) and a 0.1-weighted standard-normal sample.
    """
    # Jitter the time first to mimic timing uncertainty in real measurements.
    # RNG draws happen in this order: time jitter, then additive noise.
    jittered_time = t + np.random.normal(0, time_noise_std)

    # (weight, frequency in Hz) of each sinusoidal component
    harmonics = ((0.5, 1.0), (0.3, 2.5), (0.2, 4.0))

    signal_sum = 0.0
    for weight, freq_hz in harmonics:
        signal_sum = signal_sum + weight * np.sin(2 * np.pi * freq_hz * jittered_time)

    # Additive random noise term
    signal_sum = signal_sum + 0.1 * np.random.normal()

    return amplitude_scale * signal_sum

def time_aware_reward_function(state, action, next_state, time_info=None):
    """
    Time-aware reward: weighted quadratic penalties scaled by a time factor.

    Args:
        state: Current state (not used by the computation).
        action: Applied action; a scalar or an array-like of action components.
        next_state: Next state; index 3 is read as the main-structure
            displacement and index 4 as the main-structure velocity.
        time_info: Optional dict. ``'current_time'`` enables the time-dependent
            weighting; ``'timestep'`` is only consulted when ``'current_time'``
            is also present.

    Returns:
        A scalar reward (always <= 0): the time factor times the sum of the
        displacement, velocity and action penalties.
    """
    # Base penalties use the main-structure displacement and velocity
    x2 = next_state[3]  # main-structure displacement
    v2 = next_state[4]  # main-structure velocity

    # Displacement penalty (quadratic)
    displacement_penalty = -(x2**2) * 1000

    # Velocity penalty
    velocity_penalty = -(v2**2) * 100

    # Action penalty (discourages large control effort). The squared action is
    # summed over its components so that an array action such as
    # np.array([0.1]) still yields a scalar reward instead of a 1-element array.
    action_penalty = -float(np.sum(np.square(np.asarray(action, dtype=float)))) * 0.1

    # Time-dependent scaling factor
    time_factor = 1.0
    if time_info is not None and 'current_time' in time_info:
        current_time = time_info['current_time']
        # Heavier weighting during the last 0.2 s of every second
        if 0.8 <= (current_time % 1.0) <= 1.0:
            time_factor = 1.2

        # Additional 10% weighting for time steps shorter than 0.01
        if 'timestep' in time_info:
            dt = time_info['timestep']
            if dt < 0.01:
                time_factor *= 1.1

    # Combined reward
    reward = time_factor * (displacement_penalty + velocity_penalty + action_penalty)

    return reward

# Console summary of the disturbance / reward setup defined in this cell.
disturbance_summary_template = "扰动函数: 时间感知多频率组合（时间噪声标准差: {:.2f}）"
print(disturbance_summary_template.format(TIME_NOISE_STD))
print("奖励函数: 时间感知综合奖励（位移、速度、动作、时间因子）")
print("时间感知特性:")
for feature_line in (
    "  - 时间噪声模拟测量不确定性",
    "  - 时间相关的奖励权重调节",
    "  - 可变时间步长适应性",
):
    print(feature_line)

logging.info("时间感知扰动和奖励函数配置完成")

In [None]:
# Environment initialization
print("="*60)
print("初始化时间感知训练环境")
print("="*60)

# Build the environment with the disturbance / reward functions defined above
env = ElectromagneticDamperEnv(
    disturbance_func=time_aware_disturbance_function,
    reward_func=time_aware_reward_function,
    initial_displacement=INITIAL_DISPLACEMENT,
    time_noise_std=TIME_NOISE_STD,
    variable_timestep=VARIABLE_TIMESTEP
)

print("环境配置:")
print(f"  状态空间维度: {env.state_dim}")
print(f"  动作空间维度: {env.action_dim}")
print(f"  最大动作幅值: {env.max_action}")
print(f"  时间步长: {env.Ts} s")
print(f"  初始位移: {INITIAL_DISPLACEMENT} m")
print(f"  时间噪声标准差: {TIME_NOISE_STD}")
print(f"  可变时间步长: {VARIABLE_TIMESTEP}")

# Physical parameters exposed by the environment
print(f"\n系统物理参数:")
print(f"  主结构质量 m2: {env.m2} kg")
print(f"  TMD质量 m1: {env.m1} kg")
print(f"  主结构刚度 k2: {env.k2} N/m")
print(f"  TMD刚度 k1: {env.k1} N/m")
print(f"  主结构阻尼 c2: {env.c2} Ns/m")
print(f"  TMD基础阻尼 c1: {env.c1} Ns/m")
print(f"  电磁阻尼系数 μ: {env.mu}")

# Smoke test: reset the environment
print(f"\n测试环境初始化...")
state = env.reset()
print(f"初始状态维度: {state.shape}")
print(f"初始状态: {state}")

# Smoke test: apply a single action
test_action = np.array([0.1])
next_state, reward, done, info = env.step(test_action)
print(f"测试动作: {test_action}")
# The reward may come back as a 1-element array (the action is an array);
# ndarray does not support the ".4f" format spec, so reduce it to a float first.
reward_value = float(np.asarray(reward, dtype=float).sum())
print(f"奖励: {reward_value:.4f}")
print(f"信息: {info}")

print("环境初始化完成！")
logging.info(f"时间感知环境初始化完成 - 状态维度: {env.state_dim}, 动作维度: {env.action_dim}")

In [None]:
# Time-aware GRU-TD3 agent initialization
print("="*60)
print("初始化时间感知GRU-TD3智能体")
print("="*60)

# Select the compute device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")

# Build the time-aware GRU-TD3 agent
agent = GruTD3Agent(
    state_dim=env.state_dim,
    action_dim=env.action_dim,
    max_action=env.max_action,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    sequence_length=SEQUENCE_LENGTH,
    use_time_input=USE_TIME_INPUT,  # key switch: enable the time input
    lr_actor=LEARNING_RATE_ACTOR,
    lr_critic=LEARNING_RATE_CRITIC,
    gamma=GAMMA,
    tau=TAU,
    noise_std=NOISE_STD,
    noise_clip=NOISE_CLIP,
    policy_delay=POLICY_DELAY,
    device=device
)

print("时间感知GRU-TD3智能体配置:")
print(f"  状态维度: {env.state_dim}")
print(f"  动作维度: {env.action_dim}")
print(f"  隐藏单元数: {HIDDEN_SIZE}")
print(f"  GRU层数: {NUM_LAYERS}")
print(f"  序列长度: {SEQUENCE_LENGTH}")
print(f"  时间输入: {USE_TIME_INPUT}")
print(f"  Actor学习率: {LEARNING_RATE_ACTOR}")
print(f"  Critic学习率: {LEARNING_RATE_CRITIC}")
print(f"  折扣因子: {GAMMA}")
print(f"  软更新率: {TAU}")
print(f"  探索噪声标准差: {NOISE_STD}")
print(f"  策略延迟: {POLICY_DELAY}")

# Count the parameters once; the totals are reused for the console summary
# and for the log record at the end of the cell.
actor_param_count = sum(p.numel() for p in agent.actor.parameters())
critic1_param_count = sum(p.numel() for p in agent.critic1.parameters())
critic2_param_count = sum(p.numel() for p in agent.critic2.parameters())

print(f"\n网络结构:")
print(f"  Actor网络参数数量: {actor_param_count}")
print(f"  Critic1网络参数数量: {critic1_param_count}")
print(f"  Critic2网络参数数量: {critic2_param_count}")

# Smoke test: feed one sequence made of SEQUENCE_LENGTH copies of `state`
print(f"\n测试智能体运行...")
# Stack into a single ndarray before converting: building a tensor directly
# from a Python list of ndarrays is slow and triggers a PyTorch warning.
# Assumes `state` is a numpy array (its .shape is printed in the env cell).
test_state_sequence = torch.as_tensor(
    np.array([state] * SEQUENCE_LENGTH), dtype=torch.float32
).unsqueeze(0).to(device)
if USE_TIME_INPUT:
    # Matching time stamps spaced by the environment's nominal step env.Ts
    test_time_sequence = torch.FloatTensor([[i * env.Ts for i in range(SEQUENCE_LENGTH)]]).to(device)
    test_action = agent.select_action(test_state_sequence, test_time_sequence, add_noise=False)
else:
    test_action = agent.select_action(test_state_sequence, add_noise=False)

print(f"测试动作输出: {test_action}")
print(f"动作形状: {test_action.shape}")

print("时间感知GRU-TD3智能体初始化完成！")
logging.info(f"时间感知GRU-TD3智能体初始化完成 - 网络参数: "
            f"Actor {actor_param_count}, "
            f"Critic1 {critic1_param_count}, "
            f"Critic2 {critic2_param_count}")

In [None]:
# Training configuration and checkpoint management
print("="*60)
print("配置训练流程和检查点管理")
print("="*60)

# Settings bundle for the training run
training_config = dict(
    episodes=TOTAL_EPISODES,
    max_steps=MAX_STEPS,
    batch_size=BATCH_SIZE,
    memory_size=MEMORY_SIZE,
    sequence_length=SEQUENCE_LENGTH,
    use_time_input=USE_TIME_INPUT,
    checkpoint_dir=checkpoint_dir,
    plot_dir=plot_dir,
    save_interval=100,  # save every 100 episodes
    eval_interval=50,   # evaluate every 50 episodes
    print_interval=10,  # print every 10 episodes
)

print("训练配置:")
for key, value in training_config.items():
    print(f"  {key}: {value}")

# Look for a checkpoint left by an earlier run
checkpoint_path = f"{checkpoint_dir}/time_aware_gru_td3_latest.pth"
start_episode = 0

# (agent attribute, checkpoint key) pairs, restored in exactly this order
checkpoint_layout = (
    ('actor', 'actor_state_dict'),
    ('critic1', 'critic1_state_dict'),
    ('critic2', 'critic2_state_dict'),
    ('actor_target', 'actor_target_state_dict'),
    ('critic1_target', 'critic1_target_state_dict'),
    ('critic2_target', 'critic2_target_state_dict'),
    ('actor_optimizer', 'actor_optimizer'),
    ('critic1_optimizer', 'critic1_optimizer'),
    ('critic2_optimizer', 'critic2_optimizer'),
)

if not os.path.exists(checkpoint_path):
    print("未发现检查点文件，从头开始训练")
else:
    print(f"\n发现检查点文件: {checkpoint_path}")
    # Interactive prompt: blocks until the user answers
    choice = input("是否从检查点继续训练？(y/n): ").lower().strip()

    if choice != 'y':
        print("选择从头开始训练")
    else:
        # NOTE(review): if a load fails part-way, the components restored so
        # far keep the checkpoint weights while start_episode stays 0.
        try:
            checkpoint = load_checkpoint(checkpoint_path)
            for agent_attr, checkpoint_key in checkpoint_layout:
                getattr(agent, agent_attr).load_state_dict(checkpoint[checkpoint_key])
            # Resume with the episode after the one stored in the checkpoint
            start_episode = checkpoint['episode'] + 1

            print(f"成功加载检查点，从第 {start_episode} 轮开始继续训练")
            logging.info(f"从检查点恢复训练 - 起始轮次: {start_episode}")
        except Exception as e:
            print(f"加载检查点失败: {e}")
            print("将从头开始训练")
            logging.warning(f"检查点加载失败: {e}")

# Containers for the training results (filled in by the training cell)
rewards_log = []
losses_log = {'actor': [], 'critic1': [], 'critic2': []}
eval_rewards = []

remaining_episodes = TOTAL_EPISODES - start_episode
print(f"\n准备开始训练:")
print(f"  起始轮次: {start_episode}")
print(f"  目标轮次: {TOTAL_EPISODES}")
print(f"  剩余轮次: {remaining_episodes}")
print(f"  检查点保存路径: {checkpoint_path}")

logging.info(f"训练配置完成 - 起始轮次: {start_episode}, 目标轮次: {TOTAL_EPISODES}")

In [None]:
# Run the time-aware GRU-TD3 training
print("="*80)
print("开始时间感知GRU-TD3训练")
print("="*80)

try:
    # Run training
    training_results = train_gru_td3(
        agent=agent,
        env=env,
        episodes=TOTAL_EPISODES,
        max_steps=MAX_STEPS,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        sequence_length=SEQUENCE_LENGTH,
        use_time_input=USE_TIME_INPUT,  # key switch: enable the time input
        start_episode=start_episode,
        checkpoint_dir=checkpoint_dir,
        save_interval=training_config['save_interval'],
        eval_interval=training_config['eval_interval'],
        print_interval=training_config['print_interval'],
        device=device
    )

    print("="*80)
    print("训练完成！")
    print("="*80)

    # Unpack the training results
    rewards_log = training_results['rewards']
    losses_log = training_results['losses']
    eval_rewards = training_results['eval_rewards']

    # Save the final model
    final_checkpoint_path = f"{checkpoint_dir}/time_aware_gru_td3_final.pth"
    save_checkpoint(agent, TOTAL_EPISODES-1, final_checkpoint_path)

    # Export the training logs to CSV
    import pandas as pd

    # Reward log.
    # NOTE(review): episode numbers start at 0 even when resuming from a
    # checkpoint — confirm whether train_gru_td3 returns only the new episodes.
    rewards_df = pd.DataFrame({
        'episode': range(len(rewards_log)),
        'reward': rewards_log
    })
    rewards_csv_path = f"{log_dir}/time_aware_gru_rewards_log{timestamp[4:]}.csv"
    rewards_df.to_csv(rewards_csv_path, index=False)

    # Evaluation reward log
    if eval_rewards:
        eval_df = pd.DataFrame({
            'episode': [i * training_config['eval_interval'] for i in range(len(eval_rewards))],
            'eval_reward': eval_rewards
        })
        eval_csv_path = f"{log_dir}/time_aware_gru_eval_log{timestamp[4:]}.csv"
        eval_df.to_csv(eval_csv_path, index=False)

    print(f"训练结果保存:")
    print(f"  最终模型: {final_checkpoint_path}")
    print(f"  奖励日志: {rewards_csv_path}")
    if eval_rewards:
        print(f"  评估日志: {eval_csv_path}")

    # Training statistics. The mean/max/last figures are skipped when no
    # episode was run (e.g. resuming from a checkpoint at the final episode):
    # np.max and rewards_log[-1] raise on an empty sequence, which would be
    # misreported below as a training failure.
    has_rewards = len(rewards_log) > 0
    print(f"\n训练统计:")
    print(f"  总训练轮次: {len(rewards_log)}")
    if has_rewards:
        print(f"  平均奖励: {np.mean(rewards_log):.4f}")
        print(f"  最佳奖励: {np.max(rewards_log):.4f}")
        print(f"  最终奖励: {rewards_log[-1]:.4f}")
    if eval_rewards:
        print(f"  最佳评估奖励: {np.max(eval_rewards):.4f}")

    if has_rewards:
        logging.info(f"时间感知GRU-TD3训练完成 - 总轮次: {len(rewards_log)}, "
                    f"平均奖励: {np.mean(rewards_log):.4f}, "
                    f"最佳奖励: {np.max(rewards_log):.4f}")
    else:
        logging.warning("时间感知GRU-TD3训练完成 - 未产生任何奖励记录")

except KeyboardInterrupt:
    print("\n训练被用户中断")
    logging.info("训练被用户中断")
except Exception as e:
    print(f"\n训练过程中发生错误: {e}")
    # logging.exception records the message together with the traceback, so
    # the stack trace reaches the log file as well as the console handler.
    logging.exception(f"训练错误: {e}")

In [None]:
# Training result analysis and visualization
print("="*60)
print("分析和可视化训练结果")
print("="*60)

# Only analyse when a non-empty reward log exists in this session
if 'rewards_log' in locals() and len(rewards_log) > 0:

    # One figure with up to four panels (2x2 grid); panels whose data is
    # missing are simply not created
    plt.figure(figsize=(15, 10))

    # Panel 1: per-episode reward
    plt.subplot(2, 2, 1)
    plt.plot(rewards_log, 'b-', alpha=0.6, linewidth=1)

    # Moving average; the window is a tenth of the run length, capped at 100
    window_size = min(100, len(rewards_log) // 10)
    if window_size > 1:
        moving_avg = np.convolve(rewards_log, np.ones(window_size)/window_size, mode='valid')
        # 'valid' mode yields len - window_size + 1 points; x starts at
        # window_size-1 so each average sits at the last episode of its window
        plt.plot(range(window_size-1, len(rewards_log)), moving_avg, 'r-', linewidth=2, 
                label=f'移动平均 (窗口={window_size})')
        plt.legend()

    plt.title('时间感知GRU-TD3训练奖励曲线')
    plt.xlabel('训练轮次')
    plt.ylabel('奖励')
    plt.grid(True, alpha=0.3)

    # Panel 2: evaluation reward
    if eval_rewards:
        plt.subplot(2, 2, 2)
        # The i-th evaluation is placed at episode i * eval_interval
        eval_episodes = [i * training_config['eval_interval'] for i in range(len(eval_rewards))]
        plt.plot(eval_episodes, eval_rewards, 'g-o', linewidth=2, markersize=4)
        plt.title('评估奖励曲线')
        plt.xlabel('训练轮次')
        plt.ylabel('评估奖励')
        plt.grid(True, alpha=0.3)

    # Panel 3: actor loss
    if losses_log['actor']:
        plt.subplot(2, 2, 3)
        plt.plot(losses_log['actor'], 'r-', label='Actor损失', alpha=0.7)
        plt.title('Actor网络损失')
        plt.xlabel('更新步数')
        plt.ylabel('损失')
        plt.legend()
        plt.grid(True, alpha=0.3)

    # Panel 4: both critic losses (drawn only when both are available)
    if losses_log['critic1'] and losses_log['critic2']:
        plt.subplot(2, 2, 4)
        plt.plot(losses_log['critic1'], 'b-', label='Critic1损失', alpha=0.7)
        plt.plot(losses_log['critic2'], 'g-', label='Critic2损失', alpha=0.7)
        plt.title('Critic网络损失')
        plt.xlabel('更新步数')
        plt.ylabel('损失')
        plt.legend()
        plt.grid(True, alpha=0.3)

    # Save before show() so the file is written from the fully drawn figure
    plt.tight_layout()
    plt.savefig(f'{plot_dir}/time_aware_gru_td3_training_curves.png', dpi=300, bbox_inches='tight')
    plt.show()

    # Reward statistics
    print("训练奖励统计分析:")
    print(f"  总轮次: {len(rewards_log)}")
    print(f"  平均奖励: {np.mean(rewards_log):.4f}")
    print(f"  标准差: {np.std(rewards_log):.4f}")
    print(f"  最小奖励: {np.min(rewards_log):.4f}")
    print(f"  最大奖励: {np.max(rewards_log):.4f}")
    print(f"  最终奖励: {rewards_log[-1]:.4f}")

    # Convergence indicator: statistics over the last 100 episodes
    # (or over the whole run when it is shorter than that)
    last_100_rewards = rewards_log[-100:] if len(rewards_log) >= 100 else rewards_log
    print(f"  最后100轮平均奖励: {np.mean(last_100_rewards):.4f}")
    print(f"  最后100轮标准差: {np.std(last_100_rewards):.4f}")

    # Trend: mean of the second half minus mean of the first half
    if len(rewards_log) > 200:
        first_half = rewards_log[:len(rewards_log)//2]
        second_half = rewards_log[len(rewards_log)//2:]
        improvement = np.mean(second_half) - np.mean(first_half)
        print(f"  训练改进程度: {improvement:.4f}")

    print(f"\n训练曲线已保存到: {plot_dir}/time_aware_gru_td3_training_curves.png")

    logging.info(f"训练结果分析完成 - 平均奖励: {np.mean(rewards_log):.4f}, "
                f"最佳奖励: {np.max(rewards_log):.4f}, "
                f"最后100轮平均: {np.mean(last_100_rewards):.4f}")

else:
    # No training results available
    print("警告: 没有找到训练结果数据")
    logging.warning("训练结果数据缺失")

In [None]:
# Time-aware model test and performance evaluation
print("="*60)
print("时间感知模型性能测试")
print("="*60)

# Simulate with the trained time-aware agent as the controller
print("运行时间感知控制仿真测试...")
test_data_with_control = env.run_simulation(controller=agent)

# Baseline simulation without any controller, for comparison
print("运行无控制仿真对比...")
test_data_no_control = env.run_simulation(controller=None)

# 时间感知性能指标计算
def _percent_reduction(baseline, value):
    """Return the reduction of ``value`` relative to ``baseline`` in percent (0.0 for a zero baseline)."""
    if baseline == 0:
        return 0.0
    return (baseline - value) / baseline * 100


def calculate_time_aware_performance_metrics(controlled_data, uncontrolled_data, dt=None):
    """
    Compute vibration-control performance metrics for a controlled run
    against an uncontrolled baseline.

    Args:
        controlled_data: Dict with ``'all_states'`` (2-D array; column 3 is
            read as the main-structure displacement) and ``'actions'``
            (sequence of control actions; the first entry is skipped).
        uncontrolled_data: Dict with ``'all_states'`` of the baseline run.
        dt: Sampling period in seconds. Defaults to the module-level
            ``env.Ts`` when omitted.

    Returns:
        Dict with the keys ``rms_controlled``, ``rms_uncontrolled``,
        ``rms_reduction``, ``max_controlled``, ``max_uncontrolled``,
        ``max_reduction``, ``control_energy``, ``control_smoothness``,
        ``settling_time`` and ``avg_freq_suppression``. Reductions and
        suppressions are percentages; a zero baseline gives 0.0 rather than
        dividing by zero.
    """
    from scipy import signal

    if dt is None:
        dt = env.Ts

    # Main-structure displacement (state index 3)
    x2_controlled = controlled_data['all_states'][:, 3]
    x2_uncontrolled = uncontrolled_data['all_states'][:, 3]

    # Basic performance figures
    rms_controlled = np.sqrt(np.mean(x2_controlled**2))
    rms_uncontrolled = np.sqrt(np.mean(x2_uncontrolled**2))

    max_controlled = np.max(np.abs(x2_controlled))
    max_uncontrolled = np.max(np.abs(x2_uncontrolled))

    # Vibration reduction relative to the uncontrolled run
    rms_reduction = _percent_reduction(rms_uncontrolled, rms_controlled)
    max_reduction = _percent_reduction(max_uncontrolled, max_controlled)

    # Control effort: sum of squared actions times the sampling period
    control_actions = np.asarray(controlled_data['actions'][1:])
    control_energy = np.sum(control_actions**2) * dt

    # Control smoothness: std of the step-to-step change in the action.
    # Difference along the time axis (axis 0): with the default last axis,
    # actions stored as 1-element arrays (shape (N, 1)) give an empty diff
    # and a NaN standard deviation.
    if len(control_actions) > 1:
        control_smoothness = np.std(np.diff(control_actions, axis=0))
    else:
        control_smoothness = 0.0

    # Settling time: time of the last sample (searched over indices
    # 1 .. n-50) lying outside a 2% band around the final value
    settling_time = 0
    n_samples = len(x2_controlled)
    if n_samples > 100:
        final_value = np.mean(x2_controlled[-50:])  # mean of the last 50 samples
        tolerance = 0.02 * np.max(np.abs(x2_controlled))  # 2% of the peak magnitude
        deviation = np.abs(x2_controlled[1:n_samples - 49] - final_value)
        outside_band = np.flatnonzero(deviation > tolerance)
        if outside_band.size:
            # +1 because the searched slice starts at sample index 1
            settling_time = (outside_band[-1] + 1) * dt

    # Frequency-domain performance via Welch power spectral densities
    _, psd_controlled = signal.welch(
        x2_controlled, fs=1/dt, nperseg=min(1024, len(x2_controlled)//4))
    freq, psd_uncontrolled = signal.welch(
        x2_uncontrolled, fs=1/dt, nperseg=min(1024, len(x2_uncontrolled)//4))

    # Suppression at the bins nearest to 1 Hz, 2.5 Hz and 4 Hz
    target_freqs = [1.0, 2.5, 4.0]
    freq_suppressions = []
    for target_freq in target_freqs:
        freq_idx = np.argmin(np.abs(freq - target_freq))
        if freq_idx >= len(psd_controlled) or freq_idx >= len(psd_uncontrolled):
            continue
        baseline_power = psd_uncontrolled[freq_idx]
        if baseline_power > 0:  # skip bins with no baseline power (avoids 0/0)
            freq_suppressions.append(
                (baseline_power - psd_controlled[freq_idx]) / baseline_power * 100)

    avg_freq_suppression = np.mean(freq_suppressions) if freq_suppressions else 0

    return {
        'rms_controlled': rms_controlled,
        'rms_uncontrolled': rms_uncontrolled,
        'rms_reduction': rms_reduction,
        'max_controlled': max_controlled,
        'max_uncontrolled': max_uncontrolled,
        'max_reduction': max_reduction,
        'control_energy': control_energy,
        'control_smoothness': control_smoothness,
        'settling_time': settling_time,
        'avg_freq_suppression': avg_freq_suppression
    }

# Evaluate the controlled run against the uncontrolled baseline and report it.
metrics = calculate_time_aware_performance_metrics(test_data_with_control, test_data_no_control)

# (label, metrics key, number format, unit suffix) for each console line
metric_report_rows = (
    ("RMS位移减少", 'rms_reduction', ".1f", "%"),
    ("最大位移减少", 'max_reduction', ".1f", "%"),
    ("平均频域抑制", 'avg_freq_suppression', ".1f", "%"),
    ("有控制RMS", 'rms_controlled', ".4f", " m"),
    ("无控制RMS", 'rms_uncontrolled', ".4f", " m"),
    ("有控制最大位移", 'max_controlled', ".4f", " m"),
    ("无控制最大位移", 'max_uncontrolled', ".4f", " m"),
    ("控制能耗", 'control_energy', ".4f", ""),
    ("控制平滑度", 'control_smoothness', ".4f", ""),
    ("调节时间", 'settling_time', ".2f", " s"),
)

print("性能指标（时间感知GRU-TD3）:")
for row_label, row_key, row_format, row_suffix in metric_report_rows:
    print(f"  {row_label}: {metrics[row_key]:{row_format}}{row_suffix}")

# Headline figures also go to the log
metrics_log_message = (
    f"时间感知性能测试结果 - RMS减少: {metrics['rms_reduction']:.1f}%, "
    f"最大位移减少: {metrics['max_reduction']:.1f}%, "
    f"频域抑制: {metrics['avg_freq_suppression']:.1f}%, "
    f"控制能耗: {metrics['control_energy']:.4f}"
)
logging.info(metrics_log_message)

In [None]:
# Time-aware comparison plots
print("="*60)
print("生成时间感知对比图表")
print("="*60)

# Time axis built from the nominal step env.Ts.
# NOTE(review): if the simulation really uses a variable time step, this
# uniform axis is only approximate — confirm against env.run_simulation.
time_test = np.arange(len(test_data_with_control['all_states'])) * env.Ts

# Detailed comparison figure: four stacked panels
fig, axes = plt.subplots(4, 1, figsize=(14, 12))

# Displacement comparison (state index 3)
axes[0].plot(time_test, test_data_with_control['all_states'][:, 3], 'b-', 
            label='时间感知GRU-TD3控制', linewidth=1.5)
axes[0].plot(time_test, test_data_no_control['all_states'][:, 3], 'r--', 
            label='无控制', linewidth=1.5, alpha=0.7)
axes[0].set_ylabel('位移 (m)')
axes[0].set_title('主结构位移响应对比（时间感知）')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Velocity comparison (state index 4)
axes[1].plot(time_test, test_data_with_control['all_states'][:, 4], 'b-', 
            label='时间感知GRU-TD3控制', linewidth=1.5)
axes[1].plot(time_test, test_data_no_control['all_states'][:, 4], 'r--', 
            label='无控制', linewidth=1.5, alpha=0.7)
axes[1].set_ylabel('速度 (m/s)')
axes[1].set_title('主结构速度响应对比（时间感知）')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

# Control force time history. The first recorded action is skipped so the
# series lines up with time_test[1:].
control_actions = np.asarray(test_data_with_control['actions'][1:])
if control_actions.ndim > 1:
    # Actions stored as arrays arrive as (N, action_dim); collapse a single
    # action dimension to a plain 1-D series so that the diff / FFT / histogram
    # code below operates along time.
    control_actions = control_actions.reshape(len(control_actions), -1)
    if control_actions.shape[1] == 1:
        control_actions = control_actions[:, 0]
axes[2].plot(time_test[1:], control_actions, 'g-', 
            label='时间感知GRU-TD3控制力', linewidth=1.5)
axes[2].set_ylabel('控制力 (N)')
axes[2].set_title('控制力时程（时间感知）')
axes[2].legend()
axes[2].grid(True, alpha=0.3)

# Disturbance force time history
if 'disturbances' in test_data_with_control:
    disturbances = test_data_with_control['disturbances']
    axes[3].plot(time_test[:len(disturbances)], disturbances, 'k-', 
                label='时间感知扰动', linewidth=1.0, alpha=0.7)
    axes[3].set_ylabel('扰动力 (N)')
    axes[3].set_xlabel('时间 (s)')
    axes[3].set_title('扰动力时程（含时间噪声）')
    axes[3].legend()
    axes[3].grid(True, alpha=0.3)
else:
    # No disturbance record: show the control-force spectrum instead
    from scipy.fft import fft, fftfreq
    # Transform along the time axis (axis 0), whatever the action layout
    control_fft = np.abs(fft(control_actions, axis=0))
    freqs = fftfreq(len(control_actions), env.Ts)
    # Keep the first half of the bins (the non-negative frequencies)
    positive_freqs = freqs[:len(freqs)//2]
    positive_fft = control_fft[:len(control_fft)//2]

    axes[3].plot(positive_freqs, positive_fft, 'g-', linewidth=1.5)
    axes[3].set_ylabel('幅值')
    axes[3].set_xlabel('频率 (Hz)')
    axes[3].set_title('控制力频谱')
    axes[3].grid(True, alpha=0.3)
    axes[3].set_xlim(0, 10)  # show 0-10 Hz

plt.tight_layout()
plt.savefig(f'{plot_dir}/time_aware_performance_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# Time-aware characteristics figure (2x2 panels)
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Frequency-domain analysis: Welch PSD of the displacement (state index 3)
from scipy import signal
freq, psd_controlled = signal.welch(test_data_with_control['all_states'][:, 3], 
                                   fs=1/env.Ts, nperseg=min(1024, len(test_data_with_control['all_states'])//4))
freq, psd_uncontrolled = signal.welch(test_data_no_control['all_states'][:, 3], 
                                     fs=1/env.Ts, nperseg=min(1024, len(test_data_no_control['all_states'])//4))

axes[0,0].semilogy(freq, psd_controlled, 'b-', label='时间感知控制', linewidth=2)
axes[0,0].semilogy(freq, psd_uncontrolled, 'r--', label='无控制', linewidth=2, alpha=0.7)
axes[0,0].set_xlabel('频率 (Hz)')
axes[0,0].set_ylabel('功率谱密度 (m²/Hz)')
axes[0,0].set_title('位移响应功率谱密度对比')
axes[0,0].legend()
axes[0,0].grid(True, alpha=0.3)
axes[0,0].set_xlim(0, 6)

# Normalise the action record to an array indexed by time along axis 0.
# Actions stored as 1-element arrays give shape (N, 1); np.diff on that shape
# would difference the length-1 last axis and return nothing to plot.
action_series = np.asarray(control_actions)
if action_series.ndim > 1:
    action_series = action_series.reshape(len(action_series), -1)
    if action_series.shape[1] == 1:
        action_series = action_series[:, 0]

# Control force distribution
axes[0,1].hist(action_series, bins=50, alpha=0.7, color='green', edgecolor='black')
axes[0,1].set_xlabel('控制力 (N)')
axes[0,1].set_ylabel('频次')
axes[0,1].set_title('控制力分布')
axes[0,1].grid(True, alpha=0.3)

# Control force rate of change (finite difference along time)
if len(action_series) > 1:
    control_rate = np.diff(action_series, axis=0) / env.Ts
    axes[1,0].plot(time_test[1:-1], control_rate, 'orange', linewidth=1.0)
    axes[1,0].set_xlabel('时间 (s)')
    axes[1,0].set_ylabel('控制力变化率 (N/s)')
    axes[1,0].set_title('控制力变化率（平滑度指标）')
    axes[1,0].grid(True, alpha=0.3)

# Error analysis: the target displacement is 0, so the displacement is the error
position_error = test_data_with_control['all_states'][:, 3]
axes[1,1].plot(time_test, np.abs(position_error), 'purple', linewidth=1.5)
axes[1,1].set_xlabel('时间 (s)')
axes[1,1].set_ylabel('|位移误差| (m)')
axes[1,1].set_title('位移误差绝对值')
axes[1,1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f'{plot_dir}/time_aware_detailed_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# Performance summary: assemble every console line first, then emit them in
# one write (one line per entry, in the same order as before).
summary_lines = [
    "\n时间感知GRU-TD3性能总结:",
    "=" * 50,
    "振动抑制效果:",
    f"  - RMS位移减少: {metrics['rms_reduction']:.1f}%",
    f"  - 最大位移减少: {metrics['max_reduction']:.1f}%",
    f"  - 频域平均抑制: {metrics['avg_freq_suppression']:.1f}%",
    "\n控制品质:",
    f"  - 控制能耗: {metrics['control_energy']:.4f}",
    f"  - 控制平滑度: {metrics['control_smoothness']:.4f}",
    f"  - 调节时间: {metrics['settling_time']:.2f} s",
    "\n时间感知特性:",
    f"  - 时间噪声标准差: {TIME_NOISE_STD}",
    f"  - 可变时间步长: {VARIABLE_TIMESTEP}",
    f"  - 时间输入启用: {USE_TIME_INPUT}",
    # Where the figures were written
    "\n所有分析图表已保存完成！",
    f"图表保存路径: {plot_dir}",
    "  - 性能对比图: time_aware_performance_comparison.png",
    "  - 详细分析图: time_aware_detailed_analysis.png",
]
print("\n".join(summary_lines))

logging.info("时间感知GRU-TD3训练和测试完成，所有结果已保存")