In [None]:
#!pip install -q -U einops datasets matplotlib tqdm

import math
from contextlib import nullcontext
from functools import partial
from inspect import isfunction

import numpy as np
import torch
import torch.nn.functional as F
from einops import rearrange, reduce
#from einops.layers.torch import Rearrange
from torch import nn, einsum
from tqdm.auto import tqdm

In [None]:
import torch.nn as nn
from einops import rearrange  # 假设已经导入了einops库中的rearrange函数
#from torch_utils import exists  # 假设已经定义了exists函数，用于检查对象是否存在

# 自行定义exists函数，用于检查对象是否存在（非None）
def exists(val):
    """Return True when ``val`` is anything other than ``None``."""
    if val is None:
        return False
    return True

# 定義一個基礎的Block塊
class Block(nn.Module):
    """Conv1d -> GroupNorm -> optional scale/shift -> SiLU building block.

    Args:
        dim: number of input channels of the convolution.
        dim_out: number of output channels (also the GroupNorm channel count).
        groups: number of groups used by the GroupNorm layer.
    """

    def __init__(self, dim, dim_out, groups=8):
        super().__init__()
        # Kernel size 3 with padding 1 keeps the length of the last axis.
        self.proj = nn.Conv1d(dim, dim_out, kernel_size=3, padding=1)
        self.norm = nn.GroupNorm(num_groups=groups, num_channels=dim_out)
        self.act = nn.SiLU()

    def forward(self, x, scale_shift=None):
        """Apply the block to ``x``.

        Args:
            x: input tensor fed to the Conv1d projection.
            scale_shift: optional ``(scale, shift)`` pair; when given, the
                normalised activations become ``x * (scale + 1) + shift``
                before the SiLU.

        Returns:
            The activated tensor.
        """
        normed = self.norm(self.proj(x))
        if not exists(scale_shift):
            return self.act(normed)
        scale, shift = scale_shift
        return self.act(normed * (scale + 1) + shift)


# 定义一个ResnetBlock类，用于构建残差网络中的基本块
class ResnetBlock(nn.Module):
    """Residual block made of two ``Block`` layers with optional time conditioning.

    Args:
        dim: number of input channels.
        dim_out: number of output channels.
        time_emb_dim: size of the time embedding; when ``None`` no time MLP
            is created and ``time_emb`` is ignored in ``forward``.
        groups: GroupNorm group count forwarded to both ``Block`` layers.
    """

    def __init__(self, dim, dim_out, *, time_emb_dim=None, groups=8):
        super().__init__()
        # Time embedding projection (SiLU -> Linear), only when a size is given.
        if exists(time_emb_dim):
            self.mlp = nn.Sequential(nn.SiLU(), nn.Linear(time_emb_dim, dim_out))
        else:
            self.mlp = None

        # Two basic blocks process the sequence features.
        self.block1 = Block(dim, dim_out, groups=groups)
        self.block2 = Block(dim_out, dim_out, groups=groups)

        # A 1x1 convolution matches the channel count on the skip path when
        # input and output widths differ; otherwise the skip is the identity.
        if dim != dim_out:
            self.res_conv = nn.Conv1d(dim, dim_out, 1)
        else:
            self.res_conv = nn.Identity()

    def forward(self, x, time_emb=None):
        """Return ``block2(block1(x) [+ time]) + res_conv(x)``.

        Args:
            x: input tensor passed to the first block and the skip path.
            time_emb: optional 2-D ``(b, c)`` time embedding; it is projected
                by the MLP and broadcast along a trailing axis of size 1.
        """
        hidden = self.block1(x)
        use_time = exists(self.mlp) and exists(time_emb)
        if use_time:
            # (b, c) -> (b, c, 1) so the embedding broadcasts over the last axis.
            cond = rearrange(self.mlp(time_emb), "b c -> b c 1")
            hidden = cond + hidden
        hidden = self.block2(hidden)
        shortcut = self.res_conv(x)
        return hidden + shortcut
# Maps x of shape (batch_size, dim, time_steps) to (batch_size, dim_out, time_steps).

In [None]:
import torch
from torch import nn
from einops import rearrange

# 定义标准的多头注意力机制类
class Attention(nn.Module):
    """Standard multi-head softmax attention over the last axis of a 3-D input.

    Args:
        dim: number of input (and output) channels.
        heads: number of attention heads.
        dim_head: channels per head; the inner width is ``heads * dim_head``.
    """

    def __init__(self, dim, heads=4, dim_head=32):
        super().__init__()
        inner_dim = dim_head * heads
        # Queries are multiplied by dim_head ** -0.5 before the dot product.
        self.scale = dim_head ** -0.5
        self.heads = heads
        # One 1x1 convolution produces Q, K and V stacked along the channel axis.
        self.to_qkv = nn.Conv1d(dim, inner_dim * 3, kernel_size=1, bias=False)
        # 1x1 convolution projecting the attended features back to ``dim``.
        self.to_out = nn.Conv1d(inner_dim, dim, kernel_size=1)

    def forward(self, x):
        """Attend over the last axis of ``x`` and return a tensor with ``dim`` channels."""
        # Unpacking requires x to be rank-3; the values themselves are unused.
        _batch, _channels, _steps = x.shape

        # Split the projection into three equal channel chunks, then expose heads.
        projections = self.to_qkv(x).chunk(3, dim=1)
        q, k, v = (
            rearrange(part, "b (h c) t -> b h c t", h=self.heads)
            for part in projections
        )

        q = q * self.scale

        # Pairwise query/key scores: (b, h, i, j).
        logits = torch.einsum("b h d i, b h d j -> b h i j", q, k)
        # Subtract the row maximum for numerical stability of the softmax.
        logits = logits - logits.amax(dim=-1, keepdim=True).detach()
        weights = logits.softmax(dim=-1)

        # Weighted sum of values: (b, h, i, d).
        mixed = torch.einsum("b h i j, b h d j -> b h i d", weights, v)
        # Merge heads back into the channel axis.
        mixed = rearrange(mixed, "b h t d -> b (h d) t")

        return self.to_out(mixed)

# 定义线性注意力机制类
class LinearAttention(nn.Module):
    """Linear-complexity attention: a key/value context matrix is applied to the queries.

    Args:
        dim: number of input (and output) channels.
        heads: number of attention heads.
        dim_head: channels per head; the inner width is ``heads * dim_head``.
    """

    def __init__(self, dim, heads=4, dim_head=32):
        super().__init__()
        inner_dim = dim_head * heads
        self.scale = dim_head ** -0.5
        self.heads = heads
        # One 1x1 convolution produces Q, K and V stacked along the channel axis.
        self.to_qkv = nn.Conv1d(dim, inner_dim * 3, kernel_size=1, bias=False)
        # Output projection followed by a single-group GroupNorm.
        self.to_out = nn.Sequential(
            nn.Conv1d(inner_dim, dim, kernel_size=1),
            nn.GroupNorm(1, dim),
        )

    def forward(self, x):
        """Return the attended tensor; it has ``dim`` channels like the input."""
        # Unpacking requires x to be rank-3; the values themselves are unused.
        _batch, _channels, _steps = x.shape

        projections = self.to_qkv(x).chunk(3, dim=1)
        q, k, v = [
            rearrange(part, "b (h c) t -> b h c t", h=self.heads)
            for part in projections
        ]

        # Queries are normalised over the per-head channel axis and scaled;
        # keys are normalised over the last (sequence) axis.
        q = q.softmax(dim=-2) * self.scale
        k = k.softmax(dim=-1)

        # Context matrix (b, h, d, e): keys and values contracted over the sequence axis.
        context = torch.einsum("b h d n, b h e n -> b h d e", k, v)
        # Apply the context to every query position: (b, h, e, n).
        attended = torch.einsum("b h d e, b h d n -> b h e n", context, q)

        # Merge heads back into the channel axis.
        attended = rearrange(attended, "b h c t -> b (h c) t")
        return self.to_out(attended)
#輸入與輸出的形狀相同

In [None]:
import torch.nn as nn

class Residual(nn.Module):
    """Skip-connection wrapper: ``forward(x)`` returns ``fn(x) + x``."""

    def __init__(self, fn):
        super().__init__()
        # Wrapped callable/sub-module, e.g. a convolution or attention layer.
        self.fn = fn

    def forward(self, x):
        branch = self.fn(x)
        return branch + x

class PreNorm(nn.Module):
    """Normalise the input with a single-group GroupNorm, then apply ``fn``.

    Args:
        dim: channel count given to the GroupNorm layer.
        fn: callable/sub-module applied to the normalised tensor.
    """

    def __init__(self, dim, fn):
        super().__init__()
        # One group: statistics are computed jointly over all channels of a sample.
        self.norm = nn.GroupNorm(num_groups=1, num_channels=dim)
        self.fn = fn

    def forward(self, x):
        normed = self.norm(x)
        return self.fn(normed)

class Downsample(nn.Module):
    """Strided Conv1d (kernel 4, stride 2, padding 1) that keeps the channel count."""

    def __init__(self, dim):
        super().__init__()
        self.conv = nn.Conv1d(dim, dim, kernel_size=4, stride=2, padding=1)

    def forward(self, x):
        # With these settings an even-length last axis is halved.
        downsampled = self.conv(x)
        return downsampled

class Upsample(nn.Module):
    """ConvTranspose1d (kernel 4, stride 2, padding 1) that keeps the channel count."""

    def __init__(self, dim):
        super().__init__()
        self.upsample = nn.ConvTranspose1d(dim, dim, 4, stride=2, padding=1)

    def forward(self, x):
        # With these settings the length of the last axis is doubled.
        upsampled = self.upsample(x)
        return upsampled


In [None]:
import torch
from torch import nn
from einops import rearrange

# 定义U-Net网络结构，适用于时间序列数据
class Unet(nn.Module):
    """1D U-Net built from ResNet blocks and attention, optionally time-conditioned.

    The encoder applies, per stage, two ``ResnetBlock`` layers, a residual
    ``LinearAttention`` and (except on the last stage) a ``Downsample``.
    The bottleneck uses full ``Attention``. The decoder mirrors the encoder,
    concatenating the stored encoder feature on the channel axis at each stage.

    Args:
        dim: base hidden width; stage widths are ``dim * m`` for ``m`` in ``dim_mults``.
        init_dim: width of the initial convolution; defaults to ``dim // 3 * 2``.
        out_dim: number of output channels; defaults to ``channels``.
        dim_mults: per-stage width multipliers.
        channels: number of input channels.
        with_time_emb: when True a small MLP embeds the ``time`` input and the
            embedding is passed to every ResNet block.
        resnet_block_groups: GroupNorm group count used in the ResNet blocks.
    """

    def __init__(
            self,
            dim,  # base hidden width
            init_dim=None,  # width after the first convolution; derived from dim when omitted
            out_dim=None,  # number of output channels
            dim_mults=(1, 2, 4, 8),  # width multiplier of every stage
            channels=1,  # number of input channels
            with_time_emb=True,  # whether to use the time embedding
            resnet_block_groups=8,  # GroupNorm groups inside the ResNet blocks
    ):
        super().__init__()
        # Initial projection.
        self.channels = channels
        init_dim = init_dim or dim // 3 * 2
        self.init_conv = nn.Conv1d(channels, init_dim, 7, padding=3)
        # Channel width at every stage boundary, e.g. [42, 64, 128, 256, 512] for dim=64.
        dims = [init_dim, *map(lambda m: dim * m, dim_mults)]
        in_out = list(zip(dims[:-1], dims[1:]))

        block_klass = partial(ResnetBlock, groups=resnet_block_groups)

        # Time embedding: the raw time value (last dimension of size 1) goes
        # through Linear -> GELU -> Linear.
        if with_time_emb:
            time_dim = dim * 4
            self.time_mlp = nn.Sequential(
                nn.Linear(1, time_dim),
                nn.GELU(),
                nn.Linear(time_dim, time_dim),
            )
        else:
            time_dim = None
            self.time_mlp = None

        self.downs = nn.ModuleList([])
        self.ups = nn.ModuleList([])
        num_resolutions = len(in_out)

        # Encoder stages.
        for ind, (dim_in, dim_out) in enumerate(in_out):
            is_last = ind >= (num_resolutions - 1)  # the last stage does not downsample
            self.downs.append(
                nn.ModuleList(
                    [
                        block_klass(dim_in, dim_out, time_emb_dim=time_dim),
                        block_klass(dim_out, dim_out, time_emb_dim=time_dim),
                        Residual(PreNorm(dim_out, LinearAttention(dim_out))),
                        Downsample(dim_out) if not is_last else nn.Identity(),
                    ]
                )
            )

        # Bottleneck.
        mid_dim = dims[-1]
        self.mid_block1 = block_klass(mid_dim, mid_dim, time_emb_dim=time_dim)
        self.mid_attn = Residual(PreNorm(mid_dim, Attention(mid_dim)))
        self.mid_block2 = block_klass(mid_dim, mid_dim, time_emb_dim=time_dim)

        # Decoder stages. This loop has one element fewer than ``in_out``, so
        # ``is_last`` is never True here and every decoder stage upsamples,
        # matching the number of Downsample layers in the encoder.
        for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])):
            is_last = ind >= (num_resolutions - 1)
            self.ups.append(
                nn.ModuleList(
                    [
                        # Input width is doubled because the encoder feature is
                        # concatenated on the channel axis before this block.
                        block_klass(dim_out * 2, dim_in, time_emb_dim=time_dim),
                        block_klass(dim_in, dim_in, time_emb_dim=time_dim),
                        Residual(PreNorm(dim_in, LinearAttention(dim_in))),
                        Upsample(dim_in) if not is_last else nn.Identity(),
                    ]
                )
            )

        # Output head. The decoder ends at the width of the first stage
        # (dims[1] == dim * dim_mults[0]); using that width instead of a
        # hard-coded ``dim`` keeps the head valid when dim_mults[0] != 1.
        out_dim = out_dim or channels
        final_dim = dims[min(1, len(dims) - 1)]
        self.final_conv = nn.Sequential(
            nn.Conv1d(final_dim, final_dim, 3, padding=1),
            nn.Conv1d(final_dim, out_dim, 1)
        )

    def forward(self, x, time):
        """Run the U-Net.

        Args:
            x: input tensor with ``channels`` channels on axis 1.
            time: time input; it is fed to ``nn.Linear(1, ...)`` so its last
                dimension must be 1. Ignored when the model was built with
                ``with_time_emb=False``.

        Returns:
            Output of the final convolution head (``out_dim`` channels).
        """
        x = self.init_conv(x)

        # Time embedding, or None when time conditioning is disabled.
        t = self.time_mlp(time) if exists(self.time_mlp) else None

        h = []  # encoder features kept for the skip connections

        # Encoder.
        for block1, block2, attn, downsample in self.downs:
            x = block1(x, t)
            x = block2(x, t)
            x = attn(x)
            h.append(x)
            x = downsample(x)

        # Bottleneck.
        x = self.mid_block1(x, t)
        x = self.mid_attn(x)
        x = self.mid_block2(x, t)

        # Decoder: concatenate the matching encoder feature, then refine and upsample.
        for block1, block2, attn, upsample in self.ups:
            x = torch.cat((x, h.pop()), dim=1)
            x = block1(x, t)
            x = block2(x, t)
            x = attn(x)
            x = upsample(x)

        return self.final_conv(x)


In [None]:
# import torch
# from diffusers import DDPMScheduler

# # 初始化擴散調度器
# num_train_timesteps = 1000
# noise_scheduler = DDPMScheduler(num_train_timesteps=num_train_timesteps)

# # 去噪過程
# # ddim
# def denoise(noisy_data, start_t, model, noise_scheduler):
#     """
#     使用 DDIM 方法一步到位预测 x_0。

#     Args:
#         noisy_data (torch.Tensor): 噪声数据，形状为 (batch_size, features, seq_len)。
#         start_t (int): 起始时间步。
#         model (nn.Module): 噪声预测模型，输入为 (data, t)。
#         noise_scheduler (DDPMScheduler): 噪声调度器，提供扩散参数。

#     Returns:
#         torch.Tensor: 直接预测的 x_0。
#     """
#     #t = torch.randint(min_timesteps, max_timesteps, (batch_size,), device=device).long()
#     # 生成当前时间步的张量
#     t_tensor = start_t.unsqueeze(1).float()
#     # t_tensor = torch.tensor([start_t], device=noisy_data.device).float().unsqueeze(1)
#     # print(start_t.shape)
#     # 预测噪声
#     predicted_noise = model(noisy_data, t_tensor)
#     # print(predicted_noise.shape)
#     # print(noisy_data.shape)
#     # 获取调度器参数
#     alpha_bar_t = noise_scheduler.alphas_cumprod[start_t].view(-1, 1, 1)

#     # alpha_bar_t = noise_scheduler.alphas_cumprod[start_t]
#     # print(alpha_bar_t.shape)

#     # 直接预测 x_0
#     # x0_pred = (noisy_data - torch.sqrt(1 - alpha_bar_t) * predicted_noise) / torch.sqrt(alpha_bar_t)
#     x0_pred = (noisy_data - torch.sqrt(torch.clamp(1 - alpha_bar_t, min=1e-5)) * predicted_noise) / torch.sqrt(alpha_bar_t)

#     return x0_pred

# # ddpm

# # def denoise(noisy_data, start_t, model, noise_scheduler):

# #     current_data = noisy_data
# #     start_t = start_t.max().item()
# #     for t in range(start_t, 0, -1):
# #         # t_tensor = torch.tensor([t], device=current_data.device).float().unsqueeze(1)
# #         # 預測噪聲
# #         #print(current_data.shape)
# #         # t = t.long()
# #         predicted_noise = model(current_data, torch.tensor([t], device=current_data.device).float().unsqueeze(1))

# #         # 計算去噪數據
# #         alpha_t = noise_scheduler.alphas[t]
# #         noise_scheduler.alphas_cumprod = noise_scheduler.alphas_cumprod.to(device)
# #         alpha_bar_t = noise_scheduler.alphas_cumprod[t]
# #         alpha_bar_t_prev = noise_scheduler.alphas_cumprod[t - 1] if t > 0 else 1.0
# #         beta_t = 1 - alpha_t

# #         mean = (1 / torch.sqrt(alpha_t)) * (current_data - beta_t / torch.sqrt(1 - alpha_bar_t) * predicted_noise)
# #         if t > 0:
# #             variance = torch.sqrt(beta_t * (1 - alpha_bar_t_prev) / (1 - alpha_bar_t))
# #             current_data = mean + variance * torch.randn_like(current_data)
# #         else:
# #             current_data = mean
# #     return current_data

# from torch.amp import autocast


# def diffusion_augmentation(data, noise_scheduler, model, t_min=799, t_max=900):
#     device = data.device  # 確保所有張量在同一設備
#     noise_scheduler.alphas_cumprod = noise_scheduler.alphas_cumprod.to(device)

#     t = torch.randint(t_min, t_max, (data.size(0),), device=device)
#     alpha_bar = noise_scheduler.alphas_cumprod[t].view(-1, 1, 1)
#     noise = torch.randn_like(data, device=device)

#     with torch.no_grad():
#         with autocast(device_type="cuda", dtype=torch.float16):  # 新語法，指定設備
#             noisy_data = torch.sqrt(alpha_bar) * data + torch.sqrt(1 - alpha_bar) * noise
#             augmented_data = denoise(noisy_data,
#                                      t# t.max().item()
#                                      , model.to(device), noise_scheduler)

#     return augmented_data

In [None]:
import torch
from diffusers import DDPMScheduler

# 初始化擴散調度器
# Number of diffusion timesteps the scheduler is configured with.
num_train_timesteps = 1000
# Shared scheduler instance; the functions below read its alphas / alphas_cumprod.
noise_scheduler = DDPMScheduler(num_train_timesteps=num_train_timesteps)

# 去噪過程
# ddim


# ddpm

import torch

def denoise(noisy_data, start_t, model, noise_scheduler, step_size=100):
    """Run a strided DDPM reverse process starting at ``start_t``.

    Timesteps ``start_t, start_t - step_size, ...`` (all > 0) are visited.
    Because each iteration jumps ``step_size`` timesteps, the posterior uses
    the *effective* per-jump alpha ``alpha_bar_t / alpha_bar_prev`` rather
    than the single-step ``alphas[t]``; for ``step_size=1`` the two coincide.
    On the final iteration ``alpha_bar_prev`` is 1, so the result is the
    model's direct estimate of the clean sample and no noise is added.

    Args:
        noisy_data: tensor to denoise; also defines the target device.
        start_t: starting timestep, either an int or an integer tensor. When
            a tensor is given only its maximum is used, i.e. the whole batch
            shares one schedule.
        model: noise-prediction module called as ``model(x, t)`` with ``t`` a
            float tensor of shape ``(1, 1)``. It is moved to the data device.
        noise_scheduler: object exposing ``alphas_cumprod`` (1-D tensor). Its
            ``alphas_cumprod`` attribute is moved to the data device in place.
        step_size: positive number of timesteps skipped per iteration.

    Returns:
        The denoised tensor, same shape as ``noisy_data``.

    Raises:
        ValueError: if ``step_size`` is not positive.
    """
    if step_size <= 0:
        raise ValueError("step_size must be a positive integer")

    device = noisy_data.device
    current_data = noisy_data
    model = model.to(device)
    noise_scheduler.alphas_cumprod = noise_scheduler.alphas_cumprod.to(device)
    alphas_cumprod = noise_scheduler.alphas_cumprod

    # Accept both a tensor of per-sample timesteps and a plain int.
    t_start = int(start_t.max()) if torch.is_tensor(start_t) else int(start_t)

    for t in range(t_start, 0, -step_size):
        t_tensor = torch.tensor([t], device=device, dtype=torch.float32).unsqueeze(1)
        predicted_noise = model(current_data, t_tensor)

        t_prev = t - step_size
        alpha_bar_t = alphas_cumprod[t]
        if t_prev > 0:
            alpha_bar_t_prev = alphas_cumprod[t_prev]
        else:
            # Jumping to the clean sample: cumulative alpha of "no noise" is 1.
            alpha_bar_t_prev = torch.ones_like(alpha_bar_t)

        # Effective alpha/beta for the jump t -> t_prev.
        alpha_t = alpha_bar_t / alpha_bar_t_prev
        beta_t = 1 - alpha_t

        mean = (current_data - beta_t / torch.sqrt(1 - alpha_bar_t) * predicted_noise) / torch.sqrt(alpha_t)
        if t_prev > 0:
            # Posterior standard deviation for the jump.
            sigma = torch.sqrt(beta_t * (1 - alpha_bar_t_prev) / (1 - alpha_bar_t))
            current_data = mean + sigma * torch.randn_like(current_data)
        else:
            current_data = mean

    return current_data


from torch.amp import autocast


def diffusion_augmentation(data, noise_scheduler, model, fixed_t=700):
    """Create an augmented view by noising ``data`` to ``fixed_t`` and denoising it.

    Args:
        data: batch tensor; ``alpha_bar`` is reshaped to ``(-1, 1, 1)`` so a
            3-D ``(batch, ..., ...)`` layout is expected.
        noise_scheduler: scheduler exposing ``alphas_cumprod``; the attribute
            is moved to the data device in place.
        model: noise-prediction model handed to ``denoise`` (moved to the data device).
        fixed_t: timestep used for every sample of the batch.

    Returns:
        The denoised tensor produced by ``denoise``; computed without gradients.
    """
    device = data.device
    noise_scheduler.alphas_cumprod = noise_scheduler.alphas_cumprod.to(device)

    # Every sample is diffused to the same timestep.
    t = torch.full((data.size(0),), fixed_t, device=device, dtype=torch.long)
    alpha_bar = noise_scheduler.alphas_cumprod[t].view(-1, 1, 1)
    noise = torch.randn_like(data)

    # Mixed precision only makes sense on CUDA; on other devices run in full
    # precision instead of requesting a CUDA autocast context that cannot apply.
    if device.type == "cuda":
        amp_ctx = autocast(device_type="cuda", dtype=torch.float16)
    else:
        amp_ctx = nullcontext()

    with torch.no_grad(), amp_ctx:
        # Forward diffusion q(x_t | x_0).
        noisy_data = torch.sqrt(alpha_bar) * data + torch.sqrt(1 - alpha_bar) * noise
        augmented_data = denoise(noisy_data, t, model.to(device), noise_scheduler)
    return augmented_data


## 傳統的 augmentation

In [None]:
class TimeSeriesAugmentor:
    """Classic time-series augmentations: jitter, per-feature scaling and masking.

    Every method accepts either a NumPy array or a torch tensor and returns
    the same kind of object (tensors stay on their device). The input is
    never modified in place.

    Args:
        jitter_sigma: standard deviation of the additive Gaussian noise.
        scaling_sigma: standard deviation of the multiplicative factor
            (drawn around 1.0, one factor per sample and feature).
        mask_ratio: fraction of positions on the last axis set to zero.
    """

    def __init__(self, jitter_sigma=0.01, scaling_sigma=0.1, mask_ratio=0.1):
        self.jitter_sigma = jitter_sigma
        self.scaling_sigma = scaling_sigma
        self.mask_ratio = mask_ratio

    @staticmethod
    def _as_float(X):
        """Return a floating-point tensor (integer tensors cannot hold Gaussian noise)."""
        return X if X.is_floating_point() else X.float()

    def jitter(self, X):
        """Add zero-mean Gaussian noise with std ``jitter_sigma`` to every element."""
        if torch.is_tensor(X):
            X = self._as_float(X)
            return X + torch.randn_like(X) * self.jitter_sigma
        noise = np.random.normal(loc=0, scale=self.jitter_sigma, size=X.shape)
        return X + noise

    def scaling(self, X):
        """Multiply each (sample, feature) series by a factor drawn from N(1, scaling_sigma)."""
        if torch.is_tensor(X):
            X = self._as_float(X)
            unit_noise = torch.randn(X.shape[0], X.shape[1], 1, device=X.device, dtype=X.dtype)
            return X * (1.0 + self.scaling_sigma * unit_noise)
        scaling_factor = np.random.normal(loc=1.0, scale=self.scaling_sigma, size=(X.shape[0], X.shape[1], 1))
        return X * scaling_factor

    def masking(self, X):
        """Zero out ``int(seq_len * mask_ratio)`` random positions per sample (all features)."""
        is_tensor = torch.is_tensor(X)
        # Work on a copy; ndarray has no .clone() and tensors have no need for .copy().
        X_masked = X.clone() if is_tensor else X.copy()
        batch_size, num_features, seq_len = X.shape
        # Clamp so that sampling without replacement can never over-draw.
        num_mask = min(seq_len, max(0, int(seq_len * self.mask_ratio)))

        for i in range(batch_size):
            mask_idx = np.random.choice(seq_len, num_mask, replace=False)
            if is_tensor:
                mask_idx = torch.from_numpy(mask_idx).to(X_masked.device)
            X_masked[i, :, mask_idx] = 0  # could be NaN instead, depending on the downstream model

        return X_masked

    def generate_views(self, X):
        """Build two augmented views of ``X``.

        Args:
            X: array/tensor of shape (batch_size, features, time).

        Returns:
            ``(view1, view2)`` where ``view1`` is jittered and ``view2`` is
            scaled and then masked.
        """
        view1 = self.jitter(X)
        view2 = self.scaling(X)
        view2 = self.masking(view2)
        return view1, view2


## 在使用 diff_model 訓練時再加就好

In [None]:
# model = Unet(
#     dim=64,            # 基礎隱藏層維度
#     channels=31,        # 輸入通道數，適合單通道時序數據
#     dim_mults=(1, 2, 4, 8),  # 各層維度倍增
#     with_time_emb=True,  # 使用時間嵌入

# ).to('cpu')
# model = torch.load("/content/model_with_structure.pth", weights_only=False)

In [None]:
import torch
import torch.nn as nn

class TransformerEncoder(nn.Module):
    """Transformer encoder over the time axis of a (batch, features, time) input.

    Args:
        input_dim: number of input features per time step.
        embed_dim: Transformer embedding width.
        num_heads: number of attention heads.
        num_layers: number of encoder layers.
        dropout: dropout rate inside the encoder layers.
        max_len: number of positions in the learned positional table
            (default 32, the previously hard-coded length).
    """

    def __init__(self, input_dim, embed_dim, num_heads, num_layers, dropout=0.1, max_len=32):
        super(TransformerEncoder, self).__init__()
        self.input_dim = input_dim
        self.embed_dim = embed_dim

        # Per-time-step linear embedding of the feature vector.
        self.feature_embedding = nn.Linear(input_dim, embed_dim)

        # Learned positional encoding, initialised to zeros.
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_len, embed_dim))

        # batch_first=True is required: forward() feeds (batch, time, embed).
        # With the PyTorch default (sequence-first) the layers would treat the
        # batch axis as the sequence and attend across samples.
        encoder_layer = nn.TransformerEncoderLayer(
            embed_dim,
            num_heads,
            dim_feedforward=embed_dim * 4,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)

    def forward(self, x):
        """Encode a batch of sequences.

        Args:
            x: tensor of shape [batch_size, input_dim, seq_len]; ``seq_len``
                may be at most the length of the positional table.

        Returns:
            Tensor of shape [batch_size, embed_dim]: the encoder output at the
            first time step (there is no dedicated CLS token).
        """
        x = x.permute(0, 2, 1)          # -> [batch_size, seq_len, input_dim]
        x = self.feature_embedding(x)   # -> [batch_size, seq_len, embed_dim]

        # Use only as many positions as the sequence has.
        x = x + self.positional_encoding[:, : x.size(1)]

        encoded = self.transformer(x)   # [batch_size, seq_len, embed_dim]
        return encoded[:, 0, :]


In [None]:
class SimSiamForTimeSeries(nn.Module):
    """SimSiam model: shared encoder + projector MLP + predictor MLP.

    Args:
        input_dim: number of input features, forwarded to ``TransformerEncoder``.
        embed_dim: encoder output width.
        projector_dim: hidden and output width of the projector.
        predictor_dim: hidden (bottleneck) width of the predictor.
        num_heads: attention heads in the encoder.
        num_layers: encoder layers.
    """

    def __init__(self, input_dim, embed_dim=128, projector_dim=256, predictor_dim=128, num_heads=4, num_layers=2):
        super().__init__()
        self.encoder = TransformerEncoder(input_dim, embed_dim, num_heads, num_layers)

        # Projector MLP: embed_dim -> projector_dim -> projector_dim.
        projector_layers = [
            nn.Linear(embed_dim, projector_dim),
            nn.BatchNorm1d(projector_dim),
            nn.GELU(),
            nn.Linear(projector_dim, projector_dim),
        ]
        self.projector = nn.Sequential(*projector_layers)

        # Predictor MLP: projector_dim -> predictor_dim -> projector_dim.
        predictor_layers = [
            nn.Linear(projector_dim, predictor_dim),
            nn.BatchNorm1d(predictor_dim),
            nn.GELU(),
            nn.Linear(predictor_dim, projector_dim),
        ]
        self.predictor = nn.Sequential(*predictor_layers)

    def _project(self, view):
        """Encode one view and pass it through the projector."""
        return self.projector(self.encoder(view))

    def forward(self, x1, x2):
        """Return ``(p1, z1, p2, z2)`` for the two views.

        ``p*`` are predictor outputs (with gradients); ``z*`` are projector
        outputs detached from the graph (stop-gradient targets).
        """
        z1 = self._project(x1)
        z2 = self._project(x2)
        p1 = self.predictor(z1)
        p2 = self.predictor(z2)
        return p1, z1.detach(), p2, z2.detach()

In [None]:
def simsiam_loss(p1, z2, p2, z1, verbose=False):
    """Symmetric SimSiam loss: mean negative cosine similarity of (p1, z2) and (p2, z1).

    Args:
        p1, p2: predictor outputs, shape (batch, dim).
        z2, z1: target projections, shape (batch, dim).
        verbose: when True, print both partial losses.

    Returns:
        Scalar tensor ``(D(p1, z2) + D(p2, z1)) / 2``.
    """
    unit = torch.nn.functional.normalize

    def _neg_cosine(pred, target):
        # Row-wise cosine similarity via L2-normalised dot products.
        pred_unit = unit(pred, dim=1, eps=1e-6)
        target_unit = unit(target, dim=1, eps=1e-6)
        per_sample = torch.sum(pred_unit * target_unit, dim=1)
        return -torch.mean(per_sample)

    loss1 = _neg_cosine(p1, z2)
    loss2 = _neg_cosine(p2, z1)
    if verbose:
        print(f"loss1: {loss1.item():.4f}, loss2: {loss2.item():.4f}")
    total = loss1 + loss2
    return total / 2



## diff_augment 的訓練函數

In [None]:
def validate(model, simsiam_model, noise_scheduler, val_loader, device):
    """Evaluate the SimSiam model on diffusion-augmented views.

    Args:
        model: diffusion model used to generate the augmented views.
        simsiam_model: SimSiam model under evaluation (switched to eval mode).
        noise_scheduler: noise scheduler passed to ``diffusion_augmentation``.
        val_loader: DataLoader yielding validation batches.
        device: device the batches are moved to.

    Returns:
        Sum of the batch losses divided by ``len(val_loader)``.
    """
    simsiam_model.eval()
    running_loss = 0.0

    # Gradients are not needed during validation.
    with torch.no_grad(), tqdm(val_loader, desc="Validation", unit="batch") as val_pbar:
        for batch in val_pbar:
            batch = batch.to(device).float()

            # Two independent stochastic views of the same batch.
            view1 = diffusion_augmentation(batch, noise_scheduler, model)
            view2 = diffusion_augmentation(batch, noise_scheduler, model)

            p1, z1, p2, z2 = simsiam_model(view1, view2)

            batch_loss = simsiam_loss(p1, z2, p2, z1).item()
            running_loss += batch_loss

            val_pbar.set_postfix(loss=batch_loss)

    return running_loss / len(val_loader)


In [None]:
import os
def load_checkpoint(model, optimizer, filepath="checkpoint.pth"):
    """Restore model and optimizer state from ``filepath`` if it exists.

    The model weights are read from ``"model_state_dict"`` or, for
    checkpoints written by the training loops in this notebook, from
    ``"simsiam_model_state_dict"``.

    Args:
        model: module whose ``load_state_dict`` receives the stored weights.
        optimizer: optimizer whose ``load_state_dict`` receives the stored state.
        filepath: checkpoint path.

    Returns:
        The epoch to resume from (stored epoch + 1), or 0 when no checkpoint
        file exists.

    Raises:
        KeyError: if the checkpoint contains neither model-weight key.
    """
    if not os.path.exists(filepath):
        print(f"No checkpoint found at {filepath}, starting from scratch")
        return 0

    checkpoint = torch.load(filepath, map_location=torch.device("cpu"))

    # Accept both key names so checkpoints saved by train()/train_tran() load too.
    for key in ("model_state_dict", "simsiam_model_state_dict"):
        if key in checkpoint:
            model.load_state_dict(checkpoint[key])
            break
    else:
        raise KeyError("model_state_dict")

    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    start_epoch = checkpoint["epoch"] + 1
    print(f"Checkpoint loaded from {filepath}, starting at epoch {start_epoch}")
    return start_epoch


In [None]:
import os
from datetime import datetime
from tqdm import tqdm

def train(model, simsiam_model, noise_scheduler, optimizer, train_dataloader, val_dataloader, epochs, device, resume_from_checkpoint=None):
    """Train the SimSiam model on views generated by diffusion augmentation.

    Batches (or their views, or their loss) containing NaN/Inf are skipped.
    The reported training loss is the mean over the batches that were
    actually used for an optimisation step. A checkpoint is written after
    every epoch.

    NOTE: this function calls ``validate`` with the diffusion signature
    ``(model, simsiam_model, noise_scheduler, val_loader, device)``. A later
    cell of this notebook defines another ``validate`` with a different
    signature; make sure the diffusion variant is the one in scope.

    Args:
        model: diffusion model used only to produce augmented views.
        simsiam_model: SimSiam model being trained.
        noise_scheduler: scheduler exposing ``alphas_cumprod``.
        optimizer: optimizer over ``simsiam_model`` parameters.
        train_dataloader: DataLoader yielding training batches.
        val_dataloader: DataLoader yielding validation batches.
        epochs: total number of epochs (exclusive upper bound of the epoch index).
        device: device for models and data.
        resume_from_checkpoint: optional path of a checkpoint written by this function.

    Returns:
        List of dicts with keys ``"epoch"``, ``"train_loss"`` and ``"val_loss"``.
    """
    model = model.to(device)
    simsiam_model = simsiam_model.to(device)
    noise_scheduler.alphas_cumprod = noise_scheduler.alphas_cumprod.to(device)

    results = []  # one entry per epoch
    start_epoch = 0

    # Optionally resume model/optimizer state and the epoch counter.
    if resume_from_checkpoint:
        checkpoint = torch.load(resume_from_checkpoint, map_location=device)
        simsiam_model.load_state_dict(checkpoint["simsiam_model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        start_epoch = checkpoint["epoch"] + 1
        print(f"已加载检查点: {resume_from_checkpoint}, 从第 {start_epoch} 个 epoch 开始")

    for epoch in range(start_epoch, epochs):
        simsiam_model.train()
        epoch_train_loss = 0.0
        num_batches_used = 0  # batches that contributed to the loss sum

        with tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{epochs}", unit="batch") as train_pbar:
            for batch in train_pbar:
                batch = batch.to(device).float()

                # Skip corrupted input batches.
                if not torch.isfinite(batch).all():
                    print("Batch contains NaN or Inf. Skipping this batch.")
                    continue

                # Two independent augmented views (these may contain NaN).
                view1 = diffusion_augmentation(batch, noise_scheduler, model)
                view2 = diffusion_augmentation(batch, noise_scheduler, model)

                if not torch.isfinite(view1).all():
                    print("view1 contains NaN or Inf. Skipping this batch.")
                    continue
                if not torch.isfinite(view2).all():
                    print("view2 contains NaN or Inf. Skipping this batch.")
                    continue

                p1, z1, p2, z2 = simsiam_model(view1, view2)
                loss = simsiam_loss(p1, z2, p2, z1)

                # A non-finite loss would poison the weights through the
                # optimizer step, so skip the update entirely.
                if not torch.isfinite(loss):
                    print("Loss contains NaN or Inf. Skipping this batch.")
                    continue

                optimizer.zero_grad()
                loss.backward()

                # Gradient clipping.
                torch.nn.utils.clip_grad_norm_(simsiam_model.parameters(), max_norm=1.0)

                optimizer.step()

                loss_value = loss.item()
                epoch_train_loss += loss_value
                num_batches_used += 1
                train_pbar.set_postfix(loss=loss_value)

        # Average over processed batches only; skipped batches must not
        # dilute the mean. NaN signals that no batch was usable.
        if num_batches_used:
            avg_train_loss = epoch_train_loss / num_batches_used
        else:
            avg_train_loss = float("nan")
        avg_val_loss = validate(model, simsiam_model, noise_scheduler, val_dataloader, device)

        results.append({"epoch": epoch + 1, "train_loss": avg_train_loss, "val_loss": avg_val_loss})
        print(f"Epoch {epoch + 1}/{epochs}: Train Loss = {avg_train_loss:.4f}, Validation Loss = {avg_val_loss:.4f}")

        # Save a checkpoint for this epoch.
        now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S"+'ddpm_700')
        save_path = f"checkpoint_epoch_{epoch+1}_{now}.pt"
        torch.save({
            "simsiam_model_state_dict": simsiam_model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "epoch": epoch,
        }, save_path)
        tqdm.write(f"Checkpoint saved at {save_path}")

    return results


## 傳統 augment 的訓練函數

In [None]:
import numpy as np

# 建立假資料 X: shape = (4, 31, 32)
np.random.seed(42)  # for reproducibility
X = np.random.rand(4, 31, 32)  # dummy factor values in [0, 1), shape (4, 31, 32)

# Set up the augmentor.
augmentor = TimeSeriesAugmentor(jitter_sigma=0.02, scaling_sigma=0.15, mask_ratio=0.2)

# view1: jittered; view2: scaled, then masked.
view1, view2 = augmentor.generate_views(X)

# Show the same slice of the original and of both views, so the printed
# values match their labels.
print("Original X[0][0][:5]:\n", X[0][0][:5])
print("View1 (Jittered) X[0][0][:5]:\n", view1[0][0][:5])
print("View2 (Scaled + Masked) X[0][0][:5]:\n", view2[0][0][:5])



AttributeError: 'numpy.ndarray' object has no attribute 'clone'

In [None]:
def validate(augmentor,simsiam_model, val_loader, device):
    """Evaluate the SimSiam model on views produced by ``augmentor``.

    NOTE: this definition reuses the name ``validate`` of the diffusion-based
    variant defined in an earlier cell of this notebook; once this cell has
    run, the name refers to this function.

    Args:
        augmentor: object whose ``generate_views(batch)`` returns two views.
        simsiam_model: SimSiam model under evaluation (switched to eval mode).
        val_loader: DataLoader yielding validation batches.
        device: device the batches are moved to.

    Returns:
        Sum of the batch losses divided by ``len(val_loader)``.
    """
    simsiam_model.eval()
    running_loss = 0.0

    # Gradients are not needed during validation.
    with torch.no_grad(), tqdm(val_loader, desc="Validation", unit="batch") as val_pbar:
        for batch in val_pbar:
            batch = batch.to(device).float()

            # Build the two augmented views and make sure they are float32.
            raw_view1, raw_view2 = augmentor.generate_views(batch)
            view1 = raw_view1.float()
            view2 = raw_view2.float()

            p1, z1, p2, z2 = simsiam_model(view1, view2)

            batch_loss = simsiam_loss(p1, z2, p2, z1).item()
            running_loss += batch_loss

            val_pbar.set_postfix(loss=batch_loss)

    return running_loss / len(val_loader)


In [None]:
import os
def load_checkpoint(model, optimizer, filepath="checkpoint.pth"):
    """Restore model and optimizer state from ``filepath`` if it exists.

    The model weights are read from ``"model_state_dict"`` or, for
    checkpoints written by the training loops in this notebook, from
    ``"simsiam_model_state_dict"``.

    Args:
        model: module whose ``load_state_dict`` receives the stored weights.
        optimizer: optimizer whose ``load_state_dict`` receives the stored state.
        filepath: checkpoint path.

    Returns:
        The epoch to resume from (stored epoch + 1), or 0 when no checkpoint
        file exists.

    Raises:
        KeyError: if the checkpoint contains neither model-weight key.
    """
    if not os.path.exists(filepath):
        print(f"No checkpoint found at {filepath}, starting from scratch")
        return 0

    checkpoint = torch.load(filepath, map_location=torch.device("cpu"))

    # Accept both key names so checkpoints saved by train()/train_tran() load too.
    for key in ("model_state_dict", "simsiam_model_state_dict"):
        if key in checkpoint:
            model.load_state_dict(checkpoint[key])
            break
    else:
        raise KeyError("model_state_dict")

    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    start_epoch = checkpoint["epoch"] + 1
    print(f"Checkpoint loaded from {filepath}, starting at epoch {start_epoch}")
    return start_epoch


In [None]:
import os
from datetime import datetime
from tqdm import tqdm

def train_tran(augmentor, simsiam_model, optimizer, train_dataloader, val_dataloader, epochs, device, resume_from_checkpoint=None):
    """Train the SimSiam model on views produced by a classic augmentor.

    Batches (or their views, or their loss) containing NaN/Inf are skipped.
    The reported training loss is the mean over the batches that were
    actually used for an optimisation step. A checkpoint is written after
    every epoch.

    Args:
        augmentor: object whose ``generate_views(batch)`` returns two views.
        simsiam_model: SimSiam model being trained.
        optimizer: optimizer over ``simsiam_model`` parameters.
        train_dataloader: DataLoader yielding training batches.
        val_dataloader: DataLoader yielding validation batches.
        epochs: total number of epochs (exclusive upper bound of the epoch index).
        device: device for the model and data.
        resume_from_checkpoint: optional path of a checkpoint written by this function.

    Returns:
        List of dicts with keys ``"epoch"``, ``"train_loss"`` and ``"val_loss"``.
    """
    simsiam_model = simsiam_model.to(device)

    results = []  # one entry per epoch
    start_epoch = 0

    # Optionally resume model/optimizer state and the epoch counter.
    if resume_from_checkpoint:
        checkpoint = torch.load(resume_from_checkpoint, map_location=device)
        simsiam_model.load_state_dict(checkpoint["simsiam_model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        start_epoch = checkpoint["epoch"] + 1
        print(f"已加载检查点: {resume_from_checkpoint}, 从第 {start_epoch} 个 epoch 开始")

    for epoch in range(start_epoch, epochs):
        simsiam_model.train()
        epoch_train_loss = 0.0
        num_batches_used = 0  # batches that contributed to the loss sum

        with tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{epochs}", unit="batch") as train_pbar:
            for batch in train_pbar:
                batch = batch.to(device).float()

                # Skip corrupted input batches.
                if not torch.isfinite(batch).all():
                    print("Batch contains NaN or Inf. Skipping this batch.")
                    continue

                # Build the two augmented views and make sure they are float32.
                view1, view2 = augmentor.generate_views(batch)
                view1 = view1.float()
                view2 = view2.float()

                if not torch.isfinite(view1).all():
                    print("view1 contains NaN or Inf. Skipping this batch.")
                    continue
                if not torch.isfinite(view2).all():
                    print("view2 contains NaN or Inf. Skipping this batch.")
                    continue

                p1, z1, p2, z2 = simsiam_model(view1, view2)
                loss = simsiam_loss(p1, z2, p2, z1)

                # A non-finite loss would poison the weights through the
                # optimizer step, so skip the update entirely.
                if not torch.isfinite(loss):
                    print("Loss contains NaN or Inf. Skipping this batch.")
                    continue

                optimizer.zero_grad()
                loss.backward()

                # Gradient clipping.
                torch.nn.utils.clip_grad_norm_(simsiam_model.parameters(), max_norm=1.0)

                optimizer.step()

                loss_value = loss.item()
                epoch_train_loss += loss_value
                num_batches_used += 1
                train_pbar.set_postfix(loss=loss_value)

        # Average over processed batches only; skipped batches must not
        # dilute the mean. NaN signals that no batch was usable.
        if num_batches_used:
            avg_train_loss = epoch_train_loss / num_batches_used
        else:
            avg_train_loss = float("nan")
        avg_val_loss = validate(augmentor, simsiam_model, val_dataloader, device)

        results.append({"epoch": epoch + 1, "train_loss": avg_train_loss, "val_loss": avg_val_loss})
        print(f"Epoch {epoch + 1}/{epochs}: Train Loss = {avg_train_loss:.4f}, Validation Loss = {avg_val_loss:.4f}")

        # Save a checkpoint for this epoch.
        now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S"+'tran_augment')
        save_path = f"checkpoint_epoch_{epoch+1}_{now}.pt"
        torch.save({
            "simsiam_model_state_dict": simsiam_model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "epoch": epoch,
        }, save_path)
        tqdm.write(f"Checkpoint saved at {save_path}")

    return results


## Load 對比學習訓練資料

In [None]:
# import numpy as np
# from torch.utils.data import DataLoader, random_split

# # # 數據集分割
# # train_size = int(0.8 * len(dataset))
# # val_size = len(dataset) - train_size
# # train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
# # np.save("train_data.npy", train_dataset)
# # np.save('val_data.npy',val_dataset)

# # 加載數據
# train_dataset = np.load('/content/X_train_norm.npy')
# val_dataset = np.load('/content/X_val_norm.npy')

# # 創建 DataLoader
# train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=False)

In [None]:
import torch
import numpy as np
from torch.utils.data import DataLoader, random_split
# 讀取時
# Open the binary-label dataset archive and pull out the four arrays.
data = np.load('/content/dataset_LSTM_binary.npz')
X_train, y_train = data['X_train'], data['y_train']
X_test, y_test = data['X_test'], data['y_test']

# Report the array shapes: X is (samples, time_steps, features), y is (samples,).
print('X_train.shape =', X_train.shape)
print('y_train.shape =', y_train.shape)
print('X_test.shape  =', X_test.shape)
print('y_test.shape  =', y_test.shape)

# Normalisation is currently disabled in this cell: the raw values are used.

# Convert to float32 tensors and swap the last two axes, giving
# (samples, features, time_steps).
train_dataset = torch.from_numpy(
    X_train.astype(np.float32)
).permute(0, 2, 1)
val_dataset = torch.from_numpy(
    X_test.astype(np.float32)
).permute(0, 2, 1)

# Only the training loader shuffles.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
)

X_train.shape = (92583, 32, 31)
y_train.shape = (92583,)
X_test.shape  = (21717, 32, 31)
y_test.shape  = (21717,)


In [None]:
import numpy as np
# Load the previously saved train / validation arrays.
train_dataset = np.load('/content/train_data.npy')
val_dataset = np.load('/content/val_data.npy')

# Mean / std reduced over axes 0 and 2; keepdims lets them broadcast back.
# NOTE(review): the original comment describes the result as (1, features, 1),
# which assumes the saved arrays are (samples, features, time) -- confirm.
t_mean, t_std = (
    train_dataset.mean(axis=(0, 2), keepdims=True),
    train_dataset.std(axis=(0, 2), keepdims=True),
)
# The validation statistics are also taken from the training array.
v_mean, v_std = (
    train_dataset.mean(axis=(0, 2), keepdims=True),
    train_dataset.std(axis=(0, 2), keepdims=True),
)

# Standardise; the epsilon avoids division by zero.
X_train_norm = (train_dataset - t_mean) / (t_std + 1e-8)
X_val_norm = (val_dataset - v_mean) / (v_std + 1e-8)

# Persist the normalised arrays in the working directory.
np.save("X_train_norm.npy", X_train_norm)
np.save('X_val_norm.npy', X_val_norm)

In [None]:
# Contrastive pre-training with classical augmentations.
# The device is pinned to CPU; GPU auto-detection is switched off here.
device = 'cpu'
input_dim = 31  # number of features

augmentor = TimeSeriesAugmentor(
    jitter_sigma=0.02,
    scaling_sigma=0.15,
    mask_ratio=0.2,
)
simsiam_model = SimSiamForTimeSeries(input_dim).to(device)
optimizer = torch.optim.Adam(simsiam_model.parameters(), lr=1e-5)

# `train_tran` must already be defined in the kernel before this cell runs.
results = train_tran(
    augmentor,
    simsiam_model,
    optimizer,
    train_dataloader,
    val_dataloader,
    epochs=20,
    device=device,
    resume_from_checkpoint=False,
)

NameError: name 'train_tran' is not defined

In [None]:
#-0.8137, Validation Loss = -0.8974
# Contrastive pre-training that uses a DDPM noise scheduler, resumed from a
# saved checkpoint. `model` is not created in this cell and must already exist
# in the kernel.
noise_scheduler = DDPMScheduler(num_train_timesteps=1000)

# The device is pinned to CPU; GPU auto-detection is switched off here.
device = 'cpu'
input_dim = 31  # number of features

simsiam_model = SimSiamForTimeSeries(input_dim).to(device)
optimizer = torch.optim.Adam(simsiam_model.parameters(), lr=1e-5)

results = train(
    model,
    simsiam_model,
    noise_scheduler,
    optimizer,
    train_dataloader,
    val_dataloader,
    epochs=20,
    device=device,
    resume_from_checkpoint='/content/checkpoint_epoch_4_2025-04-21_07-45-24ddpm_400-700.pt',
)



已加载检查点: /content/checkpoint_epoch_4_2025-04-21_07-45-24ddpm_400-700.pt, 从第 4 个 epoch 开始


Epoch 5/20: 100%|██████████| 724/724 [1:52:32<00:00,  9.33s/batch, loss=-0.944]
Validation: 100%|██████████| 170/170 [24:19<00:00,  8.59s/batch, loss=-0.965]


Epoch 5/20: Train Loss = -0.9404, Validation Loss = -0.9542
Checkpoint saved at checkpoint_epoch_5_2025-04-29_15-20-10ddpm_300.pt


Epoch 6/20: 100%|██████████| 724/724 [1:51:42<00:00,  9.26s/batch, loss=-0.941]
Validation: 100%|██████████| 170/170 [24:08<00:00,  8.52s/batch, loss=-0.973]


Epoch 6/20: Train Loss = -0.9516, Validation Loss = -0.9674
Checkpoint saved at checkpoint_epoch_6_2025-04-29_17-36-01ddpm_300.pt


Epoch 7/20: 100%|██████████| 724/724 [1:51:15<00:00,  9.22s/batch, loss=-0.956]
Validation: 100%|██████████| 170/170 [23:58<00:00,  8.46s/batch, loss=-0.975]


Epoch 7/20: Train Loss = -0.9592, Validation Loss = -0.9691
Checkpoint saved at checkpoint_epoch_7_2025-04-29_19-51-14ddpm_300.pt


Epoch 8/20:  11%|█         | 79/724 [12:03<1:40:14,  9.33s/batch, loss=-0.969]

## 預測訓練資料

In [None]:
import numpy as np
# 讀取時
# Load the regression dataset archive and pull out the four arrays.
data = np.load('dataset_LSTM.npz')
X_train = data['X_train']
y_train = data['y_train']
X_test  = data['X_test']
y_test  = data['y_test']

print('X_train.shape =', X_train.shape)  # (samples_train, time_steps, features)
print('y_train.shape =', y_train.shape)  # (samples_train,)
print('X_test.shape  =', X_test.shape)   # (samples_test, time_steps, features)
print('y_test.shape  =', y_test.shape)   # (samples_test,)

# Per-feature statistics. X is laid out (samples, time_steps, features), so the
# reduction runs over axes 0 and 1. The previous axis=(0, 2) produced one
# statistic per time step, mixing all features together.
t_mean = X_train.mean(axis=(0, 1), keepdims=True)  # shape: (1, 1, features)
t_std = X_train.std(axis=(0, 1), keepdims=True)    # shape: (1, 1, features)
# The held-out inputs are scaled with the training statistics, so nothing
# computed from the test split leaks into preprocessing.
v_mean = t_mean
v_std = t_std

# Standardise the INPUTS of both splits. The previous code normalised `y_test`
# here, which yields an array of the wrong shape and content.
X_train_norm = (X_train - t_mean) / (t_std + 1e-8)
X_val_norm = (X_test - v_mean) / (v_std + 1e-8)

# np.save("X_train_norm.npy", X_train_norm)
# np.save('X_val_norm.npy',X_val_norm)

# Convert the NumPy arrays to float32 PyTorch tensors.
X_train_t = torch.from_numpy(X_train_norm.astype(np.float32))
y_train_t = torch.from_numpy(y_train.astype(np.float32))

X_test_t  = torch.from_numpy(X_val_norm.astype(np.float32))
y_test_t  = torch.from_numpy(y_test.astype(np.float32))


X_train.shape = (92583, 32, 31)
y_train.shape = (92583,)
X_test.shape  = (21717, 32, 31)
y_test.shape  = (21717,)


In [None]:
import torch
import torch.nn as nn

class EmbeddingPredictor(nn.Module):
    """MLP head stacked on top of an encoder that is evaluated without gradients.

    Args:
        encoder: module called on the input after its last two axes are swapped;
            its output is fed to the MLP, so it must have ``embedding_dim``
            features in the last dimension.
        embedding_dim: width of the encoder output.
        hidden_dims: hidden layer widths of the MLP; ``None`` means ``[64]``.
        out_dim: width of the final linear layer.
    """

    def __init__(self, encoder, embedding_dim=128, hidden_dims=None, out_dim=1):
        super().__init__()
        self.encoder = encoder

        # Layer widths from the embedding through every hidden layer.
        widths = [embedding_dim] + list([64] if hidden_dims is None else hidden_dims)

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # Final projection carries no activation.
        stack.append(nn.Linear(widths[-1], out_dim))
        self.mlp = nn.Sequential(*stack)

    def forward(self, x):
        """Return the MLP output for ``x`` (a 3-D tensor whose axes 1 and 2 are swapped for the encoder)."""
        swapped = x.permute(0, 2, 1)
        # The encoder runs under no_grad, so only the MLP receives gradients.
        with torch.no_grad():
            features = self.encoder(swapped)  # => [B, embedding_dim]
        return self.mlp(features)             # => [B, out_dim]


In [None]:
from tqdm import tqdm

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: module called on each input batch; a trailing size-1 dimension
            of its output is squeezed away.
        loader: iterable of ``(inputs, targets)`` batches that supports ``len``.
        optimizer: optimiser stepped once per batch.
        criterion: loss called as ``criterion(prediction, target)``.
        device: device each batch is moved to.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    model.train()  # enable training mode
    progress = tqdm(loader, desc="Train", leave=False)

    running = 0.0
    for inputs, targets in progress:
        inputs = inputs.to(device)
        targets = targets.to(device).float()
        # Targets shaped [B, 1] are flattened to [B] to match the prediction.
        if targets.dim() == 2:
            targets = targets.squeeze(-1)

        optimizer.zero_grad()
        predictions = model(inputs).squeeze(-1)  # => [B]
        batch_loss = criterion(predictions, targets)
        batch_loss.backward()
        optimizer.step()

        loss_value = batch_loss.item()
        running += loss_value
        progress.set_postfix({"batch_loss": f"{loss_value:.4f}"})

    return running / len(loader)
def eval_one_epoch(model, loader, criterion, device, desc="Eval"):
    """Evaluate ``model`` over ``loader`` and return the mean batch loss.

    The model is put in eval mode and no gradients are recorded.

    Args:
        model: module called on each input batch; a trailing size-1 dimension
            of its output is squeezed away.
        loader: iterable of ``(inputs, targets)`` batches that supports ``len``.
        criterion: loss called as ``criterion(prediction, target)``
            (for regression, e.g. ``MSELoss``).
        device: device each batch is moved to.
        desc: label shown on the tqdm progress bar, e.g. "Valid" or "Test".

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    model.eval()
    progress = tqdm(loader, desc=desc, leave=False)

    running = 0.0
    with torch.no_grad():
        for inputs, targets in progress:
            inputs = inputs.to(device)
            targets = targets.to(device).float()
            # Targets shaped [B, 1] are flattened to [B] to match the prediction.
            if targets.dim() == 2:
                targets = targets.squeeze(-1)

            predictions = model(inputs).squeeze(-1)
            loss_value = criterion(predictions, targets).item()

            running += loss_value
            progress.set_postfix({"batch_loss": f"{loss_value:.4f}"})

    return running / len(loader)


In [None]:
from torch.utils.data import Dataset, DataLoader, TensorDataset
# Pair inputs with targets, then wrap each split in a DataLoader.
# Only the training loader shuffles.
train_dataset = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)

test_dataset = TensorDataset(X_test_t, y_test_t)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

In [None]:
# Rebuild the SimSiam model on CPU (GPU auto-detection is switched off here),
# restore its weights from a checkpoint and keep only the encoder.
device = 'cpu'
input_dim = 31  # number of features

simsiam_model = SimSiamForTimeSeries(input_dim).to(device)
checkpoint = torch.load(
    '/content/checkpoint_epoch_9_2025-04-25_14-35-03tran_augment.pt',
    map_location=device,
)
simsiam_model.load_state_dict(checkpoint["simsiam_model_state_dict"])

encoder = simsiam_model.encoder

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, random_split

device = "cuda" if torch.cuda.is_available() else "cpu"

# Freeze the pre-trained encoder: only the MLP head is trained.
for p in encoder.parameters():
    p.requires_grad = False

# Build the regression head on top of the frozen encoder.
model = EmbeddingPredictor(
    encoder=encoder,
    embedding_dim=128,
    hidden_dims=[256, 128, 64],
    out_dim=1
).to(device)

criterion = nn.MSELoss()
# Hand the optimiser only the parameters that are actually trainable.
optimizer = optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=1e-3
)

# Carve a validation split out of the training data. The previous loop
# "validated" on train_loader itself, so best-model selection was driven by
# training loss. The test set stays untouched until the final evaluation.
VAL_FRACTION = 0.1
full_train_set = train_loader.dataset
val_size = max(1, int(VAL_FRACTION * len(full_train_set)))
fit_set, val_set = random_split(
    full_train_set,
    [len(full_train_set) - val_size, val_size],
    generator=torch.Generator().manual_seed(42),  # reproducible split
)
fit_loader = DataLoader(fit_set, batch_size=train_loader.batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=train_loader.batch_size, shuffle=False)

num_epochs = 20
best_val_loss = float('inf')  # best validation loss seen so far

for epoch in range(num_epochs):
    print(f"Epoch [{epoch+1}/{num_epochs}]")

    # 1) train on the fitting split
    train_loss = train_one_epoch(model, fit_loader, optimizer, criterion, device)

    # 2) validate on the held-out split
    val_loss = eval_one_epoch(model, val_loader, criterion, device, desc="Valid")

    print(f"  Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

    # Keep the weights of the best epoch according to validation loss.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_model.pth")
        print("  (New best model saved!)")

# =============== final test =================
# Reload the best weights; map_location keeps this working if the checkpoint
# was written on a different device.
model.load_state_dict(torch.load("best_model.pth", map_location=device))

# Evaluate once on the untouched test set.
test_loss = eval_one_epoch(model, test_loader, criterion, device, desc="Test")
print(f"Final Test Loss = {test_loss:.4f}")


Epoch [1/20]




  Train Loss = 10.7562, Val Loss = 10.7374
  (New best model saved!)
Epoch [2/20]




KeyboardInterrupt: 

## 二元分類

embedding+MLP

In [None]:
import torch
import torch.nn as nn

class EmbeddingPredictor(nn.Module):
    """Classification head on top of an encoder that is evaluated without gradients.

    Args:
        encoder: module called on the input after its last two axes are swapped;
            its output is fed to the MLP, so it must have ``embedding_dim``
            features in the last dimension.
        embedding_dim: width of the encoder output.
        hidden_dims: hidden layer widths of the MLP; ``None`` means ``[128, 64]``.
        out_dim: number of logits produced (2 by default).
    """

    def __init__(self, encoder, embedding_dim=128, hidden_dims=None, out_dim=2):
        super().__init__()
        self.encoder = encoder

        hidden_dims = [128, 64] if hidden_dims is None else hidden_dims

        modules = []
        in_features = embedding_dim
        for width in hidden_dims:
            modules.extend((nn.Linear(in_features, width), nn.ReLU()))
            in_features = width
        # Last layer emits raw logits (no activation).
        modules.append(nn.Linear(in_features, out_dim))
        self.mlp = nn.Sequential(*modules)

    def forward(self, x):
        """Return logits ``[B, out_dim]`` for a 3-D input whose axes 1 and 2 are swapped for the encoder."""
        swapped = x.permute(0, 2, 1)
        # The encoder runs under no_grad, so only the MLP receives gradients.
        with torch.no_grad():
            features = self.encoder(swapped)  # [B, embedding_dim]
        return self.mlp(features)



MLP only

In [None]:
import torch
import torch.nn as nn

class FlattenMLP_BinaryCE(nn.Module):
    """Baseline classifier: flatten the whole window and feed it to an MLP.

    Args:
        time_steps: number of time steps T in each window.
        feature_dim: number of features F per time step.
        hidden_dims: hidden layer widths, e.g. ``(128, 64)``. Any iterable of
            ints is accepted; the default is an immutable tuple so it cannot be
            shared and mutated between instances.
        out_dim: number of logits; 2 for binary classification (class 0 / 1).
    """

    def __init__(self, time_steps, feature_dim, hidden_dims=(128, 64), out_dim=2):
        super().__init__()

        self.input_dim = time_steps * feature_dim
        layers = []
        prev_dim = self.input_dim

        # Hidden layers: Linear followed by ReLU.
        for hd in hidden_dims:
            layers.append(nn.Linear(prev_dim, hd))
            layers.append(nn.ReLU())
            prev_dim = hd

        # Output layer emits raw logits.
        layers.append(nn.Linear(prev_dim, out_dim))

        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        """Map ``x`` of shape [B, T, F] to logits of shape [B, out_dim].

        Raises:
            ValueError: if ``x`` is not 3-dimensional (shape unpacking fails).
        """
        batch, steps, feats = x.shape
        # reshape (not view) so transposed / sliced, non-contiguous inputs are
        # accepted; it only copies when a view is impossible.
        x_flat = x.reshape(batch, steps * feats)

        logits = self.mlp(x_flat)
        return logits


## 將embedding加入為特徵

In [None]:
import torch
import torch.nn as nn

class EmbeddingConcatPredictor(nn.Module):
    """Classifier fed with the raw flattened window concatenated with its embedding.

    Args:
        encoder: module that produces the embedding; it is run without
            gradients on the input with axes 1 and 2 swapped.
        input_dim: number of raw features per time step (31 in this notebook).
        seq_len: number of time steps per window (32 in this notebook).
        embedding_dim: width of the encoder output (128 in this notebook).
        hidden_dims: hidden layer widths of the MLP; ``None`` means ``[256, 128]``.
        out_dim: number of logits (2 classes by default).
    """

    def __init__(self, encoder, input_dim, seq_len, embedding_dim=128, hidden_dims=None, out_dim=2):
        super().__init__()
        self.encoder = encoder
        self.input_dim = input_dim
        self.seq_len = seq_len
        self.embedding_dim = embedding_dim

        # Width of the MLP input: flattened window plus embedding.
        self.flatten_dim = input_dim * seq_len
        self.concat_dim = self.flatten_dim + embedding_dim

        if hidden_dims is None:
            hidden_dims = [256, 128]

        layers = []
        prev_dim = self.concat_dim
        for hd in hidden_dims:
            layers.append(nn.Linear(prev_dim, hd))
            layers.append(nn.ReLU())
            prev_dim = hd

        layers.append(nn.Linear(prev_dim, out_dim))  # final layer emits logits
        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        """Return logits of shape (B, out_dim).

        ``x`` is a 3-D batch with ``input_dim * seq_len`` values per sample.
        NOTE(review): the notebook's loaders yield (B, time, features), e.g.
        (B, 32, 31), and the encoder receives the transposed tensor -- confirm
        this is the layout the encoder was trained on.
        """
        B = x.size(0)

        # The encoder is used as a fixed feature extractor.
        with torch.no_grad():
            embedding = self.encoder(x.permute(0, 2, 1))  # (B, embedding_dim)

        # reshape (not view) so non-contiguous inputs (e.g. transposed batches)
        # are accepted instead of raising a RuntimeError.
        x_flatten = x.reshape(B, -1)  # (B, input_dim * seq_len)

        x_concat = torch.cat([x_flatten, embedding], dim=1)  # (B, flatten_dim + embedding_dim)

        logits = self.mlp(x_concat)
        return logits  # (B, out_dim)


In [None]:
##新的trainer
import torch
import torch.nn as nn
import os
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

class Trainer:
    """Training / evaluation loop for a classifier, with F1-based early stopping.

    The model is expected to return class logits of shape [B, C]. The
    validation metrics assume the binary case: column 1 is treated as the
    positive-class score and sklearn's default binary averaging is used.

    Args:
        model: module to train; it is moved to ``device`` on construction so it
            always lives where the batches are sent.
        optimizer: optimiser stepped once per training batch.
        device: device used for the model and every batch.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: optional iterable of validation batches; when falsy,
            validation, checkpointing and early stopping are skipped.
        scheduler: optional LR scheduler, stepped once per epoch
            (``ReduceLROnPlateau`` receives the validation loss, or the
            training loss when there is no validation loader).
        save_path: optional file path for the best model's ``state_dict``.
        early_stopping_patience: number of consecutive epochs without an F1
            improvement after which training stops.

    Attributes:
        best_val_f1: best validation F1 seen so far (starts at 0.0).
        early_stopping_counter: epochs since the last F1 improvement.
        history: list with one dict of metrics per completed epoch.
    """

    def __init__(self, model, optimizer, device,
                 train_loader, val_loader=None,
                 scheduler=None, save_path=None, early_stopping_patience=5):
        self.device = device
        # nn.Module.to moves parameters in place by default, so an optimiser
        # created earlier keeps pointing at the same parameter objects.
        self.model = model.to(device)
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.scheduler = scheduler
        self.save_path = save_path
        self.early_stopping_patience = early_stopping_patience

        self.best_val_f1 = 0.0
        self.early_stopping_counter = 0
        self.history = []

        if save_path:
            # A bare file name has an empty dirname, and os.makedirs("") raises
            # FileNotFoundError, so only create a directory when there is one.
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)

    def train_one_epoch(self):
        """Run one training pass; return ``(mean batch loss, sample accuracy)``."""
        self.model.train()
        criterion = nn.CrossEntropyLoss()
        total_loss = 0.0
        total_correct = 0
        total_samples = 0

        pbar = tqdm(self.train_loader, desc="Train", leave=False)
        for batch_x, batch_y in pbar:
            batch_x = batch_x.to(self.device)
            # CrossEntropyLoss needs integer class indices.
            batch_y = batch_y.to(self.device).long()

            self.optimizer.zero_grad()
            logits = self.model(batch_x)
            loss = criterion(logits, batch_y)
            loss.backward()
            self.optimizer.step()

            loss_value = loss.item()
            total_loss += loss_value
            preds = logits.argmax(dim=1)
            correct = (preds == batch_y).sum().item()

            total_correct += correct
            total_samples += batch_y.size(0)

            batch_acc = correct / batch_y.size(0)
            pbar.set_postfix({
                "batch_loss": f"{loss_value:.4f}",
                "batch_acc":  f"{batch_acc:.4f}"
            })

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        avg_loss = total_loss / max(len(self.train_loader), 1)
        avg_acc = total_correct / max(total_samples, 1)
        return avg_loss, avg_acc

    def eval_one_epoch(self, desc="Eval"):
        """Evaluate on ``val_loader``.

        Returns:
            ``(avg_loss, avg_acc, precision, recall, f1, roc_auc)``. ``roc_auc``
            is NaN when the labels contain a single class, since ROC-AUC is
            undefined in that case.
        """
        self.model.eval()
        criterion = nn.CrossEntropyLoss()
        total_loss = 0.0
        total_correct = 0
        total_samples = 0

        all_preds = []
        all_probs = []
        all_labels = []

        pbar = tqdm(self.val_loader, desc=desc, leave=False)
        with torch.no_grad():
            for batch_x, batch_y in pbar:
                batch_x = batch_x.to(self.device)
                batch_y = batch_y.to(self.device).long()

                logits = self.model(batch_x)
                loss = criterion(logits, batch_y)

                loss_value = loss.item()
                total_loss += loss_value

                probs = torch.softmax(logits, dim=1)[:, 1]  # positive class probability
                preds = logits.argmax(dim=1)

                all_probs.append(probs.cpu())
                all_preds.append(preds.cpu())
                all_labels.append(batch_y.cpu())

                correct = (preds == batch_y).sum().item()
                total_correct += correct
                total_samples += batch_y.size(0)

                batch_acc = correct / batch_y.size(0)
                pbar.set_postfix({
                    "batch_loss": f"{loss_value:.4f}",
                    "batch_acc":  f"{batch_acc:.4f}"
                })

        avg_loss = total_loss / max(len(self.val_loader), 1)
        avg_acc = total_correct / max(total_samples, 1)

        all_preds = torch.cat(all_preds).numpy()
        all_probs = torch.cat(all_probs).numpy()
        all_labels = torch.cat(all_labels).numpy()

        precision = precision_score(all_labels, all_preds)
        recall = recall_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds)
        # roc_auc_score raises ValueError when only one class is present.
        if len(set(all_labels.tolist())) > 1:
            roc_auc = roc_auc_score(all_labels, all_probs)
        else:
            roc_auc = float("nan")

        return avg_loss, avg_acc, precision, recall, f1, roc_auc

    def _step_scheduler(self, monitored_loss):
        """Advance the LR scheduler once, passing the loss to ReduceLROnPlateau."""
        if not self.scheduler:
            return
        if isinstance(self.scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            self.scheduler.step(monitored_loss)
        else:
            self.scheduler.step()

    def fit(self, num_epochs):
        """Train for up to ``num_epochs`` epochs, stopping early on stalled F1.

        Per-epoch metrics are printed and appended to ``self.history``.
        Nothing is returned.
        """
        for epoch in range(1, num_epochs + 1):
            print(f"Epoch {epoch}/{num_epochs}")

            train_loss, train_acc = self.train_one_epoch()
            record = {"epoch": epoch, "train_loss": train_loss, "train_acc": train_acc}

            if self.val_loader:
                val_loss, val_acc, precision, recall, f1, roc_auc = self.eval_one_epoch()
                record.update({
                    "val_loss": val_loss, "val_acc": val_acc,
                    "precision": precision, "recall": recall,
                    "f1": f1, "roc_auc": roc_auc,
                })
                self.history.append(record)

                # Validation improved (judged by F1): checkpoint and reset patience.
                if f1 > self.best_val_f1:
                    self.best_val_f1 = f1
                    self.early_stopping_counter = 0
                    if self.save_path:
                        torch.save(self.model.state_dict(), self.save_path)
                        print(f"✅ Saved new best model to {self.save_path}")
                else:
                    self.early_stopping_counter += 1
                    print(f"⚠️ EarlyStopping Counter: {self.early_stopping_counter}/{self.early_stopping_patience}")

                # Print the metrics BEFORE the early-stopping check so the final
                # epoch's numbers are not lost when training stops.
                print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
                print(f"Val   Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
                print(f"Precision: {precision:.4f} | Recall: {recall:.4f} | F1: {f1:.4f} | ROC-AUC: {roc_auc:.4f}")
                print("-" * 50)

                # Early stopping
                if self.early_stopping_counter >= self.early_stopping_patience:
                    print("⛔ Early stopping triggered!")
                    break

                self._step_scheduler(val_loss)
            else:
                # No validation data: report training metrics only, but still
                # advance the scheduler so a configured LR schedule takes effect.
                self.history.append(record)
                print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
                print("-" * 50)
                self._step_scheduler(train_loss)


In [None]:
import torch
import numpy as np
# 讀取時
# Load the binary-label dataset archive and pull out the four arrays.
data = np.load('/content/dataset_LSTM_binary.npz')
X_train = data['X_train']
y_train = data['y_train']
X_test  = data['X_test']
y_test  = data['y_test']

print('X_train.shape =', X_train.shape)  # (samples_train, time_steps, features)
print('y_train.shape =', y_train.shape)  # (samples_train,)
print('X_test.shape  =', X_test.shape)   # (samples_test, time_steps, features)
print('y_test.shape  =', y_test.shape)   # (samples_test,)

# Per-feature statistics. X is laid out (samples, time_steps, features), so the
# reduction runs over axes 0 and 1. The previous axis=(0, 2) produced one
# statistic per time step, mixing all features together.
t_mean = X_train.mean(axis=(0, 1), keepdims=True)  # shape: (1, 1, features)
t_std = X_train.std(axis=(0, 1), keepdims=True)    # shape: (1, 1, features)
# The held-out inputs are scaled with the training statistics, so nothing
# computed from the test split leaks into preprocessing.
v_mean = t_mean
v_std = t_std

# Standardise; the epsilon avoids division by zero for constant features.
X_train_norm = (X_train - t_mean) / (t_std + 1e-8)
X_val_norm = (X_test - v_mean) / (v_std + 1e-8)

# Convert the NumPy arrays to float32 PyTorch tensors.
X_train_t = torch.from_numpy(X_train_norm.astype(np.float32))
y_train_t = torch.from_numpy(y_train.astype(np.float32))

X_test_t  = torch.from_numpy(X_val_norm.astype(np.float32))
y_test_t  = torch.from_numpy(y_test.astype(np.float32))

X_train.shape = (92583, 32, 31)
y_train.shape = (92583,)
X_test.shape  = (21717, 32, 31)
y_test.shape  = (21717,)


In [None]:
#emb+mlp
# Embedding + MLP classifier: restore the SimSiam encoder from a checkpoint,
# freeze it and put a two-logit MLP head on top.
# The device is pinned to CPU; GPU auto-detection is switched off here.
device = 'cpu'
input_dim = 31  # number of features

simsiam_model = SimSiamForTimeSeries(input_dim).to(device)
checkpoint = torch.load(
    '/content/checkpoint_epoch_8_2025-04-17_02-12-50ddpm_400-700.pt',
    map_location=device,
)
simsiam_model.load_state_dict(checkpoint["simsiam_model_state_dict"])

encoder = simsiam_model.encoder
# Freeze every encoder parameter.
for p in encoder.parameters():
    p.requires_grad = False

model = EmbeddingPredictor(
    encoder=encoder,
    embedding_dim=128,
    hidden_dims=[256, 128, 64],
    out_dim=2,
).to(device)

NameError: name 'SimSiamForTimeSeries' is not defined

In [None]:
# 將emb加入input
# Classifier fed with the raw window concatenated with the encoder embedding.
device = 'cpu'

simsiam_model = SimSiamForTimeSeries(31).to(device)
checkpoint = torch.load(
    '/content/checkpoint_epoch_13_2025-04-25_15-19-55tran_augment.pt',
    map_location=device,
)
simsiam_model.load_state_dict(checkpoint["simsiam_model_state_dict"])

encoder = simsiam_model.encoder

model = EmbeddingConcatPredictor(
    encoder=encoder,
    input_dim=31,
    seq_len=32,
    embedding_dim=128,
    hidden_dims=[512, 256],
    out_dim=2,
).to(device)




In [None]:
# (B) 建構 FlattenMLP_BinaryCE
# Baseline without an encoder: an MLP over the flattened 32 x 31 window that
# emits two logits. `device` is whatever an earlier cell assigned.
model = FlattenMLP_BinaryCE(
    time_steps=32,
    feature_dim=31,
    hidden_dims=[128, 64],
    out_dim=2,
).to(device)


In [None]:
#700
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim


device = "cuda" if torch.cuda.is_available() else "cpu"

# `model` comes from one of the construction cells, which place it on 'cpu'.
# Move it to the device the Trainer sends batches to BEFORE building the
# optimiser; otherwise a CUDA runtime fails with a device mismatch.
model = model.to(device)

train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset  = TensorDataset(X_test_t,  y_test_t)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=32, shuffle=False)

# (C) Optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Requires model, optimizer and the loaders defined above.
# NOTE(review): the test split doubles as the validation set here, and every
# experiment cell writes to the same save_path, so a later run overwrites the
# checkpoint of an earlier one.
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    device=device,
    train_loader=train_loader,
    val_loader=test_loader,
    scheduler=None,  # an LR scheduler can be passed here
    save_path="./checkpoints/best_model.pth",
    early_stopping_patience=5
)

# Start training
trainer.fit(num_epochs=50)

Epoch 1/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 1.2622 | Train Acc: 0.8582
Val   Loss: 1.3155 | Val Acc: 0.7079
Precision: 0.7878 | Recall: 0.6681 | F1: 0.7230 | ROC-AUC: 0.7612
--------------------------------------------------
Epoch 2/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.2405 | Train Acc: 0.9005
Val   Loss: 2.2422 | Val Acc: 0.7034
Precision: 0.7707 | Recall: 0.6835 | F1: 0.7245 | ROC-AUC: 0.7770
--------------------------------------------------
Epoch 3/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.2261 | Train Acc: 0.9065
Val   Loss: 1.3048 | Val Acc: 0.7136
Precision: 0.7907 | Recall: 0.6775 | F1: 0.7297 | ROC-AUC: 0.7958
--------------------------------------------------
Epoch 4/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.1672 | Train Acc: 0.9301
Val   Loss: 1.4941 | Val Acc: 0.7146
Precision: 0.7725 | Recall: 0.7085 | F1: 0.7391 | ROC-AUC: 0.7889
--------------------------------------------------
Epoch 5/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.1962 | Train Acc: 0.9221
Val   Loss: 1.9612 | Val Acc: 0.7033
Precision: 0.7378 | Recall: 0.7448 | F1: 0.7413 | ROC-AUC: 0.7592
--------------------------------------------------
Epoch 6/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.2352 | Train Acc: 0.8928
Val   Loss: 2.8648 | Val Acc: 0.7241
Precision: 0.7532 | Recall: 0.7683 | F1: 0.7607 | ROC-AUC: 0.7854
--------------------------------------------------
Epoch 7/50




⚠️ EarlyStopping Counter: 1/5
Train Loss: 0.1771 | Train Acc: 0.9370
Val   Loss: 1.8611 | Val Acc: 0.7251
Precision: 0.7944 | Recall: 0.6994 | F1: 0.7439 | ROC-AUC: 0.7980
--------------------------------------------------
Epoch 8/50




⚠️ EarlyStopping Counter: 2/5
Train Loss: 0.1634 | Train Acc: 0.9334
Val   Loss: 4.4843 | Val Acc: 0.7131
Precision: 0.7636 | Recall: 0.7202 | F1: 0.7413 | ROC-AUC: 0.7748
--------------------------------------------------
Epoch 9/50




⚠️ EarlyStopping Counter: 3/5
Train Loss: 0.1465 | Train Acc: 0.9263
Val   Loss: 3.5406 | Val Acc: 0.7321
Precision: 0.7861 | Recall: 0.7289 | F1: 0.7564 | ROC-AUC: 0.7893
--------------------------------------------------
Epoch 10/50




⚠️ EarlyStopping Counter: 4/5
Train Loss: 0.1497 | Train Acc: 0.9342
Val   Loss: 2.8980 | Val Acc: 0.7256
Precision: 0.7757 | Recall: 0.7303 | F1: 0.7523 | ROC-AUC: 0.7826
--------------------------------------------------
Epoch 11/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.1209 | Train Acc: 0.9482
Val   Loss: 8.2710 | Val Acc: 0.7360
Precision: 0.7327 | Recall: 0.8460 | F1: 0.7853 | ROC-AUC: 0.7813
--------------------------------------------------
Epoch 12/50




⚠️ EarlyStopping Counter: 1/5
Train Loss: 0.1609 | Train Acc: 0.9425
Val   Loss: 3.4035 | Val Acc: 0.7019
Precision: 0.7596 | Recall: 0.6988 | F1: 0.7279 | ROC-AUC: 0.7611
--------------------------------------------------
Epoch 13/50




⚠️ EarlyStopping Counter: 2/5
Train Loss: 0.1541 | Train Acc: 0.9480
Val   Loss: 7.8411 | Val Acc: 0.7085
Precision: 0.7573 | Recall: 0.7198 | F1: 0.7381 | ROC-AUC: 0.7588
--------------------------------------------------
Epoch 14/50




⚠️ EarlyStopping Counter: 3/5
Train Loss: 0.1399 | Train Acc: 0.9541
Val   Loss: 1.7921 | Val Acc: 0.7200
Precision: 0.7592 | Recall: 0.7461 | F1: 0.7526 | ROC-AUC: 0.7748
--------------------------------------------------
Epoch 15/50




⚠️ EarlyStopping Counter: 4/5
Train Loss: 0.1466 | Train Acc: 0.9451
Val   Loss: 4.9792 | Val Acc: 0.6729
Precision: 0.7713 | Recall: 0.6067 | F1: 0.6792 | ROC-AUC: 0.7639
--------------------------------------------------
Epoch 16/50


                                                                                            

⚠️ EarlyStopping Counter: 5/5
⛔ Early stopping triggered!




In [None]:
#500
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim


device = "cuda" if torch.cuda.is_available() else "cpu"

# `model` comes from one of the construction cells, which place it on 'cpu'.
# Move it to the device the Trainer sends batches to BEFORE building the
# optimiser; otherwise a CUDA runtime fails with a device mismatch.
model = model.to(device)

train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset  = TensorDataset(X_test_t,  y_test_t)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader  = DataLoader(test_dataset,  batch_size=32, shuffle=False)

# (C) Optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Requires model, optimizer and the loaders defined above.
# NOTE(review): the test split doubles as the validation set here, and every
# experiment cell writes to the same save_path, so a later run overwrites the
# checkpoint of an earlier one.
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    device=device,
    train_loader=train_loader,
    val_loader=test_loader,
    scheduler=None,  # an LR scheduler can be passed here
    save_path="./checkpoints/best_model.pth",
    early_stopping_patience=5
)

# Start training
trainer.fit(num_epochs=50)

Epoch 1/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 1.6160 | Train Acc: 0.8657
Val   Loss: 1.6534 | Val Acc: 0.6843
Precision: 0.7785 | Recall: 0.6243 | F1: 0.6929 | ROC-AUC: 0.7317
--------------------------------------------------
Epoch 2/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.2414 | Train Acc: 0.9075
Val   Loss: 1.6870 | Val Acc: 0.7278
Precision: 0.7658 | Recall: 0.7534 | F1: 0.7595 | ROC-AUC: 0.7656
--------------------------------------------------
Epoch 3/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.2117 | Train Acc: 0.9160
Val   Loss: 1.7005 | Val Acc: 0.7450
Precision: 0.7616 | Recall: 0.8051 | F1: 0.7828 | ROC-AUC: 0.7622
--------------------------------------------------
Epoch 4/50




⚠️ EarlyStopping Counter: 1/5
Train Loss: 0.1958 | Train Acc: 0.9282
Val   Loss: 1.6353 | Val Acc: 0.7319
Precision: 0.7761 | Recall: 0.7452 | F1: 0.7603 | ROC-AUC: 0.7760
--------------------------------------------------
Epoch 5/50




⚠️ EarlyStopping Counter: 2/5
Train Loss: 0.1608 | Train Acc: 0.9368
Val   Loss: 1.5511 | Val Acc: 0.7111
Precision: 0.7829 | Recall: 0.6831 | F1: 0.7296 | ROC-AUC: 0.7779
--------------------------------------------------
Epoch 6/50




⚠️ EarlyStopping Counter: 3/5
Train Loss: 0.1720 | Train Acc: 0.9348
Val   Loss: 2.0305 | Val Acc: 0.7320
Precision: 0.7596 | Recall: 0.7759 | F1: 0.7677 | ROC-AUC: 0.7669
--------------------------------------------------
Epoch 7/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.1615 | Train Acc: 0.9364
Val   Loss: 2.6631 | Val Acc: 0.7643
Precision: 0.7818 | Recall: 0.8142 | F1: 0.7977 | ROC-AUC: 0.7887
--------------------------------------------------
Epoch 8/50




⚠️ EarlyStopping Counter: 1/5
Train Loss: 0.1641 | Train Acc: 0.9443
Val   Loss: 5.6687 | Val Acc: 0.7135
Precision: 0.7869 | Recall: 0.6827 | F1: 0.7311 | ROC-AUC: 0.7832
--------------------------------------------------
Epoch 9/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.1774 | Train Acc: 0.9410
Val   Loss: 5.8798 | Val Acc: 0.7519
Precision: 0.7235 | Recall: 0.9150 | F1: 0.8080 | ROC-AUC: 0.7560
--------------------------------------------------
Epoch 10/50




⚠️ EarlyStopping Counter: 1/5
Train Loss: 0.1315 | Train Acc: 0.9464
Val   Loss: 2.3389 | Val Acc: 0.7653
Precision: 0.7936 | Recall: 0.7957 | F1: 0.7946 | ROC-AUC: 0.7915
--------------------------------------------------
Epoch 11/50




⚠️ EarlyStopping Counter: 2/5
Train Loss: 0.1751 | Train Acc: 0.9398
Val   Loss: 9.7727 | Val Acc: 0.7501
Precision: 0.7575 | Recall: 0.8266 | F1: 0.7906 | ROC-AUC: 0.7613
--------------------------------------------------
Epoch 12/50




⚠️ EarlyStopping Counter: 3/5
Train Loss: 0.1634 | Train Acc: 0.9403
Val   Loss: 2.9036 | Val Acc: 0.6962
Precision: 0.7998 | Recall: 0.6237 | F1: 0.7009 | ROC-AUC: 0.7863
--------------------------------------------------
Epoch 13/50




⚠️ EarlyStopping Counter: 4/5
Train Loss: 0.1427 | Train Acc: 0.9529
Val   Loss: 4.6599 | Val Acc: 0.7283
Precision: 0.7883 | Recall: 0.7162 | F1: 0.7505 | ROC-AUC: 0.7773
--------------------------------------------------
Epoch 14/50




⚠️ EarlyStopping Counter: 5/5
⛔ Early stopping triggered!


In [None]:
#trandition
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim


# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Training split: wrap the tensors and serve them in shuffled mini-batches of 32.
train_dataset = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Held-out split: same batch size, but kept in its original order.
test_dataset = TensorDataset(X_test_t, y_test_t)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# (C) Optimizer: Adam over every parameter of `model`, learning rate 1e-3.
# NOTE(review): `model` is not created in this cell; it is whatever object the
# kernel currently holds under that name. Confirm it is the intended (fresh)
# model before running.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Everything the trainer needs (model, optimizer, loaders) exists by this point.
# NOTE(review): the test loader is passed as `val_loader`, so the trainer's
# validation loop sees the test split. Confirm this is intended.
# NOTE(review): `save_path` is the same path the previous cell's log reports
# saving to, so this run presumably overwrites that checkpoint. Verify.
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    device=device,
    train_loader=train_loader,
    val_loader=test_loader,
    scheduler=None,  # a learning-rate scheduler can be supplied here if one is used
    save_path="./checkpoints/best_model.pth",
    early_stopping_patience=5,
)

# Start training.
trainer.fit(num_epochs=50)

Epoch 1/50




✅ Saved new best model to ./checkpoints/best_model.pth
Train Loss: 0.2635 | Train Acc: 0.8807
Val   Loss: 1.1603 | Val Acc: 0.7078
Precision: 0.7432 | Recall: 0.7457 | F1: 0.7445 | ROC-AUC: 0.7470
--------------------------------------------------
Epoch 2/50




⚠️ EarlyStopping Counter: 1/5
Train Loss: 0.1552 | Train Acc: 0.9370
Val   Loss: 1.4278 | Val Acc: 0.6937
Precision: 0.7514 | Recall: 0.6924 | F1: 0.7207 | ROC-AUC: 0.7474
--------------------------------------------------
Epoch 3/50




⚠️ EarlyStopping Counter: 2/5
Train Loss: 0.1279 | Train Acc: 0.9478
Val   Loss: 1.4183 | Val Acc: 0.6964
Precision: 0.7456 | Recall: 0.7103 | F1: 0.7276 | ROC-AUC: 0.7497
--------------------------------------------------
Epoch 4/50




⚠️ EarlyStopping Counter: 3/5
Train Loss: 0.1113 | Train Acc: 0.9542
Val   Loss: 1.9330 | Val Acc: 0.6768
Precision: 0.7549 | Recall: 0.6421 | F1: 0.6940 | ROC-AUC: 0.7547
--------------------------------------------------
Epoch 5/50




⚠️ EarlyStopping Counter: 4/5
Train Loss: 0.1025 | Train Acc: 0.9585
Val   Loss: 1.8385 | Val Acc: 0.6552
Precision: 0.7378 | Recall: 0.6140 | F1: 0.6702 | ROC-AUC: 0.7360
--------------------------------------------------
Epoch 6/50




⚠️ EarlyStopping Counter: 5/5
⛔ Early stopping triggered!
