In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import YXJ
import torch.nn.functional as F

class TimeSeriesDataset2(Dataset):
    """Sliding-window dataset built from a CSV file.

    Every CSV row is treated as one time step. A sample is a window of
    ``window_size`` consecutive rows; all columns except the last are the
    input features and the mean of the last column over the window is the
    label.

    Args:
        csv_file: Anything accepted by ``pandas.read_csv``.
        window_size: Number of consecutive rows per sample; must be positive.
        step_size: Row offset between the starts of two consecutive windows;
            must be positive.

    Raises:
        ValueError: If ``window_size`` or ``step_size`` is not positive.
    """

    def __init__(self, csv_file, window_size, step_size):
        # Fail fast with a clear message instead of silently producing an
        # empty dataset (negative step) or zero-row windows (window_size 0).
        if window_size <= 0:
            raise ValueError(f"window_size must be positive, got {window_size}")
        if step_size <= 0:
            raise ValueError(f"step_size must be positive, got {step_size}")

        self.data_frame = pd.read_csv(csv_file)  # read the CSV file
        self.window_size = window_size
        self.step_size = step_size
        self.samples = self._create_samples()

    def _create_samples(self):
        """Return all windows as an array of shape (n_windows, window_size, n_columns).

        The result is a read-only strided *view* of the frame's values, so
        overlapping windows do not duplicate rows in memory (the previous
        implementation copied every row roughly window_size / step_size times).
        """
        values = self.data_frame.values  # converted once, not once per window
        if len(values) < self.window_size:
            # Not enough rows for a single window: empty dataset.
            return np.empty((0, self.window_size, values.shape[1]), dtype=values.dtype)

        # sliding_window_view appends the window axis last:
        # (n_rows - window_size + 1, n_columns, window_size).
        windows = np.lib.stride_tricks.sliding_window_view(values, self.window_size, axis=0)
        # Keep every step_size-th start and reorder to (window, time, column).
        return windows[:: self.step_size].transpose(0, 2, 1)

    def __len__(self):
        """Number of windows in the dataset."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(X, y)`` for window ``idx`` as float32 tensors.

        ``X`` has shape (window_size, n_columns - 1); ``y`` is a scalar tensor
        holding the mean of the last column over the window.
        """
        window = self.samples[idx]
        features = window[:, :-1]          # every row, all columns but the last
        label = np.mean(window[:, -1])     # mean of the last column is the label

        # torch.tensor copies, so the read-only view is never written to.
        X = torch.tensor(features, dtype=torch.float32)
        y = torch.tensor(label, dtype=torch.float32)
        return X, y

# Usage example
SAMPLES_PER_GESTURE = 3000  # rows per window
step_size = 100             # row offset between consecutive window starts
dataset = TimeSeriesDataset2('normalized_data.csv', SAMPLES_PER_GESTURE, step_size)
GESTURES = ["smoke", "nosmoke"]  # gesture list
NUM_GESTURES = len(GESTURES)

# Split the dataset
from torch.utils.data import random_split
SPLIT_SEED = 42  # fixed seed so train/valid/test membership is the same on every run
dataset_size = len(dataset)
train_size = int(dataset_size * 0.7)  # 70% of the windows for training
valid_size = int(dataset_size * 0.2)  # 20% of the windows for validation
test_size = dataset_size - train_size - valid_size  # remaining ~10% for testing
batch_size = 512

# NOTE(review): with step_size (100) much smaller than the window length (3000),
# neighbouring windows share most of their rows. A random split therefore puts
# windows that overlap training windows into the validation and test sets, so
# the reported validation/test losses are optimistic. Consider a contiguous
# (time-ordered) split with a gap of at least one window between the subsets.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, valid_dataset, test_dataset = random_split(
    dataset, [train_size, valid_size, test_size], generator=split_generator
)
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

YXJ.check_dataloader(test_loader)
# 9 = feature columns per time step (the cell output shows batches of shape [N, 3000, 9]).
input_size = SAMPLES_PER_GESTURE * 9

torch.Size([68, 3000, 9]) tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.3037, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.5023, 0.0000, 0.3563, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.4507, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0563, 0.0000, 0.0000, 0.3300,
        0.0000, 0.0000, 0.3640, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.2053, 0.0000, 0.0000, 0.4507])


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a sequence-first tensor.

    Even feature indices receive ``sin(pos * w_k)`` and odd indices
    ``cos(pos * w_k)`` with ``w_k = 10000 ** (-2k / d_model)``.

    Args:
        d_model: Size of the feature dimension (odd values are supported).
        max_len: Number of positions to precompute; inputs passed to
            ``forward`` must not be longer than this along dimension 0.
    """

    def __init__(self, d_model, max_len):
        super().__init__()
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # An odd d_model has one fewer odd column than even columns, so trim
        # the cosine term; without this the assignment fails on a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Stored as (max_len, 1, d_model) so it broadcasts over the batch
        # dimension of a (seq_len, batch, d_model) input. Registered as a
        # buffer: moves with .to(device) but is not a trainable parameter.
        self.register_buffer('pe', pe.unsqueeze(1))

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(0)`` positions.

        ``x`` is indexed by position along dimension 0 and must have
        ``d_model`` as its last dimension.
        """
        return x + self.pe[: x.size(0)]

class TransformerModel(nn.Module):
    """Transformer encoder that maps a feature sequence to one vector per sample.

    The input is projected to the model width, scaled, given positional
    encodings, passed through a stack of encoder layers, mean-pooled over the
    time dimension and projected to ``num_classes`` outputs.

    Args:
        input_dim: Number of features per time step.
        num_heads: Attention heads per encoder layer.
        num_encoder_layers: Number of stacked encoder layers.
        dim_feedforward: Used both as the model width (``d_model``) and as the
            hidden size of each layer's feed-forward sub-network.
        max_seq_length: Longest sequence the positional encoding supports.
        num_classes: Size of the output vector per sample.
    """

    def __init__(self, input_dim, num_heads, num_encoder_layers, dim_feedforward, max_seq_length, num_classes=1):
        super().__init__()
        # Kept on the instance so forward() does not depend on a notebook
        # global that happens to be called dim_feedforward.
        self.d_model = dim_feedforward
        self.embedding = nn.Linear(input_dim, dim_feedforward)
        self.pos_encoder = PositionalEncoding(dim_feedforward, max_seq_length)
        encoder_layer = nn.TransformerEncoderLayer(d_model=dim_feedforward, nhead=num_heads, dim_feedforward=dim_feedforward)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        self.output_layer = nn.Linear(dim_feedforward, num_classes)

    def forward(self, src):
        """Run the model on ``src`` of shape (batch, seq_len, input_dim).

        Returns a tensor of shape (batch, num_classes).
        """
        # The encoder layers are built with the default sequence-first layout.
        src = src.transpose(0, 1)                       # (seq_len, batch, input_dim)
        src = self.embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        encoded = self.transformer_encoder(src)         # (seq_len, batch, d_model)
        pooled = encoded.mean(dim=0)                    # average over time steps
        return self.output_layer(pooled)

In [3]:
def validate(model, device, criterion, test_loader):
    """Return the mean per-batch loss of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and left in that mode; gradients
    are not tracked.

    Args:
        model: Module to evaluate.
        device: Device the batches are moved to.
        criterion: Callable ``criterion(output, target)`` returning a scalar tensor.
        test_loader: Sized iterable of ``(data, target)`` batches.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    model.eval()
    validation_loss = 0.0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # A (batch, 1) output against a (batch,) target would be broadcast
            # to (batch, batch) by the loss and yield a wrong value; drop the
            # trailing singleton dimension so the shapes match.
            if output.dim() == target.dim() + 1 and output.size(-1) == 1:
                output = output.squeeze(-1)
            validation_loss += criterion(output, target).item()
    validation_loss /= len(test_loader)  # average validation loss over batches
    return validation_loss
    
def train(model, device, train_loader, criterion, optimizer, epochs):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Args:
        model: Module to train (switched to training mode).
        device: Device the batches are moved to.
        train_loader: Iterable of ``(data, target)`` batches.
        criterion: Callable ``criterion(output, target)`` returning a scalar tensor.
        optimizer: Optimizer over the model's parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        A list with one float per epoch: the mean of that epoch's batch losses
        (``nan`` for an epoch in which the loader produced no batch).
    """
    model.train()
    train_losses = []  # one entry per epoch
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()            # clear gradients
            output = model(data)             # forward pass
            # A (batch, 1) output against a (batch,) target would be broadcast
            # to (batch, batch) by the loss, giving wrong values and gradients.
            if output.dim() == target.dim() + 1 and output.size(-1) == 1:
                output = output.squeeze(-1)
            loss = criterion(output, target)  # compute loss
            loss.backward()                  # backward pass
            optimizer.step()                 # update parameters
            running_loss += loss.item()
            num_batches += 1
        # Report the epoch average rather than whatever the last mini-batch
        # happened to produce; also avoids a NameError on an empty loader.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        train_losses.append(epoch_loss)
        print(f'Epoch {epoch+1}, Loss: {epoch_loss}')
    return train_losses
    
def train_vali(model, device, train_loader, test_loader, criterion, optimizer, epochs):
    """Train ``model`` and evaluate it on ``test_loader`` after every epoch.

    Args:
        model: Module to train.
        device: Device the batches are moved to.
        train_loader: Iterable of ``(data, target)`` training batches.
        test_loader: Loader handed to ``validate`` after each epoch.
        criterion: Callable ``criterion(output, target)`` returning a scalar tensor.
        optimizer: Optimizer over the model's parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        ``(train_losses, validation_losses)``: two lists with one float per
        epoch. The training entry is the mean of that epoch's batch losses
        (``nan`` if the loader produced no batch).
    """
    train_losses = []
    validation_losses = []
    for epoch in range(epochs):
        # validate() leaves the model in eval mode, so re-enable training
        # mode at the start of every epoch.
        model.train()
        running_loss = 0.0
        num_batches = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()  # clear gradients
            output = model(data)  # forward pass
            # A (batch, 1) output against a (batch,) target would be broadcast
            # to (batch, batch) by the loss, giving wrong values and gradients.
            if output.dim() == target.dim() + 1 and output.size(-1) == 1:
                output = output.squeeze(-1)
            loss = criterion(output, target)  # compute loss
            loss.backward()  # backward pass
            optimizer.step()  # update parameters
            running_loss += loss.item()
            num_batches += 1
        # Epoch average, so the curve is comparable with the averaged
        # validation loss and an empty loader does not raise a NameError.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        train_losses.append(epoch_loss)

        # Loss on the validation set after each epoch
        validation_loss = validate(model, device, criterion, test_loader)
        validation_losses.append(validation_loss)

        print(f'Epoch {epoch+1}, Training Loss: {epoch_loss}, Validation Loss: {validation_loss}')

    return train_losses, validation_losses
    
def test(model, device, test_loader):
    model.eval()  # 将模型设置为评估模式
    test_loss = 0
    with torch.no_grad():  # 在评估阶段不计算梯度
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # 计算MSE损失，注意这里不需要像之前那样手动计算差异
            # 使用output和target直接计算MSE
            loss = torch.nn.functional.mse_loss(output.squeeze(), target)
            # 累加每个批次的损失
            test_loss += loss.item() * data.size(0)  # 乘以data.size(0)以得到此批次的总损失

    # 计算整个测试集上的平均损失
    test_loss /= len(test_loader.dataset)
    
    print(f'Average MSE Loss on the test set: {test_loss}')

In [None]:
# Run on the GPU when one is available, otherwise on the CPU
use_cuda = torch.cuda.is_available()
device = torch.device('cuda') if use_cuda else torch.device('cpu')

# Model hyper-parameters
input_dim = 9            # features per time step
num_heads = 2            # attention heads
num_encoder_layers = 2   # stacked encoder layers
dim_feedforward = 512    # model width / feed-forward size
max_seq_length = 3000    # longest supported sequence
num_classes = 1          # outputs per sample

model = TransformerModel(
    input_dim=input_dim,
    num_heads=num_heads,
    num_encoder_layers=num_encoder_layers,
    dim_feedforward=dim_feedforward,
    max_seq_length=max_seq_length,
    num_classes=num_classes,
)
model = model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# train_loader, valid_loader and test_loader are expected to exist already
# (they are created in an earlier cell).
train_losses, validation_losses = train_vali(
    model,
    device,
    train_loader,
    valid_loader,
    criterion,
    optimizer,
    epochs=100,
)
YXJ.draw_loss(train_losses, validation_losses)

# Final per-sample MSE on the validation split, then on the held-out test split
test(model, device, valid_loader)
test(model, device, test_loader)

