In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from memory_profiler import profile
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class ActivityDataset(Dataset):
    def __init__(self, file_path):
        self.df = pd.read_excel(file_path)

        self.df = self.df.groupby('Activity').apply(lambda x: x.sample(frac=0.1)).reset_index(drop=True)

        # 分割特征和标签
        self.features = self.df.drop(['subject', 'Activity', 'ActivityName'], axis=1).values
        self.labels = self.df['Activity'].values

        # 特征归一化
        self.scaler = StandardScaler()
        self.features = self.scaler.fit_transform(self.features)

        # 转换为张量
        self.features = torch.FloatTensor(self.features)
        self.labels = torch.LongTensor(self.labels)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]


In [3]:
# 加载数据
train = r'E:\A_workbench\A-lab\25-2-4\Human-Activity-Recognition-master\my_data\train_dataset.xlsx'
test = r'E:\A_workbench\A-lab\25-2-4\Human-Activity-Recognition-master\my_data\test_dataset.xlsx'
train_dataset = ActivityDataset(train)
test_dataset = ActivityDataset(test)

# 创建 DataLoader
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [4]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout=0.2):
        super(LSTMModel, self).__init__()

        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # LSTM层
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout
        )

        # Batch Normalization
        self.batch_norm = nn.BatchNorm1d(hidden_dim)

        # Dropout
        self.dropout = nn.Dropout(dropout)

        # 全连接层
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # 添加序列维度 (batch_size, 1, input_dim)
        x = x.unsqueeze(1)  # 关键修改：将2D输入转换为3D

        # 初始化隐藏状态和细胞状态
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)

        # 前向传播
        out, (h_n, c_n) = self.lstm(x, (h0, c0))

        # 取最后一个时间步的输出
        out = out[:, -1, :]

        # Batch Normalization和Dropout
        out = self.batch_norm(out)
        out = self.dropout(out)

        # 全连接层
        out = self.fc(out)

        return out


In [5]:
# 获取输入维度和输出维度
input_dim = train_dataset.features.shape[1]
output_dim = len(torch.unique(train_dataset.labels))

# Check the number of unique classes
num_classes = len(torch.unique(train_dataset.labels))
print(f"Number of classes: {num_classes}")

# Remap labels to be zero-indexed if necessary
unique_labels = torch.unique(train_dataset.labels)
label_mapping = {label.item(): idx for idx, label in enumerate(unique_labels)}

train_dataset.labels = torch.tensor([label_mapping[label.item()] for label in train_dataset.labels])
test_dataset.labels = torch.tensor([label_mapping[label.item()] for label in test_dataset.labels])


Number of classes: 5


In [6]:
# 初始化模型
model = LSTMModel(
    input_dim=input_dim,
    hidden_dim=128,
    num_layers=2,
    output_dim=output_dim,
    dropout=0.2
)

# 打印模型结构
print(model)

LSTMModel(
  (lstm): LSTM(880, 128, num_layers=2, batch_first=True, dropout=0.2)
  (batch_norm): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=128, out_features=5, bias=True)
)


In [7]:
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# 训练设备选择（GPU 或 CPU）
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# 训练循环
num_epochs = 50
best_val_acc = 0.0


In [8]:

# 记录训练时间
start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_acc = 0.0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 计算准确率
        _, preds = torch.max(outputs, dim=1)
        train_acc += (preds == labels).sum().item()
        train_loss += loss.item() * inputs.size(0)

    # 更新学习率
    scheduler.step()

    # 计算平均损失和准确率
    train_loss /= len(train_loader.dataset)
    train_acc /= len(train_loader.dataset)

    # 验证阶段
    model.eval()
    val_loss = 0.0
    val_acc = 0.0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            _, preds = torch.max(outputs, dim=1)
            val_acc += (preds == labels).sum().item()
            val_loss += loss.item() * inputs.size(0)

    val_loss /= len(test_loader.dataset)
    val_acc /= len(test_loader.dataset)

    # 打印训练信息
    print(f'Epoch [{epoch + 1}/{num_epochs}]')
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

    # 保存最佳模型
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_activity_model.pth')

# 记录训练结束时间
end_time = time.time()
execution_time = end_time - start_time
print(f'Total Execution Time: {execution_time:.2f} seconds')

Epoch [1/50]
Train Loss: 0.8236, Train Acc: 0.7087
Val Loss: 0.7836, Val Acc: 0.7886
Epoch [2/50]
Train Loss: 0.3673, Train Acc: 0.8720
Val Loss: 0.7106, Val Acc: 0.7780
Epoch [3/50]
Train Loss: 0.2440, Train Acc: 0.9260
Val Loss: 0.6830, Val Acc: 0.8153
Epoch [4/50]
Train Loss: 0.1657, Train Acc: 0.9527
Val Loss: 0.9956, Val Acc: 0.7265
Epoch [5/50]
Train Loss: 0.1202, Train Acc: 0.9630
Val Loss: 0.8021, Val Acc: 0.7815
Epoch [6/50]
Train Loss: 0.0911, Train Acc: 0.9721
Val Loss: 0.9415, Val Acc: 0.7726
Epoch [7/50]
Train Loss: 0.0877, Train Acc: 0.9769
Val Loss: 0.7540, Val Acc: 0.8224
Epoch [8/50]
Train Loss: 0.0587, Train Acc: 0.9842
Val Loss: 1.0161, Val Acc: 0.7620
Epoch [9/50]
Train Loss: 0.0574, Train Acc: 0.9788
Val Loss: 0.9849, Val Acc: 0.7726
Epoch [10/50]
Train Loss: 0.0532, Train Acc: 0.9806
Val Loss: 1.0157, Val Acc: 0.7744
Epoch [11/50]
Train Loss: 0.0543, Train Acc: 0.9854
Val Loss: 0.9716, Val Acc: 0.7922
Epoch [12/50]
Train Loss: 0.0287, Train Acc: 0.9921
Val Loss: 1

In [9]:
# 加载最佳模型
model.load_state_dict(torch.load('best_activity_model.pth'))
model.eval()

# 预测测试集
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, preds = torch.max(outputs, dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 生成混淆矩阵
cm = confusion_matrix(all_labels, all_preds)
print("Confusion Matrix:")
print(cm)

# 计算准确率
acc = accuracy_score(all_labels, all_preds)
print(f'Test Accuracy: {acc:.4f}')

Confusion Matrix:
[[348  13  12  28   4]
 [ 11  43   3   2   1]
 [  2   1  10   3   6]
 [  3   0   2   5   9]
 [  0   0   0   0  57]]
Test Accuracy: 0.8224


In [10]:
# 使用 torch.autograd.profiler 记录模型运行时间
with torch.autograd.profiler.profile(use_cuda=True) as prof:
    model(inputs)
print(prof.key_averages().table(sort_by="cuda_time_total"))


# 使用 memory_profiler 记录内存消耗
@profile
def memory_consumption():
    model(inputs)


memory_consumption()

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 aten::unsqueeze         7.78%     146.000us         8.00%     150.000us     150.000us             1  
                aten::as_strided         0.59%      11.000us         0.59%      11.000us       0.367us            30  
                     aten::zeros         7.09%     133.000us         7.52%     141.000us      70.500us             2  
                     aten::empty         0.43%       8.000us         0.43%       8.000us       1.000us             8  
                     aten::zero_         0.11%       2.000us         0.11%       2.000us       1.000us             2  
                        aten::to         0.05%  

  warn("CUDA is not available, disabling CUDA profiling")
