In [4]:
import joblib  # 用于模型保存和加载
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import joblib
from sklearn.utils import shuffle
from torch.utils.data import Dataset
import os
import datetime
import time
import torchvision.transforms as T
def save_model(model:nn.Module, model_name,epoch):
    save_folder_name = 'checkpoints'
    if not os.path.exists(save_folder_name):
        os.makedirs(save_folder_name)
    # add time information btw
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y%m%d%H%M%S')
    model_path = os.path.join(save_folder_name, f'{model_name}_{epoch}_{timestamp}.pt')
    torch.save(model.state_dict(), model_path)
    
# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class FatigueDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels_p, self.labels_m, self.labels_f = labels  # Separate out the three labels
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], (self.labels_p[idx], self.labels_m[idx], self.labels_f[idx])  # Return all three labels


# 定义LSTM模型
class LSTMModel(nn.Module):
    def __init__(self, input_dim,h_dim=512):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, h_dim, num_layers=1,batch_first=True)
        self.bn1 = nn.BatchNorm1d(h_dim)
        self.fc_for_p = nn.Linear(h_dim, 256)
        self.fc_for_m = nn.Linear(h_dim, 256)
        self.fc_for_f = nn.Linear(h_dim, 256)  # Add fully connected layer for 'F'
        
        self.bn2 = nn.BatchNorm1d(256)
        self.classifier_for_p = nn.Linear(256, 4)  # 4 classes
        self.classifier_for_m = nn.Linear(256,4)   # 4 classes
        self.classifier_for_f = nn.Linear(256,4)   # 4 classes (for 'F')
        self.relu = nn.ReLU()
        self.h_dim = h_dim
    
    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.h_dim).to(device)
        c0 = torch.zeros(1, x.size(0), self.h_dim).to(device)
        out, _ = self.lstm(x, (h0, c0))
        
        out = self.bn1(out[:, -1, :])  # Use the last hidden state
        
        out_p = self.relu(self.fc_for_p(out))
        # out_m = self.relu(self.fc_for_m(out))
        # out_f = self.relu(self.fc_for_f(out))  # Add this for 'F'
        
        out_p = self.bn2(out_p)
        # out_m = self.bn2(out_m)
        # out_f = self.bn2(out_f)  # Add BatchNorm for 'F'
        
        out_p = self.classifier_for_p(out_p)
        # out_m = self.classifier_for_m(out_m)
        # out_f = self.classifier_for_f(out_f)  # Add classifier for 'F'
        
        return out_p#, out_m, out_f  # Return all three outputs


# 定义早停机制
class EarlyStopping:
    def __init__(self, patience=998, delta=0):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
    
    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
    
    def save_checkpoint(self, val_loss, model):
        torch.save(model.state_dict(), 'LSTM_model.pt')
        self.val_loss_min = val_loss

# 加载数据
train_df1 = pd.read_csv('FAA_train.csv')
#train_df2 = pd.read_csv('CNNMEFAR60_train+.csv')
train_df = pd.concat([train_df1])

test_df1 = pd.read_csv('FAA_test.csv')
#test_df2 = pd.read_csv('CNNMEFAR40_test+.csv')
test_df = pd.concat([test_df1])

val_df1 = pd.read_csv('FAA_val.csv')
#val_df2 = pd.read_csv('CNNMEFAR60_val+.csv')
val_df = pd.concat([val_df1])


# 特征选择
features = ['bvp','acc_x','acc_y','acc_z','eda','hr','temperature']

X_train = train_df[features]
y_train_p = train_df['Numeric Workload']
#y_train_m = train_df['Numeric Workload']
#y_train_f = train_df['Numeric Workload']

X_val = val_df[features]
y_val_p = val_df['Numeric Workload']
# y_val_m = val_df['Numeric Workload']
# y_val_f = val_df['Numeric Workload']

X_test = test_df[features]
y_test_p = test_df['Numeric Workload']
# y_test_m = test_df['Numeric Workload']
# y_test_f = test_df['Numeric Workload']
# 数据标准化
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# 转换为PyTorch张量
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32).to(device)
X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)
y_train_tensor_p = torch.tensor(y_train_p.values, dtype=torch.long).to(device)  # Change to long for CE loss
y_val_tensor_p = torch.tensor(y_val_p.values, dtype=torch.long).to(device)  # Change to long for CE loss
y_test_tensor_p = torch.tensor(y_test_p.values, dtype=torch.long).to(device)  # Change to long for CE loss
# y_train_tensor_m = torch.tensor(y_train_m.values, dtype=torch.long).to(device)  # Change to long for CE loss
# y_val_tensor_m = torch.tensor(y_val_m.values, dtype=torch.long).to(device)  # Change to long for CE loss
# y_test_tensor_m = torch.tensor(y_test_m.values, dtype=torch.long).to(device)  # Change to long for CE loss
# y_train_tensor_f = torch.tensor(y_train_f.values, dtype=torch.long).to(device)  # Change to long for CE loss
# y_val_tensor_f = torch.tensor(y_val_f.values, dtype=torch.long).to(device)  # Change to long for CE loss
# y_test_tensor_f = torch.tensor(y_test_f.values, dtype=torch.long).to(device)  # Change to long for CE loss

# 对数据进行LSTM的reshape
X_train_tensor = X_train_tensor.unsqueeze(1)  # 变成 (batch_size, seq_len, input_dim)
X_val_tensor = X_val_tensor.unsqueeze(1)
X_test_tensor = X_test_tensor.unsqueeze(1)

# 创建数据加载器
# train_dataset = FatigueDataset(X_train_tensor, (y_train_tensor_p, y_train_tensor_m, y_train_tensor_f))
# val_dataset = FatigueDataset(X_val_tensor, (y_val_tensor_p, y_val_tensor_m, y_val_tensor_f))
# test_dataset = FatigueDataset(X_test_tensor, (y_test_tensor_p, y_test_tensor_m, y_test_tensor_f))

train_dataset = FatigueDataset(X_train_tensor, y_train_tensor_p)
val_dataset = FatigueDataset(X_val_tensor, y_val_tensor_p)
test_dataset = FatigueDataset(X_test_tensor, y_test_tensor_p)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 初始化模型、损失函数和优化器
model = LSTMModel(input_dim=X_train.shape[1]).to(device)
criterion = nn.CrossEntropyLoss()  # Change to CrossEntropyLoss
optimizer = optim.AdamW(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)
early_stopping = EarlyStopping(patience=5, delta=0.001)

# 训练模型
num_epochs = 998
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    correct_p = 0
    correct_m = 0
    correct_f = 0
    total_p = 0
    total_m = 0
    total_f = 0
    with tqdm(total=len(train_loader), desc=f"Epoch {epoch + 1}/{num_epochs}") as pbar:
        for X_batch, y_batch in train_loader:
            y_p, y_m ,y_f= y_batch
            optimizer.zero_grad()
            output_p, output_m , output_f= model(X_batch)
            loss_1 = criterion(output_p, y_p)
            loss_2 = criterion(output_m, y_m)
            loss_3 = criterion(output_f, y_f)
            loss = loss_1 + loss_2 + loss_3
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted_p = torch.max(output_p.data, 1)
            _, predicted_m = torch.max(output_m.data, 1)
            _, predicted_f = torch.max(output_f.data, 1)
            total_p += y_p.size(0)
            total_m += y_m.size(0)
            total_f += y_f.size(0)
            correct_p += (predicted_p == y_p).sum().item()
            correct_m += (predicted_m == y_m).sum().item()
            correct_f += (predicted_f == y_f).sum().item()
            
            pbar.update(1)
    
    train_accuracy_p = correct_p / total_p
    train_accuracy_m = correct_m / total_m
    train_accuracy_f = correct_f / total_f
    if epoch % 5 == 0:
        save_model(model, 'LSTM', epoch)
    scheduler.step(train_loss)
    
    # 验证模型
    model.eval()
    val_loss = 0.0
    correct_p = 0
    correct_m = 0
    total_p = 0
    total_m = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            y_p, y_m,y_f = y_batch
            output_p, output_m,output_f = model(X_batch)
            loss_1 = criterion(output_p, y_p)
            loss_2 = criterion(output_m, y_m)
            loss_3 = criterion(output_f, y_f)
            loss = loss_1 + loss_2+loss_3
    
            
            val_loss += loss.item()
            _, predicted_p = torch.max(output_p.data, 1)
            _, predicted_m = torch.max(output_m.data, 1)
            _, predicted_f = torch.max(output_f.data, 1)
            total_p += y_p.size(0)
            total_m += y_m.size(0)
            total_f += y_f.size(0)
            correct_p += (predicted_p == y_p).sum().item()
            correct_m += (predicted_m == y_m).sum().item()
            correct_f += (predicted_f == y_f).sum().item()
    
    val_accuracy_p = correct_p / total_p
    val_accuracy_m = correct_m / total_m
    val_accuracy_f = correct_f / total_f
    val_accuracy = (correct_p + correct_m+correct_f) / (total_p + total_m+total_f)
    
    print(f"Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, Train Accuracy P: {train_accuracy_p:.4f}, Train Accuracy M: {train_accuracy_m:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")
    
    early_stopping(val_loss, model)
    
    if early_stopping.early_stop:
        print("Early stopping")
        break

# 加载最佳模型
model.load_state_dict(torch.load('LSTM_model.pt'))
joblib.dump(scaler, 'LSTMscaler.pkl')

# 预测
model.eval()
y_pred_proba_p = []
y_pred_proba_m = []
y_pred_proba_f = []
y_true_p = []
y_true_m = []
y_true_f = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        y_p, y_m,y_f = y_batch
        output_p, output_m,output_f = model(X_batch)
        y_pred_proba_p.extend(output_p.cpu().numpy())
        y_pred_proba_m.extend(output_m.cpu().numpy())
        y_pred_proba_f.extend(output_f.cpu().numpy())
        y_true_p.extend(y_p.cpu().numpy())
        y_true_m.extend(y_m.cpu().numpy())
        y_true_f.extend(y_f.cpu().numpy())

y_pred_p = np.argmax(y_pred_proba_p, axis=1)
y_pred_m = np.argmax(y_pred_proba_m, axis=1)
y_pred_f = np.argmax(y_pred_proba_f, axis=1)

# 评估模型
accuracy_p = accuracy_score(y_true_p, y_pred_p)
accuracy_m = accuracy_score(y_true_m, y_pred_m)
accuracy_f = accuracy_score(y_true_f, y_pred_f)
print(f"LSTM Model Accuracy P: {accuracy_p:.4f}")
print(f"LSTM Model Accuracy M: {accuracy_m:.4f}")
print(f"LSTM Model Accuracy F: {accuracy_f:.4f}")

Epoch 1/998: 100%|██████████| 7248/7248 [00:23<00:00, 303.30it/s]


Epoch 1, Train Loss: nan, Train Accuracy P: 0.8733, Train Accuracy M: 0.8757, Val Loss: nan, Val Accuracy: 0.6922


Epoch 2/998: 100%|██████████| 7248/7248 [00:24<00:00, 294.06it/s]


Epoch 2, Train Loss: nan, Train Accuracy P: 0.5695, Train Accuracy M: 0.5695, Val Loss: nan, Val Accuracy: 0.5695


Epoch 3/998: 100%|██████████| 7248/7248 [00:24<00:00, 294.05it/s]


Epoch 3, Train Loss: nan, Train Accuracy P: 0.5695, Train Accuracy M: 0.5695, Val Loss: nan, Val Accuracy: 0.5695


Epoch 4/998:  15%|█▌        | 1119/7248 [00:03<00:21, 284.61it/s]


KeyboardInterrupt: 

In [3]:
import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import os
import datetime

def save_model(model: nn.Module, model_name, epoch):
    save_folder_name = 'checkpoints'
    if not os.path.exists(save_folder_name):
        os.makedirs(save_folder_name)
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y%m%d%H%M%S')
    model_path = os.path.join(save_folder_name, f'{model_name}_{epoch}_{timestamp}.pt')
    torch.save(model.state_dict(), model_path)

# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class FatigueDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# 定义LSTM模型
class LSTMModel(nn.Module):
    def __init__(self, input_dim, h_dim=512):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, h_dim, num_layers=1, batch_first=True)
        self.bn1 = nn.BatchNorm1d(h_dim)
        self.fc = nn.Linear(h_dim, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.classifier = nn.Linear(256, 4)  # 4 classes
        self.relu = nn.ReLU()
        self.h_dim = h_dim
    
    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.h_dim).to(device)
        c0 = torch.zeros(1, x.size(0), self.h_dim).to(device)
        out, _ = self.lstm(x, (h0, c0))
        
        out = self.bn1(out[:, -1, :])  # Use the last hidden state
        out = self.relu(self.fc(out))
        out = self.bn2(out)
        out = self.classifier(out)
        
        return out

# 定义早停机制
class EarlyStopping:
    def __init__(self, patience=5, delta=0):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
    
    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
    
    def save_checkpoint(self, val_loss, model):
        torch.save(model.state_dict(), 'LSTM_model.pt')
        self.val_loss_min = val_loss

# 加载数据
train_df = pd.read_csv('FAA_train.csv')
test_df = pd.read_csv('FAA_test.csv')
val_df = pd.read_csv('FAA_val.csv')

# 特征选择
features = ['bvp', 'acc_x', 'acc_y', 'acc_z', 'eda', 'hr', 'temperature']

X_train = train_df[features]
y_train = train_df['Numeric Workload']
X_val = val_df[features]
y_val = val_df['Numeric Workload']
X_test = test_df[features]
y_test = test_df['Numeric Workload']

# 数据标准化
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# 转换为PyTorch张量
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32).to(device)
X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long).to(device)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.long).to(device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long).to(device)

# 对数据进行LSTM的reshape
X_train_tensor = X_train_tensor.unsqueeze(1)  # 变成 (batch_size, seq_len, input_dim)
X_val_tensor = X_val_tensor.unsqueeze(1)
X_test_tensor = X_test_tensor.unsqueeze(1)

# 创建数据加载器
train_dataset = FatigueDataset(X_train_tensor, y_train_tensor)
val_dataset = FatigueDataset(X_val_tensor, y_val_tensor)
test_dataset = FatigueDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 初始化模型、损失函数和优化器
model = LSTMModel(input_dim=X_train.shape[1]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)
early_stopping = EarlyStopping(patience=5, delta=0.001)

# 训练模型
num_epochs = 998
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0
    with tqdm(total=len(train_loader), desc=f"Epoch {epoch + 1}/{num_epochs}") as pbar:
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            if loss.item() == float('inf'):
                print(X_batch)
                print(y_batch)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
            
            pbar.update(1)
    
    train_accuracy = correct / total
    if epoch % 5 == 0:
        save_model(model, 'LSTM', epoch)
    scheduler.step(train_loss)
    
    # 验证模型
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            output = model(X_batch)
            loss = criterion(output, y_batch)
            val_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    
    val_accuracy = correct / total
    print(f"Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")
    
    early_stopping(val_loss, model)
    
    if early_stopping.early_stop:
        print("Early stopping")
        break

# 加载最佳模型
model.load_state_dict(torch.load('LSTM_model.pt'))
joblib.dump(scaler, 'LSTMscaler.pkl')

# 预测
model.eval()
y_pred_proba = []
y_true = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        output = model(X_batch)
        y_pred_proba.extend(output.cpu().numpy())
        y_true.extend(y_batch.cpu().numpy())

y_pred = np.argmax(y_pred_proba, axis=1)

# 评估模型
accuracy = accuracy_score(y_true, y_pred)
print(f"LSTM Model Accuracy: {accuracy:.4f}")


Epoch 1/998:  71%|███████▏  | 5176/7248 [00:17<00:07, 270.59it/s]