In [1]:
import torch
import signatory
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

In [2]:
class SigModel(nn.Module):
    """Pointwise 1-D convolution followed by a path signature and a linear classifier.

    The convolution projects every time step down to ``hidden_channels``
    features, the signature transform summarises the resulting path as a
    fixed-size vector, and the linear layer maps that vector to class logits.

    Args:
        input_channels: Number of features per time step of the input.
        output_classes: Number of logits produced per sample.
        hidden_channels: Number of channels produced by the convolution
            (default 5, the value originally hard-coded).
        depth: Truncation depth of the signature (default 3, the value
            originally hard-coded).

    Raises:
        ValueError: If ``hidden_channels`` or ``depth`` is smaller than 1.
    """

    def __init__(self, input_channels, output_classes, hidden_channels=5, depth=3):
        super().__init__()
        if hidden_channels < 1 or depth < 1:
            raise ValueError("hidden_channels and depth must both be >= 1")

        self.depth = depth

        # NOTE(review): with kernel_size=1, padding=1 lengthens the sequence by
        # two zero-padded steps whose outputs equal the conv bias. It is kept
        # here to preserve the existing behaviour - confirm it is intended.
        self.conv1d = nn.Conv1d(input_channels, hidden_channels,
                                kernel_size=1, stride=1, padding=1)

        # A depth-N signature over c channels has c + c**2 + ... + c**N terms
        # (155 for c=5, N=3), which is the input width of the classifier.
        signature_channels = sum(hidden_channels ** k for k in range(1, depth + 1))
        self.fc = nn.Linear(signature_channels, output_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Tensor laid out as (batch, length, input_channels); in this
                notebook that is (batch_size, 256, 768).

        Returns:
            Tensor of shape (batch, output_classes).
        """
        # Conv1d expects (batch, channels, length).
        x = x.permute(0, 2, 1)
        x = self.conv1d(x)
        # The signature transform expects (batch, length, channels).
        x = x.permute(0, 2, 1)
        x = signatory.signature(x, self.depth)
        x = self.fc(x)

        return x

In [3]:
import pickle

# Directory that holds the per-session feature and label pickles.
DATA_DIR = '/home/ni/step1-提取数据特征/整合-按条提取语音_Session2_pt_特征'


def load_session_pickle(session, kind, tag):
    """Load one pickled object for a session and print its shape.

    Args:
        session: Session number used in the file name (1-5 in this notebook).
        kind: File-name suffix, 'w2v2' for features or 'label' for labels.
        tag: Text printed in front of the shape.

    Returns:
        The unpickled object (it must expose ``.shape``).
    """
    # NOTE(review): pickle.load can execute arbitrary code - only use it on
    # files produced by a trusted pipeline.
    with open(f'{DATA_DIR}/data_Session{session}_{kind}.pkl', 'rb') as f:
        loaded = pickle.load(f)
        print(tag, loaded.shape)
    return loaded


# Load the dataset: features first, then labels, session by session.
wav2vec_by_session = {}
label_by_session = {}
for session in range(1, 6):
    wav2vec_by_session[session] = load_session_pickle(
        session, 'w2v2', f'wav2vec_last{session}')
    label_by_session[session] = load_session_pickle(
        session, 'label', f'label_last{session}')

# Keep the per-session names that the rest of the notebook refers to.
wav2vec_last1, wav2vec_last2, wav2vec_last3, wav2vec_last4, wav2vec_last5 = (
    wav2vec_by_session[s] for s in range(1, 6))
label_last1, label_last2, label_last3, label_last4, label_last5 = (
    label_by_session[s] for s in range(1, 6))

wav2vec_last1 (1085, 256, 768)
label_last1 (1085,)
wav2vec_last2 (1023, 256, 768)
label_last2 (1023,)
wav2vec_last3 (1151, 256, 768)
label_last3 (1151,)
wav2vec_last4 (1031, 256, 768)
label_last4 (1031,)
wav2vec_last5 (1241, 256, 768)
label_last5 (1241,)


In [4]:
# Imports repeated from the first cell so this cell also runs on its own.
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

# Hyperparameters
input_channels = 768
output_classes = 4
batch_size = 32
epochs = 15
learning_rate = 0.0001
seed = 0

# Fix the RNG so weight initialisation and batch shuffling are repeatable.
torch.manual_seed(seed)

# Use the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Sessions 1, 3, 4 and 5 form the training set; session 2 is held out for testing.
wav2vec_last = np.concatenate((wav2vec_last1, wav2vec_last3, wav2vec_last4, wav2vec_last5), axis=0)
label_last = np.concatenate((label_last1, label_last3, label_last4, label_last5))

# Convert to tensors and move them to the selected device.
train_data = torch.from_numpy(wav2vec_last).float().to(device)
train_labels = torch.from_numpy(label_last).long().to(device)
test_data = torch.from_numpy(wav2vec_last2).float().to(device)
test_labels = torch.from_numpy(label_last2).long().to(device)

print(train_data.shape, train_labels.shape)

# Data loaders
train_dataset = TensorDataset(train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(test_data, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model, loss function and optimizer
model = SigModel(input_channels, output_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


# Training loop
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average even when the last batch is short.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

    # Evaluate on the held-out test session, batch by batch, so the whole
    # test set never has to pass through the model in a single forward call.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f"Accuracy on test set: {accuracy:.4f}")

print("Training finished!")

torch.Size([4508, 256, 768]) torch.Size([4508])
Epoch [1/15], Loss: 1.3338
Accuracy on test set: 0.5425
Epoch [2/15], Loss: 0.9486
Accuracy on test set: 0.6510
Epoch [3/15], Loss: 0.6752
Accuracy on test set: 0.6823
Epoch [4/15], Loss: 0.5336
Accuracy on test set: 0.6999
Epoch [5/15], Loss: 0.4461
Accuracy on test set: 0.7165
Epoch [6/15], Loss: 0.3823
Accuracy on test set: 0.7263
Epoch [7/15], Loss: 0.3330
Accuracy on test set: 0.7302
Epoch [8/15], Loss: 0.2913
Accuracy on test set: 0.7449
Epoch [9/15], Loss: 0.2567
Accuracy on test set: 0.7439
Epoch [10/15], Loss: 0.2274
Accuracy on test set: 0.7488
Epoch [11/15], Loss: 0.2043
Accuracy on test set: 0.7507
Epoch [12/15], Loss: 0.1834
Accuracy on test set: 0.7546
Epoch [13/15], Loss: 0.1669
Accuracy on test set: 0.7586
Epoch [14/15], Loss: 0.1530
Accuracy on test set: 0.7615
Epoch [15/15], Loss: 0.1406
Accuracy on test set: 0.7605
Training finished!
