In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pickle

In [2]:
class CustomModel(nn.Module):
    def __init__(self):
        super(CustomModel, self).__init__()
        self.conv1d = nn.Conv1d(in_channels=768, out_channels=5, kernel_size=1)
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(5, 4) 

    def forward(self, x):
        x = x.permute(0, 2, 1)  
        x = self.conv1d(x)  
        x = self.global_avg_pool(x).squeeze(-1)  
        x = self.relu(x)
        x = self.fc(x) 
        return x

In [3]:
# 训练函数
def train(model, train_data, train_labels, criterion, optimizer, num_epochs, batch_size):
    model.train()
    for epoch in range(num_epochs):
        correct = 0
        total = 0
        running_loss = 0.0
        indices = np.arange(len(train_data))
        np.random.shuffle(indices)  # 随机打乱索引
        for i in range(0, len(train_data), batch_size):
            batch_indices = indices[i:i+batch_size]
            inputs = torch.tensor(train_data[batch_indices], dtype=torch.float32)
            labels = torch.tensor(train_labels[batch_indices], dtype=torch.long)

            optimizer.zero_grad()

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # 统计准确率
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            running_loss += loss.item()

        epoch_loss = running_loss / (len(train_data) / batch_size)
        epoch_acc = correct / total

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss:.4f}, Accuracy: {epoch_acc:.4f}')

        print('########################')

        test(model, wav2vec_last4, label_last4)


        print('########################')

In [4]:
# 测试函数
def test(model, test_data, test_labels):
    model.eval()
    with torch.no_grad():
        inputs = torch.tensor(test_data, dtype=torch.float32)
        labels = torch.tensor(test_labels, dtype=torch.long)

        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        accuracy = (predicted == labels).sum().item() / len(labels)
        print(f'Accuracy: {accuracy}')

In [5]:
import pickle
#读取数据集
with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session1_w2v2.pkl', 'rb') as f:
    wav2vec_last1 = pickle.load(f)
    print('wav2vec_last1',wav2vec_last1.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session1_label.pkl', 'rb') as f:
    label_last1 = pickle.load(f)
    print('label_last1',label_last1.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session2_w2v2.pkl', 'rb') as f:
    wav2vec_last2 = pickle.load(f)
    print('wav2vec_last2',wav2vec_last2.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session2_label.pkl', 'rb') as f:
    label_last2 = pickle.load(f)
    print('label_last2',label_last2.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session3_w2v2.pkl', 'rb') as f:
    wav2vec_last3 = pickle.load(f)
    print('wav2vec_last3',wav2vec_last3.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session3_label.pkl', 'rb') as f:
    label_last3 = pickle.load(f)
    print('label_last3',label_last3.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session4_w2v2.pkl', 'rb') as f:
    wav2vec_last4 = pickle.load(f)
    print('wav2vec_last4',wav2vec_last4.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session4_label.pkl', 'rb') as f:
    label_last4 = pickle.load(f)
    print('label_last4',label_last4.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session5_w2v2.pkl', 'rb') as f:
    wav2vec_last5 = pickle.load(f)
    print('wav2vec_last5',wav2vec_last5.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session5_label.pkl', 'rb') as f:
    label_last5 = pickle.load(f)
    print('label_last5',label_last5.shape)

wav2vec_last1 (1085, 256, 768)
label_last1 (1085,)
wav2vec_last2 (1023, 256, 768)
label_last2 (1023,)
wav2vec_last3 (1151, 256, 768)
label_last3 (1151,)
wav2vec_last4 (1031, 256, 768)
label_last4 (1031,)
wav2vec_last5 (1241, 256, 768)
label_last5 (1241,)


In [6]:
import numpy as np
wav2vec_last = np.concatenate((wav2vec_last1, wav2vec_last2, wav2vec_last3, wav2vec_last5),axis=0)
label_last = np.concatenate((label_last1,label_last2,label_last3,label_last5))
print(wav2vec_last.shape,label_last.shape)

(4500, 256, 768) (4500,)


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = 15
batch_size = 256

# 初始化模型、损失函数和优化器
model = CustomModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
train(model, wav2vec_last, label_last, criterion, optimizer, num_epochs, batch_size)

# 测试模型
test(model, wav2vec_last4, label_last4)

Epoch [1/15], Loss: 1.0185, Accuracy: 0.4193
########################
Accuracy: 0.39961202715809896
########################
Epoch [2/15], Loss: 0.7190, Accuracy: 0.6722
########################
Accuracy: 0.5800193986420951
########################
Epoch [3/15], Loss: 0.5223, Accuracy: 0.7256
########################
Accuracy: 0.6682832201745877
########################
Epoch [4/15], Loss: 0.3882, Accuracy: 0.9602
########################
Accuracy: 0.7274490785645005
########################
Epoch [5/15], Loss: 0.2676, Accuracy: 0.9880
########################
Accuracy: 0.7361784675072744
########################
Epoch [6/15], Loss: 0.1782, Accuracy: 0.9898
########################
Accuracy: 0.7449078564500485
########################
Epoch [7/15], Loss: 0.1347, Accuracy: 0.9907
########################
Accuracy: 0.7468477206595538
########################
Epoch [8/15], Loss: 0.1064, Accuracy: 0.9916
########################
Accuracy: 0.7468477206595538
########################
Epoch [