In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import time
from torch.utils.data import DataLoader, TensorDataset
import torchdiffeq
import controldiffeq

In [2]:
class _GRU(torch.nn.Module):
    def __init__(self, input_channels, hidden_channels, output_channels, use_intensity):
        super(_GRU, self).__init__()

        assert (input_channels % 2) == 1, "Input channels must be odd: 1 for time, plus 1 for each actual input, " \
                                          "plus 1 for whether an observation was made for the actual input."

        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.output_channels = output_channels
        self.use_intensity = use_intensity

        gru_channels = input_channels if use_intensity else (input_channels - 1) // 2
        self.gru_cell = torch.nn.GRUCell(input_size=gru_channels, hidden_size=hidden_channels)
        self.linear = torch.nn.Linear(hidden_channels, output_channels)

    def extra_repr(self):
        return "input_channels={}, hidden_channels={}, output_channels={}, use_intensity={}" \
               "".format(self.input_channels, self.hidden_channels, self.output_channels, self.use_intensity)

    def evolve(self, h, time_diff):
        raise NotImplementedError

    def _step(self, Xi, h, dt, half_num_channels):
        observation = Xi[:, 1: 1 + half_num_channels].max(dim=1).values > 0.5
        if observation.any():
            Xi_piece = Xi if self.use_intensity else Xi[:, 1 + half_num_channels:]
            Xi_piece = Xi_piece.clone()
            Xi_piece[:, 0] += dt
            new_h = self.gru_cell(Xi_piece, h)
            h = torch.where(observation.unsqueeze(1), new_h, h)
            dt += torch.where(observation, torch.tensor(0., dtype=Xi.dtype, device=Xi.device), Xi[:, 0])
        return h, dt

    def forward(self, times, coeffs, final_index, z0=None):
        interp = controldiffeq.NaturalCubicSpline(times, coeffs)
        X = torch.stack([interp.evaluate(t) for t in times], dim=-2)
        half_num_channels = (self.input_channels - 1) // 2

        # change cumulative intensity into intensity i.e. was an observation made or not, which is what is typically
        # used here
        X[:, 1:, 1:1 + half_num_channels] -= X[:, :-1, 1:1 + half_num_channels]

        # change times into delta-times
        X[:, 0, 0] -= times[0]
        X[:, 1:, 0] -= times[:-1]

        batch_dims = X.shape[:-2]

        if z0 is None:
            z0 = torch.zeros(*batch_dims, self.hidden_channels, dtype=X.dtype, device=X.device)

        X_unbound = X.unbind(dim=1)
        h, dt = self._step(X_unbound[0], z0, torch.zeros(*batch_dims, dtype=X.dtype, device=X.device),
                           half_num_channels)
        hs = [h]
        time_diffs = times[1:] - times[:-1]
        for time_diff, Xi in zip(time_diffs, X_unbound[1:]):
            h = self.evolve(h, time_diff)
            h, dt = self._step(Xi, h, dt, half_num_channels)
            hs.append(h)
        out = torch.stack(hs, dim=1)

        final_index_indices = final_index.unsqueeze(-1).expand(out.size(0), out.size(2)).unsqueeze(1)
        final_out = out.gather(dim=1, index=final_index_indices).squeeze(1)

        return self.linear(final_out)

class _ODERNNFunc(torch.nn.Module):
    def __init__(self, hidden_channels, hidden_hidden_channels, num_hidden_layers):
        super(_ODERNNFunc, self).__init__()

        layers = [torch.nn.Linear(hidden_channels, hidden_hidden_channels)]
        for _ in range(num_hidden_layers - 1):
            layers.append(torch.nn.Tanh())
            layers.append(torch.nn.Linear(hidden_hidden_channels, hidden_hidden_channels))
        layers.append(torch.nn.Tanh())
        layers.append(torch.nn.Linear(hidden_hidden_channels, hidden_channels))
        self.sequential = torch.nn.Sequential(*layers)

    def forward(self, t, x):
        return self.sequential(x)

class ODERNN(_GRU):
    def __init__(self, input_channels, hidden_channels, output_channels, hidden_hidden_channels, num_hidden_layers,
                 use_intensity):
        super(ODERNN, self).__init__(input_channels=input_channels,
                                     hidden_channels=hidden_channels,
                                     output_channels=output_channels,
                                     use_intensity=use_intensity)
        self.hidden_hidden_channels = hidden_hidden_channels
        self.num_hidden_layers = num_hidden_layers

        self.func = _ODERNNFunc(hidden_channels, hidden_hidden_channels, num_hidden_layers)

    def extra_repr(self):
        return "hidden_hidden_channels={}, num_hidden_layers={}".format(self.hidden_hidden_channels,
                                                                        self.num_hidden_layers)

    def evolve(self, h, time_diff):
        t = torch.tensor([0, time_diff.item()], dtype=time_diff.dtype, device=time_diff.device)
        out = torchdiffeq.odeint_adjoint(func=self.func, y0=h, t=t, method='rk4')
        return out[1]

class EmotionClassifier(torch.nn.Module):
    def __init__(self, input_channels, hidden_channels, output_channels, hidden_hidden_channels, num_hidden_layers, use_intensity):
        super(EmotionClassifier, self).__init__()
        self.conv1d = torch.nn.Conv1d(in_channels=input_channels, out_channels=9, kernel_size=1)
        self.ODERNN = ODERNN(input_channels=9, 
                              hidden_channels=hidden_channels, 
                              output_channels=output_channels, 
                              hidden_hidden_channels=hidden_hidden_channels, 
                              num_hidden_layers=num_hidden_layers, 
                              use_intensity=use_intensity)
        self.classifier = torch.nn.Linear(output_channels, num_classes)

    def forward(self, times, coeffs, final_index, z0=None):
        processed_coeffs = tuple(self.conv1d(c.permute(0, 2, 1)).permute(0, 2, 1) for c in coeffs)
        x = self.ODERNN(times, processed_coeffs, final_index, z0)
        x = self.classifier(x)
        return x

In [3]:
import pickle
#读取数据集
with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session1_w2v2.pkl', 'rb') as f:
    wav2vec_last1 = pickle.load(f)
    print('wav2vec_last1',wav2vec_last1.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session1_label.pkl', 'rb') as f:
    label_last1 = pickle.load(f)
    print('label_last1',label_last1.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session2_w2v2.pkl', 'rb') as f:
    wav2vec_last2 = pickle.load(f)
    print('wav2vec_last2',wav2vec_last2.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session2_label.pkl', 'rb') as f:
    label_last2 = pickle.load(f)
    print('label_last2',label_last2.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session3_w2v2.pkl', 'rb') as f:
    wav2vec_last3 = pickle.load(f)
    print('wav2vec_last3',wav2vec_last3.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session3_label.pkl', 'rb') as f:
    label_last3 = pickle.load(f)
    print('label_last3',label_last3.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session4_w2v2.pkl', 'rb') as f:
    wav2vec_last4 = pickle.load(f)
    print('wav2vec_last4',wav2vec_last4.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session4_label.pkl', 'rb') as f:
    label_last4 = pickle.load(f)
    print('label_last4',label_last4.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session5_w2v2.pkl', 'rb') as f:
    wav2vec_last5 = pickle.load(f)
    print('wav2vec_last5',wav2vec_last5.shape)

with open('/home/ni/step1-提取数据特征/整合-按条提取语音_Session4_pt_特征/data_Session5_label.pkl', 'rb') as f:
    label_last5 = pickle.load(f)
    print('label_last5',label_last5.shape)

wav2vec_last1 (1085, 256, 768)
label_last1 (1085,)
wav2vec_last2 (1023, 256, 768)
label_last2 (1023,)
wav2vec_last3 (1151, 256, 768)
label_last3 (1151,)
wav2vec_last4 (1031, 256, 768)
label_last4 (1031,)
wav2vec_last5 (1241, 256, 768)
label_last5 (1241,)


In [4]:
import numpy as np
wav2vec_last = np.concatenate((wav2vec_last1, wav2vec_last2, wav2vec_last3, wav2vec_last5),axis=0)
label_last = np.concatenate((label_last1,label_last2,label_last3,label_last5))
print(wav2vec_last.shape,label_last.shape)

(4500, 256, 768) (4500,)


In [5]:
# 设置设备为GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import controldiffeq
import time
from sklearn.metrics import confusion_matrix

train_data = torch.from_numpy(wav2vec_last).float().to(device)
train_labels = torch.from_numpy(label_last).long().to(device)
test_data = torch.from_numpy(wav2vec_last4).float().to(device)
test_labels = torch.from_numpy(label_last4).long().to(device)

print(train_data.shape, train_labels.shape)

# 创建数据加载器
batch_size = 32
train_dataset = TensorDataset(train_data, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# 参数设置
input_channels = 769  
hidden_channels = 128
output_channels = 64
hidden_hidden_channels = 32
num_hidden_layers = 2
use_intensity = False
num_classes = 4
learning_rate = 0.001
num_epochs = 15

# 创建模型、损失函数和优化器，并将模型移动到GPU
model = EmotionClassifier(input_channels, hidden_channels, output_channels, hidden_hidden_channels, num_hidden_layers, use_intensity).to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 训练模型
for epoch in range(num_epochs):
    model.train()
    start_time = time.time()
    
    for batch_data, batch_labels in train_loader:
        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)  

        batch_size_actual = batch_data.size(0)  
        times = torch.arange(0, 256, device=device).float()  
        data_with_time = torch.cat([times.unsqueeze(1).expand(-1, batch_size_actual).transpose(0, 1).unsqueeze(-1), batch_data], dim=-1)
        coeffs = controldiffeq.natural_cubic_spline_coeffs(times, data_with_time)
        final_index = torch.tensor([255] * batch_size_actual, device=device)

        optimizer.zero_grad()
        outputs = model(times, coeffs, final_index)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

    # 计算本轮的时间
    end_time = time.time()
    epoch_duration = end_time - start_time
    
    # 打印训练损失
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Duration: {epoch_duration:.2f} seconds')

    # 测试模型
    model.eval()
    all_predictions = []
    all_true_labels = []

    with torch.no_grad():
        batch_size_actual = test_data.size(0)
        times = torch.arange(0, 256, device=device).float()
        data_with_time = torch.cat([times.unsqueeze(0).unsqueeze(-1).expand(batch_size_actual, -1, -1), test_data], dim=-1)
        coeffs = controldiffeq.natural_cubic_spline_coeffs(times, data_with_time)
        final_index = torch.tensor([255] * batch_size_actual, device=device)

        outputs = model(times, coeffs, final_index)
        _, predicted = torch.max(outputs.data, 1)
        
        all_predictions.extend(predicted.cpu().numpy())
        all_true_labels.extend(test_labels.cpu().numpy())

    # 计算未加权精度 (UA)
    conf_matrix = confusion_matrix(all_true_labels, all_predictions)
    class_accuracy = conf_matrix.diagonal() / conf_matrix.sum(axis=1)
    UA = np.mean(class_accuracy)
    
    # 打印未加权精度 (UA)
    print(f'Unweighted Accuracy (UA) on test set after epoch {epoch + 1}: {100 * UA:.2f}%')

# 最终输出预测结果
print(all_predictions)

torch.Size([4500, 256, 768]) torch.Size([4500])
Epoch [1/15], Loss: 0.9182, Duration: 161.20 seconds
Unweighted Accuracy (UA) on test set after epoch 1: 40.23%
Epoch [2/15], Loss: 0.1881, Duration: 160.66 seconds
Unweighted Accuracy (UA) on test set after epoch 2: 53.99%
Epoch [3/15], Loss: 0.2278, Duration: 160.05 seconds
Unweighted Accuracy (UA) on test set after epoch 3: 59.36%
Epoch [4/15], Loss: 0.0156, Duration: 159.93 seconds
Unweighted Accuracy (UA) on test set after epoch 4: 60.92%
Epoch [5/15], Loss: 0.0248, Duration: 159.92 seconds
Unweighted Accuracy (UA) on test set after epoch 5: 55.26%
Epoch [6/15], Loss: 0.0076, Duration: 159.89 seconds
Unweighted Accuracy (UA) on test set after epoch 6: 64.78%
Epoch [7/15], Loss: 0.0330, Duration: 159.89 seconds
Unweighted Accuracy (UA) on test set after epoch 7: 57.98%
Epoch [8/15], Loss: 0.0026, Duration: 159.82 seconds
Unweighted Accuracy (UA) on test set after epoch 8: 67.31%
Epoch [9/15], Loss: 0.0322, Duration: 158.96 seconds
Unw