# working model for tensorfusion

In [40]:
import pickle
import torch
from torchmetrics import F1Score
import pandas as pd
import numpy as np
from glob import glob
from collections import Counter
from datasets import load_dataset, Dataset, Audio, Features
from torch.utils.data import Dataset, DataLoader, random_split
from torch import nn, optim
print(torch.__version__)

1.13.1+cu117


## 

dict_for_dataset = [embeddings_wav_train, embeddings_wav_val, embeddings_wav_test,
                    embeddings_txt_train, embeddings_txt_val, embeddings_txt_test,
                    KEMDY20_dict]

In [1]:
import pickle
with open('/home/arplab/project/paradeigma/multi_modal/model/data/paradeigma_KEMDY20_for_dataset.pkl', 'rb') as f:
    dict_for_dataset = pickle.load(f)


  from .autonotebook import tqdm as notebook_tqdm


In [13]:
embeddings_wav_train, embeddings_wav_val, embeddings_wav_test, embeddings_txt_train, embeddings_txt_val, embeddings_txt_test, KEMDY20_dict = dict_for_dataset
train_df = KEMDY20_dict['train_df'][1]
val_df = KEMDY20_dict['val_df'][1]
test_df = KEMDY20_dict['test_df'][1]

In [14]:
def sequence_padding(ts_list, padding_length = 50, mode = 'constant'):
    
    padding_value=0
    
    if (type(ts_list) != type([])) :
        ts_list = [padding_value] * padding_length
    
    elif len(ts_list) >= padding_length :
        ts_list = ts_list[0:padding_length]
    
    elif mode == 'constant':
        length = padding_length - len(ts_list)
        extend_list = [padding_value] * length
        ts_list = ts_list + extend_list    
        
    elif mode == 'replicate':
        
        quotient = padding_length // len(ts_list)
        remainder = padding_length % len(ts_list)
        result = ts_list * quotient
        result += ts_list[:remainder]
        ts_list = result
        
    return ts_list 

train_df['Scaled EDA'] = train_df['Scaled EDA'].apply(sequence_padding)
train_df['Scaled TEMP'] = train_df['Scaled TEMP'].apply(sequence_padding)
val_df['Scaled EDA'] = val_df['Scaled EDA'].apply(sequence_padding)
val_df['Scaled TEMP'] = val_df['Scaled TEMP'].apply(sequence_padding)
test_df['Scaled EDA'] = test_df['Scaled EDA'].apply(sequence_padding)
test_df['Scaled TEMP'] = test_df['Scaled TEMP'].apply(sequence_padding)

In [41]:
class EtriDataset(Dataset):
    def __init__(self, file_names, 
                 text_embeddings, 
                 wav_embeddings, 
                 Temp,
                 EDA,
                 Emotion,
                 Emotion_ext, 
                 Arousal, 
                 Valence):
        self.file_names = file_names
        self.text_embeddings = text_embeddings
        self.wav_embeddings = wav_embeddings
        self.temp = Temp
        self.eda = EDA
        self.label_emotion = Emotion
        self.label_emotion_ext = Emotion_ext
        self.label_arousal = Arousal
        self.label_valence = Valence
        
    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, idx):
        text_embeddings = self.text_embeddings[idx]
        wav_embeddings = self.wav_embeddings[idx]
        temp = self.temp[idx]
        eda = self.eda[idx]
        label_emotion = self.label_emotion[idx]
        label_emotion_ext = self.label_emotion_ext[idx]
        label_arousal = self.label_arousal[idx]
        label_valence = self.label_valence[idx]
        return text_embeddings, wav_embeddings, temp, eda, label_emotion, label_emotion_ext, label_arousal, label_valence

In [42]:
train_dataset = EtriDataset(file_names = train_df['Segment ID'],
                      text_embeddings = embeddings_txt_train,
                      wav_embeddings = embeddings_wav_train,
                      Emotion = train_df['Emotion'],
                      Arousal = train_df['Arousal'],
                      Valence = train_df['Valence'],
                      EDA = torch.concat(list(train_df['Scaled EDA'].apply(lambda x: torch.tensor(x).view(1,-1)))), 
                      Temp = torch.concat(list(train_df['Scaled TEMP'].apply(lambda x: torch.tensor(x).view(1,-1)))), 
                      Emotion_ext = torch.concat(list(train_df['emotion_vector'].apply(lambda x: torch.tensor(x).view(1,-1))))
                      )

validation_dataset = EtriDataset(file_names = val_df['Segment ID'],
                      text_embeddings = embeddings_txt_val,
                      wav_embeddings = embeddings_wav_val,
                      Emotion = val_df['Emotion'],
                      Arousal = val_df['Arousal'],
                      Valence = val_df['Valence'],
                      EDA = torch.concat(list(val_df['Scaled EDA'].apply(lambda x: torch.tensor(x).view(1,-1)))), 
                      Temp = torch.concat(list(val_df['Scaled TEMP'].apply(lambda x: torch.tensor(x).view(1,-1)))), 
                      Emotion_ext = torch.concat(list(val_df['emotion_vector'].apply(lambda x: torch.tensor(x).view(1,-1))))
                      )

test_dataset = EtriDataset(file_names = test_df['Segment ID'],
                      text_embeddings = embeddings_txt_test,
                      wav_embeddings = embeddings_wav_test,
                      Emotion = test_df['Emotion'],
                      Arousal = test_df['Arousal'],
                      Valence = test_df['Valence'],
                      EDA = torch.concat(list(test_df['Scaled EDA'].apply(lambda x: torch.tensor(x).view(1,-1)))), 
                      Temp = torch.concat(list(test_df['Scaled TEMP'].apply(lambda x: torch.tensor(x).view(1,-1)))),
                      Emotion_ext = torch.concat(list(test_df['emotion_vector'].apply(lambda x: torch.tensor(x).view(1,-1))))
                      )

In [44]:
print(f"Training Data Size : {len(train_dataset)}")
print(f"Validation Data Size : {len(validation_dataset)}")
print(f"Testing Data Size : {len(test_dataset)}")

Training Data Size : 2746
Validation Data Size : 2535
Testing Data Size : 2580


In [45]:
train_dataloader = DataLoader(train_dataset, batch_size=256, shuffle=True, drop_last=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=64, shuffle=True, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True, drop_last=True)

In [61]:
# encoding Emotion for whole data
# 사전에 실제로 encoding한 끝 수가 마지막 linear layer의 끝자리랑 맞아야 합니다. 아니면 CUDA error: CUBLAS_STATUS_EXECUTION_FAILED가 나는 것 같아요.
# 예를 들어, label이 0~9, 11,13이렇게 12개가 되었어도, 0~13은 14개니까 마지막 레이어에서 14개 unit을 받아야 multiclass classification이 에러없이 진행됩니다!
# 데이터에서 정답 라벨 인코딩: ['neutral', 'happy', 'surprise', 'disgust', 'angry', 'sad', 'fear']
# 이 순서를 지켜서 라벨링을 해야함
encode_dict = {'neutral':0, 'happy':1, 'surprise':2, 'disgust':3, 'angry':4, 'sad':5, 'fear':6,
               'surprise;neutral': 7, 'neutral;sad': 8, 'happy;neutral': 9, 'angry;neutral': 10, 
               'neutral;disqust': 11, 'neutral;fear': 12, 'happy;surprise': 13, 'happy;angry;neutral': 14,
               'angry;disqust': 15, 'happy;surprise;neutral': 16, 'happy;fear': 17,'happy;neutral;fear': 18,
               'angry;neutral;disqust': 19, 'neutral;disqust;sad': 20, 'angry;neutral;disqust;fear;sad': 21,
               'happy;sad': 22, 'happy;neutral;disqust': 23}
decode_dict = {b:i for i, b in encode_dict.items()}
encode_dict, decode_dict

({'neutral': 0,
  'happy': 1,
  'surprise': 2,
  'disgust': 3,
  'angry': 4,
  'sad': 5,
  'fear': 6,
  'surprise;neutral': 7,
  'neutral;sad': 8,
  'happy;neutral': 9,
  'angry;neutral': 10,
  'neutral;disqust': 11,
  'neutral;fear': 12,
  'happy;surprise': 13,
  'happy;angry;neutral': 14,
  'angry;disqust': 15,
  'happy;surprise;neutral': 16,
  'happy;fear': 17,
  'happy;neutral;fear': 18,
  'angry;neutral;disqust': 19,
  'neutral;disqust;sad': 20,
  'angry;neutral;disqust;fear;sad': 21,
  'happy;sad': 22,
  'happy;neutral;disqust': 23},
 {0: 'neutral',
  1: 'happy',
  2: 'surprise',
  3: 'disgust',
  4: 'angry',
  5: 'sad',
  6: 'fear',
  7: 'surprise;neutral',
  8: 'neutral;sad',
  9: 'happy;neutral',
  10: 'angry;neutral',
  11: 'neutral;disqust',
  12: 'neutral;fear',
  13: 'happy;surprise',
  14: 'happy;angry;neutral',
  15: 'angry;disqust',
  16: 'happy;surprise;neutral',
  17: 'happy;fear',
  18: 'happy;neutral;fear',
  19: 'angry;neutral;disqust',
  20: 'neutral;disqust;sad',

# NetWork 만들기

In [46]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [71]:
class MLPNetwork_pre(nn.Module):
    def __init__(self, input_length, input_width):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_length*input_width, 768)
        self.gelu1 = nn.GELU()
        self.bn1 = nn.BatchNorm1d(768)
        self.fc2 = nn.Linear(768, 512)
        self.gelu2 = nn.GELU()
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 32)
        self.gelu3 = nn.GELU()
        
    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.gelu1(x)
        x = self.bn1(x)
        x = self.fc2(x)
        x = self.gelu2(x)
        x = self.bn2(x)
        x = self.fc3(x)
        output = self.gelu3(x)
        return output
    
class ConvNetwork_pre(nn.Module):
    def __init__(self, input_channel):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels = input_channel, out_channels= 32, kernel_size = 3, padding = 1)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(in_channels = 32, out_channels = 10, kernel_size = 3, padding = 1)
        self.relu2 = nn.ReLU()
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        output = self.relu2(x)
        return output

class ConvNetwork_final(nn.Module):
    def __init__(self, input_channel):
        super().__init__()
        self.conv2d_1 = nn.Conv2d(in_channels = input_channel, out_channels = 64, kernel_size=2)
        self.leakyrelu_1 = nn.LeakyReLU()
        self.maxpool2d_1 = nn.MaxPool2d(2)
        self.conv2d_2 = nn.Conv2d(in_channels = 64, out_channels = 32, kernel_size=2)
        self.leakyrelu_2 = nn.LeakyReLU()
        self.maxpool2d_2 = nn.MaxPool2d(2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(224, 64)
        self.leakyrelu_3 = nn.LeakyReLU()
        self.batchnorm = nn.BatchNorm1d(64)
        self.drop = nn.Dropout(p=0.25)
        self.fc2 = nn.Linear(64, 2)
        
    def forward(self, x):
        x = self.conv2d_1(x)
        x = self.leakyrelu_1(x)
        x = self.maxpool2d_1(x)
        x = self.conv2d_2(x)
        x = self.leakyrelu_2(x)
        x = self.maxpool2d_2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.leakyrelu_3(x)
        x = self.batchnorm(x)
        x = self.drop(x)
        output = self.fc2(x)  
        return output
        



In [72]:
class TensorFusionMixer(nn.Module):
    def __init__(self, ModelA, ModelB, ModelC, ModelD, ModelE):
        super().__init__()
        self.ModelA = ModelA
        self.ModelB = ModelB
        self.ModelC = ModelC
        self.ModelD = ModelD
        self.Model_cnn_final = ModelE
        # self.softmax = nn.Softmax(dim=1)
        
    def tensor_fusion(self, batch_arr1, batch_arr2, batch_arr3):
        fusion_matrix_lst = []
        for i, (arr1, arr2, arr3) in enumerate(zip(batch_arr1, batch_arr2, batch_arr3)):
            arr1 = arr1.unsqueeze(-1).unsqueeze(-1)
            arr2 = arr2.unsqueeze(0).unsqueeze(-1)
            arr3 = arr3.squeeze().unsqueeze(0).unsqueeze(0)
            
            # outer_matrix = torch.einsum('i,j,kp->ijk', arr1, arr2, arr3)
            kron_matrix = torch.kron(torch.kron(arr1,arr2), arr3)
            l, w, d = kron_matrix.shape
            
            kron_matrix = kron_matrix.view(-1, l, w, d)
            fusion_matrix_lst.append(kron_matrix)
            
        fusion_matrix = torch.concat(fusion_matrix_lst)
        # fusion_matrix = fusion_matrix.unsqueeze(-1)
        
        return fusion_matrix
        
    def forward(self, x1, x2, x3, x4):
        x1 = self.ModelA(x1)
        x2 = self.ModelB(x2)
        x3 = self.ModelC(x3)
        x4 = self.ModelD(x4)
        
        x5 = torch.cat([x3,x4], dim=0)
        fusion_matrix = self.tensor_fusion(x1, x2, x5)
        
        output = self.Model_cnn_final(fusion_matrix) # 새로운 emotion사용
        # output = self.softmax(x) # 기존 emotion사용
        return output




In [73]:
_, txt_input_length, txt_input_width = train_dataset.text_embeddings.shape
_, wav_input_length, wav_input_width = train_dataset.wav_embeddings.shape
temp_input_length = train_dataset.temp.shape[1]
eda_input_length = train_dataset.eda.shape[1]

# tf_mixer에 들어갈 wav mlp, txt mlp 선언
model_mlp_txt = MLPNetwork_pre(txt_input_length,txt_input_width).to(device)
model_mlp_wav = MLPNetwork_pre(wav_input_length,wav_input_width).to(device)
model_conv_temp = ConvNetwork_pre(temp_input_length).to(device)
model_conv_eda = ConvNetwork_pre(eda_input_length).to(device)

model_cnn_final = ConvNetwork_final(32).to(device)

# 최종 모델 선언
model_tf_cnn_mixer = TensorFusionMixer(ModelA = model_mlp_txt, 
                                   ModelB = model_mlp_wav,
                                   ModelC = model_conv_temp,
                                   ModelD = model_conv_eda,
                                   ModelE = model_cnn_final).to(device)

# model 병렬 학습 처리
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model_mlp_txt = nn.DataParallel(model_mlp_txt).to(device)
    model_mlp_wav = nn.DataParallel(model_mlp_wav).to(device)
    model_conv_temp = nn.DataParallel(model_conv_temp).to(device)
    model_conv_eda = nn.DataParallel(model_conv_eda).to(device)
    model_tf_cnn_mixer = nn.DataParallel(model_tf_cnn_mixer).to(device)
print(model_tf_cnn_mixer)

TensorFusionMixer(
  (ModelA): MLPNetwork_pre(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (fc1): Linear(in_features=61440, out_features=768, bias=True)
    (gelu1): GELU(approximate='none')
    (bn1): BatchNorm1d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (fc2): Linear(in_features=768, out_features=512, bias=True)
    (gelu2): GELU(approximate='none')
    (bn2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (fc3): Linear(in_features=512, out_features=32, bias=True)
    (gelu3): GELU(approximate='none')
  )
  (ModelB): MLPNetwork_pre(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (fc1): Linear(in_features=114432, out_features=768, bias=True)
    (gelu1): GELU(approximate='none')
    (bn1): BatchNorm1d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (fc2): Linear(in_features=768, out_features=512, bias=True)
    (gelu2): GELU(approximate='none')
    (bn2): BatchNorm1d(512, eps=1e-

In [74]:
class CCC(nn.Module):
    def __init__(self):
        super(CCC, self).__init__()
        self.mean = torch.mean
        self.var = torch.var
        self.sum = torch.sum
        self.sqrt = torch.sqrt
        self.std = torch.std
        
    def forward(self, pred, target):
        mean_gt = self.mean (target, 0)
        mean_pred = self.mean (pred, 0)
        var_gt = self.var (target, 0)
        var_pred = self.var (pred, 0)
        v_pred = pred - mean_pred
        v_gt = target - mean_gt
        cor = self.sum (v_pred * v_gt) / (self.sqrt(self.sum(v_pred ** 2)) * self.sqrt(self.sum(v_gt ** 2)))
        sd_gt = self.std(target)
        sd_pred = self.std(pred)
        numerator = 2 * cor * sd_gt * sd_pred
        denominator = var_gt + var_pred + (mean_gt-mean_pred) ** 2
        ccc = numerator / denominator
        return ccc
    
ccc = CCC()

# 학습을 위한 train, test method 만들기

In [75]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)    
    # data 순서: text_embeddings, wav_embeddings, temp, eda, label_emotion, label_emotion_ext, label_arousal, label_valence
    for batch, (X_txt, X_wav, X_temp, X_eda, 
                    label_emotion, label_emotion_ext, label_arousal, label_valence) in enumerate(dataloader): 

        # 예측 오류 계산 
        X_txt, X_wav, X_temp, X_eda, y_v, y_a= X_txt.to(device), X_wav.to(device), X_temp.to(device), X_eda.to(device),label_valence.type(torch.float32).to(device), label_arousal.type(torch.float32).to(device)
        
        X_temp = X_temp.unsqueeze(dim=-1)
        X_eda = X_eda.unsqueeze(dim=-1)
        
        pred = model(X_txt, X_wav, X_temp, X_eda)
    
        pred_v = pred[:,0]
        pred_a = pred[:,1]
        
        loss_a = loss_fn(pred_a, y_a)
        loss_v = loss_fn(pred_v, y_v)

        pred_ccc_a = ccc(pred_a, y_a)
        pred_ccc_v= ccc(pred_v, y_v)
        
        ccc_mean = (pred_ccc_a + pred_ccc_v) / 2

        # 역전파
        optimizer.zero_grad()
        loss_a.backward(retain_graph = True)
        loss_v.backward()
        optimizer.step()
        
        if batch % 100 == 0:
            loss_a, loss_v, current = loss_a.item(), loss_v.item(), batch * len(X_txt)
            print(f"loss_a: {loss_a:>7f}, loss_b: {loss_v:>7f},  [{current:>5d}/{size:>5d}]")
            print(f"ccc_mean : {ccc_mean:>9f}, Arousal_ccc : {pred_ccc_a:>9f}, Valence_ccc : {pred_ccc_v:>9f}")

In [77]:
def test(dataloader, model, loss_fn, mode = 'test'):
    
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss_a = 0
    test_loss_v = 0
    ccc_mean = 0
    ccc_a = 0
    ccc_v = 0
    
    with torch.no_grad():
        # data 순서: text_embeddings, wav_embeddings, temp, eda, label_emotion, label_emotion_ext, label_arousal, label_valence
        for batch, (X_txt, X_wav, X_temp, X_eda, 
                    label_emotion, label_emotion_ext, label_arousal, label_valence) in enumerate(dataloader): 

            # 예측 오류 계산 
            X_txt, X_wav, X_temp, X_eda, y_v, y_a= X_txt.to(device), X_wav.to(device), X_temp.to(device), X_eda.to(device),label_valence.type(torch.float32).to(device), label_arousal.type(torch.float32).to(device)
            
            X_temp = X_temp.unsqueeze(dim=-1)
            X_eda = X_eda.unsqueeze(dim=-1)
        
            
            pred = model(X_txt, X_wav, X_temp, X_eda)
            preds.append(pred.argmax(1)) # multi regression후 classification으로 변환할 경우
            # preds.append(pred)# 바로 multiclassification할 경우
            targets.append(label_emotion) # classification을 할 경우 언제나 사용
            print('예측라벨분포:',pred[:2], '정답라벨 분포:', label_emotion_ext[:2], '예측정답:', pred.argmax(1)[:2],'정답:', label_emotion[:2])
            # https://discuss.pytorch.org/t/loss-backward-raises-error-grad-can-be-implicitly-created-only-for-scalar-outputs/12152/6
            test_loss += loss_fn(pred, y).mean().item()# weighted MSE를 사용할 경우 중간에 sum() or mean()을 넣어줌 
            
            correct += (pred.argmax(1) == label_emotion.to(device)).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    if mode == 'test':
        print(torch.cat(preds), torch.cat(preds).shape)
        print("f1 score: ", f1(torch.cat(preds).to(device), torch.cat(targets).to(device)))
        print(f"Test Error: Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}\n")
    elif mode == 'val':
        print(f"Validation Error: Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# 학습시키기

In [78]:
# weight 계산
single_emotion = [0,1,2,4,9,10,12]
total_obs = 0
for i in single_emotion:
    total_obs += Counter(merged_dataset['Emotion'])[i]
total_obs
# weigted loss for imbalance data: https://naadispeaks.wordpress.com/2021/07/31/handling-imbalanced-classes-with-weighted-loss-in-pytorch/
weight_for_class = []
Counter(merged_dataset['Emotion'])
for idx, value in sorted(Counter(merged_dataset['Emotion']).items()):
    if idx in [0,1,2,4,9,10,12]:
        weight_for_class.append(1 - (value/total_obs))
weight_for_class = torch.Tensor(weight_for_class)
weight_for_class

tensor([0.1186, 0.9287, 0.9913, 0.9900, 0.9780, 0.9940, 0.9993])

In [79]:
class weighted_MSELoss(nn.Module):
    def __init__(self, weight):
        super().__init__()
        self.weight = weight.to(device)
    def forward(self,inputs,targets):
        return ((inputs - targets)**2) * self.weight

In [80]:
# # 지난 저장한 모델이 있다면
# PATH = './data/test_model.pkl'
# model_tf_mixer = torch.load(PATH)

In [81]:
# loss_fn = nn.CrossEntropyLoss(weight=weight_for_class).to(device)
# loss_fn = nn.CrossEntropyLoss().to(device) # weigth를 주기위해 위의 loss로 임시 변경
loss_fn = weighted_MSELoss(weight = weight_for_class).to(device) # multi target regression(감정별로 count 한 타겟)

In [82]:
lr = 1e-3

# optimizer = optim.SGD(model_tf_mixer.parameters(), lr=lr) # classification
optimizer = optim.Adagrad(model_tf_mixer.parameters(), lr=lr) # regression

## start training

In [83]:
# Set the Training Parameters

epochs = 10
for epoch in range(epochs):
    print(f"---------------Epoch {epoch+1}----------------")
    train(train_dataloader, model_tf_mixer, loss_fn, optimizer)
    test(validation_dataloader, model_tf_mixer, loss_fn, mode = 'val')
print("Done!")

---------------Epoch 1----------------
loss: 2.507085  [    0/ 1084]
예측라벨분포: tensor([[-0.0185,  0.0831, -0.0551,  0.0355,  0.0652, -0.1144, -0.0718],
        [ 0.0048,  0.1884, -0.0386,  0.0023, -0.0235, -0.0992, -0.0574]],
       device='cuda:0') 정답라벨 분포: tensor([[ 4.,  0.,  2.,  2.,  2.,  0.,  0.],
        [10.,  0.,  0.,  0.,  0.,  0.,  0.]]) 예측정답: tensor([1, 1], device='cuda:0') 정답: tensor([0., 0.], dtype=torch.float64)
예측라벨분포: tensor([[-0.0159,  0.0987, -0.0446,  0.0345, -0.0326, -0.0902, -0.0738],
        [-0.0192,  0.0900, -0.0233,  0.0551,  0.0039, -0.1085, -0.0822]],
       device='cuda:0') 정답라벨 분포: tensor([[6., 0., 0., 1., 3., 0., 0.],
        [3., 7., 0., 0., 0., 0., 0.]]) 예측정답: tensor([1, 1], device='cuda:0') 정답: tensor([0., 1.])
예측라벨분포: tensor([[ 0.0168,  0.1094, -0.0552,  0.0126, -0.0218, -0.1104, -0.0788],
        [ 0.0051,  0.0562, -0.0492,  0.0153, -0.0418, -0.1013, -0.0996]],
       device='cuda:0') 정답라벨 분포: tensor([[ 9.,  1.,  0.,  0.,  0.,  0.,  0.],
        [10.,  

In [None]:
# 실험을 위해 모델 저장
PATH = './data/test_model_multilabelregression.pkl'
torch.save(model_tf_mixer, PATH)

## basic TensorFusionNet 검증

In [67]:
test(test_dataloader, model_tf_mixer, loss_fn, mode = 'test')

예측라벨분포: tensor([[-0.0006,  0.1168, -0.0863,  0.0128,  0.0321,  0.0305, -0.0390],
        [-0.0006,  0.1168, -0.0862,  0.0128,  0.0321,  0.0305, -0.0390]],
       device='cuda:0') 정답라벨 분포: tensor([[4., 3., 3., 0., 0., 0., 0.],
        [9., 1., 0., 0., 0., 0., 0.]]) 예측정답: tensor([1, 1], device='cuda:0') 정답: tensor([0., 0.])
예측라벨분포: tensor([[-0.0006,  0.1168, -0.0862,  0.0128,  0.0321,  0.0305, -0.0390],
        [-0.0006,  0.1168, -0.0862,  0.0128,  0.0321,  0.0305, -0.0390]],
       device='cuda:0') 정답라벨 분포: tensor([[9., 0., 1., 0., 0., 0., 0.],
        [8., 0., 0., 0., 2., 0., 0.]]) 예측정답: tensor([1, 1], device='cuda:0') 정답: tensor([0., 0.])
예측라벨분포: tensor([[-0.0006,  0.1168, -0.0862,  0.0128,  0.0321,  0.0305, -0.0390],
        [-0.0006,  0.1168, -0.0862,  0.0128,  0.0321,  0.0305, -0.0390]],
       device='cuda:0') 정답라벨 분포: tensor([[6., 0., 0., 4., 0., 0., 0.],
        [1., 9., 0., 0., 0., 0., 0.]]) 예측정답: tensor([1, 1], device='cuda:0') 정답: tensor([0., 1.], dtype=torch.float64)
tensor(

In [None]:
for batch, (X_txt, X_wav, X_temp, X_eda, 
                        label_emotion, label_emotion_ext, label_arousal, label_valence) in list(enumerate(test_dataloader))[:4]:
    print(X_txt,X_wav,label_emotion)

tensor([[[-0.0372, -0.1859,  0.3214,  ..., -0.5023, -0.2907, -0.5705],
         [-0.3403, -0.6411,  0.2710,  ..., -0.4994,  0.1688, -0.6985],
         [-0.2038,  0.1151,  0.3303,  ...,  0.0532,  0.0731, -0.7255],
         ...,
         [ 0.0387,  0.1966,  0.1149,  ..., -0.5742, -0.1841, -0.6823],
         [ 0.3737,  0.0196, -0.0652,  ..., -0.3645,  0.1043, -0.9926],
         [ 0.4129, -0.1367,  0.0095,  ..., -0.4642,  0.2714, -0.9962]],

        [[ 1.3079,  0.3727,  1.6503,  ...,  0.9950, -0.2278,  1.1599],
         [ 1.6657, -0.2183,  2.2059,  ...,  0.7725,  1.6774,  0.4149],
         [-0.3042, -1.3660,  1.2436,  ...,  1.1886, -0.1148,  1.5758],
         ...,
         [ 0.9146, -0.8450,  1.8701,  ...,  0.0545, -0.1092,  1.0398],
         [ 0.9892, -0.2653,  1.9712,  ...,  0.7016,  0.0530,  0.9379],
         [ 0.9061, -0.3329,  1.8953,  ...,  0.7556,  0.1093,  0.9114]],

        [[-0.3704, -0.2984,  0.9155,  ..., -1.2681, -0.0433,  0.2413],
         [ 0.1247, -0.0254,  0.4202,  ..., -0

In [None]:
probs = model_tf_mixer(X_txt.to(device), X_wav.to(device))
for i in torch.argmax(probs, dim=1):
    if decode_dict[int(i)] != 'neutral':
        print(decode_dict[int(i)])
    

# Ensemble Model for Machine Learning

In [None]:
import random




ts_session_index = list(range(1,40+1))
# ts data가 없는 session 추출대상 index에서 제외
ts_session_index.remove(12) 
ts_session_index.remove(17)

# 80% random하게 train으로 추출, 20%를 테스트로 추출
ts_session_index_train = random.sample(ts_session_index, k = int(38*.8))
ts_session_index_train = sorted(ts_session_index_train)
ts_session_index_test = [i for i in ts_session_index if i not in ts_session_index_train]
ts_session_index_test = sorted(ts_session_index_test)
print(ts_session_index_train, '\n',ts_session_index_test)

[1, 2, 3, 4, 5, 6, 7, 8, 10, 14, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 34, 36, 37, 38, 40] 
 [9, 11, 13, 16, 29, 33, 35, 39]


In [None]:
ts_dataset[1].head(5)

Unnamed: 0,segment_id,emotion,valence,arousal,eda,temp
0,Sess01_script01_User002M_001,neutral,3.4,2.9,"[2.856493, 2.788578, 2.678377, 2.652749, 2.645...","[34.81, 34.81, 34.81, 34.79, 34.79, 34.79, 34...."
1,Sess01_script01_User002M_002,neutral,3.1,2.9,"[2.647035, 2.653442, 2.631658, 2.614999, 2.623...","[34.79, 34.79, 34.79, 34.77, 34.77, 34.77, 34...."
2,Sess01_script01_User002M_003,neutral,3.1,3.0,"[2.896217, 2.975664, 3.02692, 3.071769, 3.0922...","[34.75, 34.75, 34.75, 34.75, 34.79, 34.79, 34...."
3,Sess01_script01_User002M_004,neutral,3.7,3.1,"[3.689019, 3.692863, 3.659546, 3.409672, 3.123...","[34.77, 34.77, 34.77, 34.77, 34.77, 34.77, 34...."
4,Sess01_script01_User001F_001,neutral,3.8,2.8,"[4.412055, 4.536246, 4.630991, 4.669401, 4.661...","[34.55, 34.55, 34.55, 34.55, 34.55, 34.55, 34...."
