In [18]:
import os

import numpy as np
import pandas as pd
import torch

# Feature Concatenate

In [19]:
valence_lstm_features = np.load('valence_lstm_features.npy', allow_pickle=True).tolist()
arousal_lstm_features = np.load('arousal_lstm_features.npy', allow_pickle=True).tolist()
emotion_lstm_features = np.load('emotion_lstm_features.npy', allow_pickle=True).tolist()
labels = np.load('labels.npy', allow_pickle=True)

In [20]:
l = labels.tolist()
l = [i.tolist() for i in labels]
l_list = sum(l,[])
print(l_list.count(1))
print(l_list.count(0))

41591
78639


In [25]:
# convert each element of the list to a tensor
valence_lstm_tensors = [torch.tensor(arr) for arr in valence_lstm_features]
arousal_lstm_tensors = [torch.tensor(arr) for arr in arousal_lstm_features]
emotion_lstm_tensors = [torch.tensor(arr) for arr in emotion_lstm_features]

In [26]:
features_concatenate_list = []
for valence_feature, arousal_feature, emotion_feature in zip(valence_lstm_tensors, arousal_lstm_tensors, emotion_lstm_tensors):
    concated_feature = torch.cat((valence_feature, arousal_feature, emotion_feature), dim=1)
    features_concatenate_list.append(concated_feature)

# Dataset, DataLoader

In [27]:
from torch.utils.data import DataLoader, Dataset

In [28]:
class MyDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
        
    def __len__(self):
        return len(self.data)
        
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [29]:
# Create an instance of dataset
md = MyDataset(features_concatenate_list,labels)

In [30]:
dataloader = DataLoader(md)

# Model

In [31]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        
        return out

In [32]:
import torch
import torch.nn as nn

class LSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Decode the hidden state of the last time step
        output = self.fc(out[:, -1, :])
        
        # Apply sigmoid activation function to output
        output = self.sigmoid(output)
        
        return output, out[:, -1, :]


In [149]:
import torch
import torch.nn as nn

class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        # Set initial hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate GRU
        out, _ = self.gru(x, h0)
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        
        return out


# Training

In [33]:
num_epochs = 100
learning_rate = 0.001

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_size = 224
hidden_size = 128
num_layers = 3
output_size = 1

model = LSTMClassifier(input_size, hidden_size, num_layers, output_size).to(device)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [34]:
from tqdm.notebook import tqdm_notebook
# LSTM
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in tqdm_notebook(dataloader, total=len(dataloader), desc='Training'):
        if inputs.shape[1] != labels.shape[1] : continue
        
        inputs = inputs.transpose(0,1).to(device)
        labels = labels.squeeze(0).to(device)
        
        optimizer.zero_grad()

        # Forward pass
        outputs,out = model(inputs)
        outputs = outputs.squeeze(1)
        loss = criterion(outputs, labels.float())

        # Backward and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, running_loss/len(dataloader)))


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [1/100], Loss: 0.6268


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [2/100], Loss: 0.6223


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [3/100], Loss: 0.6220


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [4/100], Loss: 0.6218


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [5/100], Loss: 0.6217


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [6/100], Loss: 0.6216


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [7/100], Loss: 0.6216


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [8/100], Loss: 0.6215


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [9/100], Loss: 0.6215


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [10/100], Loss: 0.6214


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [11/100], Loss: 0.6214


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [12/100], Loss: 0.6214


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [13/100], Loss: 0.6213


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [14/100], Loss: 0.6213


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [15/100], Loss: 0.6213


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [16/100], Loss: 0.6213


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [17/100], Loss: 0.6213


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [18/100], Loss: 0.6212


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [19/100], Loss: 0.6212


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [20/100], Loss: 0.6212


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [21/100], Loss: 0.6212


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [22/100], Loss: 0.6212


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [23/100], Loss: 0.6212


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [24/100], Loss: 0.6212


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [25/100], Loss: 0.6211


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [26/100], Loss: 0.6211


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [27/100], Loss: 0.6211


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [28/100], Loss: 0.6211


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [29/100], Loss: 0.6211


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [30/100], Loss: 0.6211


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [31/100], Loss: 0.6211


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [32/100], Loss: 0.6210


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [33/100], Loss: 0.6209


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [34/100], Loss: 0.6209


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [35/100], Loss: 0.6209


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [36/100], Loss: 0.6209


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [37/100], Loss: 0.6208


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [38/100], Loss: 0.6208


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [39/100], Loss: 0.6208


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [40/100], Loss: 0.6208


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [41/100], Loss: 0.6208


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [42/100], Loss: 0.6208


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [43/100], Loss: 0.6208


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [44/100], Loss: 0.6208


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [45/100], Loss: 0.6207


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [46/100], Loss: 0.6207


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [47/100], Loss: 0.6207


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [48/100], Loss: 0.6207


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [49/100], Loss: 0.6207


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [50/100], Loss: 0.6207


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [51/100], Loss: 0.6206


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [52/100], Loss: 0.6207


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [53/100], Loss: 0.6206


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [54/100], Loss: 0.6206


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [55/100], Loss: 0.6206


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [56/100], Loss: 0.6206


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [57/100], Loss: 0.6206


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [58/100], Loss: 0.6206


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [59/100], Loss: 0.6206


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [60/100], Loss: 0.6206


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [61/100], Loss: 0.6205


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [62/100], Loss: 0.6205


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [63/100], Loss: 0.6205


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [64/100], Loss: 0.6205


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [65/100], Loss: 0.6205


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [66/100], Loss: 0.6205


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [67/100], Loss: 0.6204


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [68/100], Loss: 0.6204


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [69/100], Loss: 0.6204


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [70/100], Loss: 0.6203


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [71/100], Loss: 0.6203


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [72/100], Loss: 0.6202


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [73/100], Loss: 0.6202


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [74/100], Loss: 0.6202


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [75/100], Loss: 0.6201


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [76/100], Loss: 0.6201


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [77/100], Loss: 0.6200


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [78/100], Loss: 0.6200


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [79/100], Loss: 0.6200


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [80/100], Loss: 0.6199


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [81/100], Loss: 0.6199


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [82/100], Loss: 0.6199


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [83/100], Loss: 0.6199


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [84/100], Loss: 0.6198


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [85/100], Loss: 0.6198


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [86/100], Loss: 0.6198


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [87/100], Loss: 0.6198


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [88/100], Loss: 0.6197


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [89/100], Loss: 0.6197


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [90/100], Loss: 0.6197


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [91/100], Loss: 0.6197


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [92/100], Loss: 0.6197


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [93/100], Loss: 0.6197


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [94/100], Loss: 0.6196


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [95/100], Loss: 0.6196


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [96/100], Loss: 0.6196


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [97/100], Loss: 0.6196


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [98/100], Loss: 0.6195


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [99/100], Loss: 0.6195


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [100/100], Loss: 0.6195


In [35]:
outputs

tensor([0.3310, 0.3337, 0.3208, 0.3295, 0.3299, 0.3313, 0.3295, 0.3298, 0.3302,
        0.3334, 0.3298, 0.3313, 0.3307, 0.3294, 0.3320, 0.3301, 0.3331, 0.3294,
        0.3290, 0.3307, 0.3298, 0.3326, 0.3304, 0.3308, 0.3298, 0.3300, 0.3301,
        0.3307, 0.3311, 0.3298, 0.3296, 0.3315, 0.3287, 0.3318, 0.3302, 0.3297,
        0.3198, 0.3296, 0.3263, 0.3224], device='cuda:0',
       grad_fn=<SqueezeBackward1>)

In [36]:
PATH = 'concatenate_lstm_model.pt'

# 모델 저장
torch.save(model.state_dict(), PATH)

In [37]:
%pwd

'/workspace/EmotionShortForm/lstm'

In [151]:
# GRU
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in tqdm_notebook(dataloader, total=len(dataloader), desc='Training'):
        if inputs.shape[1] != labels.shape[1] : continue
        
        inputs = inputs.transpose(0,1).to(device)
        labels = labels.squeeze(0).to(device)
        
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs).squeeze(1)
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, running_loss/len(dataloader)))


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [1/10], Loss: 0.6252


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [2/10], Loss: 0.6223


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [3/10], Loss: 0.6220


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [4/10], Loss: 0.6219


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [5/10], Loss: 0.6218


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [6/10], Loss: 0.6217


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [7/10], Loss: 0.6217


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [8/10], Loss: 0.6216


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [9/10], Loss: 0.6216


Training:   0%|          | 0/556 [00:00<?, ?it/s]

Epoch [10/10], Loss: 0.6216


In [152]:
outputs

tensor([-0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756,
        -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756,
        -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756,
        -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756,
        -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756, -0.7756],
       device='cuda:0', grad_fn=<SqueezeBackward1>)

# Feature Extract

In [38]:
model = LSTMClassifier(input_size, hidden_size, num_layers, output_size).to(device)

# 모델 불러오기
model.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [40]:
from tqdm.notebook import tqdm_notebook

concatenate_features = []

count = 1
for inputs, labels in tqdm_notebook(dataloader,total=len(dataloader)):
    with torch.no_grad():
        model.eval()
#         if inputs.shape[1] != labels.shape[1] : continue
        
        inputs = inputs.transpose(0,1).to(device)
        labels = labels.squeeze(0).to(device)

        # Forward pass
        outputs,out = model(inputs)
        count += 1
        concatenate_features.append(out)

  0%|          | 0/556 [00:00<?, ?it/s]

In [43]:
len(concatenate_features), concatenate_features[0].shape

(556, torch.Size([250, 128]))

In [44]:
data_array = np.array(concatenate_features)

np.save('concatenate_features_array.npy',data_array)

In [45]:
x = np.load('concatenate_features_array.npy',allow_pickle=True)
x.shape

(556,)

In [46]:
x_list = x.tolist()
x_list[0].shape

torch.Size([250, 128])