In [None]:
# Install the third-party packages imported by the cells below.
# The recorded output of this cell shows these versions being downloaded:
# transformers 4.21.1, mtcnn 0.1.1, mediapipe 0.8.10.1, timm 0.6.7, evaluate 0.2.2.
# NOTE(review): nothing is pinned here, so a fresh run may resolve newer versions.
!pip install scipy scikit-image torch tqdm transformers mtcnn mediapipe opencv-python torchvision numpy pandas timm evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.1-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 25.2 MB/s 
[?25hCollecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 56.4 MB/s 
[?25hCollecting mediapipe
  Downloading mediapipe-0.8.10.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.9 MB)
[K     |████████████████████████████████| 32.9 MB 1.4 MB/s 
Collecting timm
  Downloading timm-0.6.7-py3-none-any.whl (509 kB)
[K     |████████████████████████████████| 509 kB 62.6 MB/s 
[?25hCollecting evaluate
  Downloading evaluate-0.2.2-py3-none-any.whl (69 kB)
[K     |████████████████████████████████| 69 kB 9.7 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 13.6 MB/s 
[

In [None]:
from google.colab import drive

# Mount Google Drive so the project folder stored under MyDrive is reachable.
drive.mount("/content/drive")
# Root folder of the project on Drive; later cells append "data/" to it.
project_path = "/content/drive/MyDrive/NLP/MultiModalEmotionRecognition/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Switch to the project's data folder so the relative paths in the next cells resolve.
%cd /content/drive/MyDrive/NLP/MultiModalEmotionRecognition/data

/content/drive/MyDrive/NLP/MultiModalEmotionRecognition/data


In [None]:
# Show what is present in the data folder before extracting the archives.
!ls

dev.zip		 image_index_val.txt  sentiment_val.txt  train_ende.zip
english_val.txt  images		      test.zip


In [None]:
# Extract the three split archives into the current (data) folder.
# NOTE(review): presumably unzip prompts before overwriting existing files, so
# re-running this cell may block on input - confirm, or pass -n / -o.
!unzip dev.zip
!unzip test.zip
!unzip train_ende.zip

In [None]:
# Move each extracted split under images/ using the split names that the
# dataset class builds its image directory from ("val", "test", "train").
# NOTE(review): not idempotent - a second run has no dev/, test/ or train/ to move.
%mv dev/ images/val
%mv test/ images/test
%mv train/ images/train

In [None]:
# Count the entries in images/train as a sanity check on the extraction.
%ls images/train -1 | wc -l

20240


In [None]:
import torch

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
# `device` is a notebook-wide global that later cells read.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


# Face Embedding

In [None]:
from scipy.spatial.distance import euclidean
import math
from skimage.transform import rotate
from mtcnn import MTCNN
import mediapipe
import numpy as np
import pandas as pd
import cv2
import os
from PIL import Image
import torch
from torchvision import transforms
import urllib


def get_model_path(model_name):
    """Return the local path of a pretrained checkpoint, downloading it if needed.

    The file ``<model_name>.pt`` is cached in ``~/.hsemotions``. When it is not
    there yet it is fetched from the ``face-emotion-recognition`` GitHub
    repository.

    Args:
        model_name: Checkpoint name without the ``.pt`` extension.

    Returns:
        Path of the cached checkpoint file.

    Raises:
        urllib.error.URLError: If the download fails.
    """
    # A bare `import urllib` does not guarantee that the `request` submodule is
    # loaded, so import it explicitly where it is used.
    import urllib.request

    model_file = model_name + ".pt"
    cache_dir = os.path.join(os.path.expanduser("~"), ".hsemotions")
    os.makedirs(cache_dir, exist_ok=True)
    fpath = os.path.join(cache_dir, model_file)
    if not os.path.isfile(fpath):
        print(f"{model_file} not exists")
        url = (
            "https://github.com/HSE-asavchenko/face-emotion-recognition/blob/main/models/affectnet_emotions/"
            + model_file
            + "?raw=true"
        )
        print("Downloading", model_name, "from", url)
        # Download under a temporary name and rename afterwards, so that an
        # interrupted transfer is never mistaken for a complete cached file.
        partial_path = fpath + ".part"
        try:
            urllib.request.urlretrieve(url, partial_path)
            os.replace(partial_path, fpath)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)

    return fpath


class FaceAlignment:
    """Computes in-plane rotation angles from eye positions and applies them."""

    def __init__(
        self,
    ):
        pass

    @staticmethod
    def apply_rotation_on_images(input_images, angles):
        """Rotate every image by its paired angle with ``skimage.transform.rotate``.

        Args:
            input_images: Iterable of images.
            angles: Iterable of rotation angles, paired with ``input_images``.

        Returns:
            List of rotated images, in input order.
        """
        rotated_images = [
            rotate(image, angle) for image, angle in zip(input_images, angles)
        ]
        return rotated_images

    @staticmethod
    def compute_alignment_rotation_(eyes_coordinates):
        """Derive one signed rotation angle (in degrees) per pair of eyes.

        For each ``(left_eye, right_eye)`` pair a right triangle is built from
        the two eyes and an auxiliary vertex, and the cosine rule gives the
        angle at the right eye.

        Args:
            eyes_coordinates: Iterable of ``((left_x, left_y), (right_x, right_y))``.

        Returns:
            Tuple ``(angles, directions)``. ``directions`` holds ``-1`` when the
            left eye has the larger y coordinate and ``1`` otherwise. ``angles``
            holds the matching angle, already shifted by -90 degrees for
            direction ``-1``. A degenerate triangle yields angle ``0``.
        """
        angles = []
        directions = []
        for left_eye_coordinate, right_eye_coordinate in eyes_coordinates:

            left_eye_x, left_eye_y = left_eye_coordinate
            right_eye_x, right_eye_y = right_eye_coordinate

            left_eye_is_lower = left_eye_y > right_eye_y
            if left_eye_is_lower:
                triangle_vertex = (right_eye_x, left_eye_y)
                direction = -1  # rotate clockwise
            else:
                triangle_vertex = (left_eye_x, right_eye_y)
                direction = 1  # rotate counter-clockwise

            # compute length of triangle edges
            a = euclidean(left_eye_coordinate, triangle_vertex)
            b = euclidean(right_eye_coordinate, triangle_vertex)
            c = euclidean(right_eye_coordinate, left_eye_coordinate)

            # cosine rule; a zero-length edge would divide by zero
            if b != 0 and c != 0:
                cos_a = (b**2 + c**2 - a**2) / (2 * b * c)
                # Rounding can push the ratio marginally outside [-1, 1], which
                # would make arccos return NaN.
                cos_a = np.clip(cos_a, -1.0, 1.0)
                angle = np.arccos(cos_a)  # angle in radian
                angle = (angle * 180) / math.pi  # radian to degree
            else:
                angle = 0

            angle = angle - 90 if direction == -1 else angle

            angles.append(angle)
            directions.append(direction)

        return angles, directions


class FaceDetection:
    """Thin wrapper around a face detector that keeps the latest detections.

    ``extract_faces`` must be called first: every getter reads the detections
    stored by the most recent call.
    """

    def __init__(self, model_name, minimum_confidence):
        """Create the detector.

        Args:
            model_name: Detector to use; only ``"MTCNN"`` is supported.
            minimum_confidence: Detections whose ``"confidence"`` is not
                strictly greater than this value are discarded.

        Raises:
            ValueError: If ``model_name`` is not a supported detector.
        """
        self.detected_faces_information = None
        self.model_name = model_name
        self.minimum_confidence = minimum_confidence

        if model_name == "MTCNN":
            detector_model = MTCNN()
            self.detect_faces_function = detector_model.detect_faces
        else:
            # Fail here instead of with an AttributeError at detection time.
            raise ValueError(
                f"Unsupported face detection model: {model_name!r} (expected 'MTCNN')"
            )

    def extract_faces(self, input_image, return_detections_information=True):
        """Detect faces in ``input_image`` and return their crops.

        Args:
            input_image: Image array indexed as ``[row, column, ...]``.
            return_detections_information: When true, also return the kept
                detection dictionaries.

        Returns:
            ``faces`` or ``(faces, detections)`` depending on the flag.
        """
        self.detect_faces__(input_image)
        faces = self.get_faces__(input_image)
        if return_detections_information:
            return faces, self.detected_faces_information
        return faces

    def detect_faces__(self, input_image):
        """Run the detector and store the detections above the confidence threshold."""
        detections = self.detect_faces_function(input_image)
        self.detected_faces_information = [
            detection
            for detection in detections
            if detection["confidence"] > self.minimum_confidence
        ]

    def get_detected_faces_information(self):
        """Return the detections kept by the last ``extract_faces`` call."""
        return self.detected_faces_information

    def get_keypoints(
        self,
    ):
        """Return the ``"keypoints"`` entry of every kept detection."""
        return [
            detection["keypoints"] for detection in self.detected_faces_information
        ]

    def get_faces__(
        self,
        input_image,
    ):
        """Crop every kept detection box out of ``input_image``.

        Box coordinates are clamped at zero: a negative origin would otherwise
        be read as an index counted from the end of the axis and produce an
        empty or wrong crop.
        """
        faces = []
        for detection_information in self.detected_faces_information:
            x, y, w, h = detection_information["box"]
            y1, y2 = max(int(y), 0), max(int(y + h), 0)
            x1, x2 = max(int(x), 0), max(int(x + w), 0)
            faces.append(input_image[y1:y2, x1:x2])
        return faces

    def get_eyes_coordinates(
        self,
    ):
        """Return ``(left_eye, right_eye)`` keypoints for every kept detection."""
        eyes_coordinates = [
            (info["keypoints"]["left_eye"], info["keypoints"]["right_eye"])
            for info in self.detected_faces_information
        ]
        return eyes_coordinates


class FaceEmotionRecognizer:
    """Feature extractor and emotion classifier built on a pretrained checkpoint.

    The loaded network's classifier layer is detached and replaced by an
    identity module, so the network outputs feature vectors. The detached
    weights are kept to turn those features into class scores on demand.
    """

    # supported values of model_name: enet_b0_8_best_vgaf, enet_b0_8_best_afew, enet_b2_8, enet_b0_8_va_mtl, enet_b2_7
    def __init__(self, device, model_name="enet_b0_8_best_vgaf"):
        """Load the checkpoint and prepare the preprocessing pipeline.

        Args:
            device: Device the model is moved to (string or ``torch.device``).
            model_name: Checkpoint name understood by ``get_model_path``.
        """
        self.device = device
        self.is_mtl = "_mtl" in model_name
        if "_7" in model_name:
            self.idx_to_class = {
                0: "Anger",
                1: "Disgust",
                2: "Fear",
                3: "Happiness",
                4: "Neutral",
                5: "Sadness",
                6: "Surprise",
            }
        else:
            self.idx_to_class = {
                0: "Anger",
                1: "Contempt",
                2: "Disgust",
                3: "Fear",
                4: "Happiness",
                5: "Neutral",
                6: "Sadness",
                7: "Surprise",
            }

        self.img_size = 224 if "_b0_" in model_name else 260
        self.test_transforms = transforms.Compose(
            [
                transforms.Resize((self.img_size, self.img_size)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

        path = get_model_path(model_name)

        # map_location lets a checkpoint that was saved on a GPU be opened on a
        # machine without one.
        # NOTE(review): torch.load unpickles the file, so only load checkpoints
        # from a trusted source. Newer torch releases may also need
        # weights_only=False to load a whole pickled model - confirm.
        model = torch.load(path, map_location=device)
        model = model.to(device)

        if isinstance(model.classifier, torch.nn.Sequential):
            classifier_layer = model.classifier[0]
        else:
            classifier_layer = model.classifier
        self.classifier_weights = classifier_layer.weight.data
        self.classifier_bias = classifier_layer.bias.data

        model.classifier = torch.nn.Identity()
        self.model = model.eval()
        print(path, self.test_transforms)

    def compute_probability(self, features):
        """Apply the detached classifier layer; returns raw scores, not probabilities."""
        return torch.matmul(features, self.classifier_weights.T) + self.classifier_bias

    def extract_representations_from_faces(self, input_faces):
        """Return one feature vector per face image.

        Args:
            input_faces: Sequence of arrays accepted by ``PIL.Image.fromarray``.

        Returns:
            Tensor with one row per face, on ``self.device``.

        Raises:
            RuntimeError: If ``input_faces`` is empty (``torch.stack`` refuses
                an empty list); callers in this notebook rely on that.
        """
        faces = [self.test_transforms(Image.fromarray(face)) for face in input_faces]
        batch = torch.stack(faces, dim=0).to(self.device)
        # Pure inference: skip building an autograd graph through the backbone.
        with torch.no_grad():
            features = self.model(batch)
        return features

    def predict_emotions_from_representations(
        self, representations, logits=True, return_features=True
    ):
        """Classify feature vectors.

        Args:
            representations: Tensor with one feature vector per row.
            logits: When true the returned scores are raw classifier outputs;
                when false the class columns are turned into probabilities
                with a softmax.
            return_features: Unused; kept for interface compatibility.

        Returns:
            Tuple ``(labels, scores)``: the predicted class name per row, and
            the score tensor. For ``_mtl`` models the last two score columns
            are excluded from the class decision and are returned unchanged
            (presumably auxiliary regression outputs - confirm).
        """
        scores = self.compute_probability(representations)
        class_scores = scores[:, :-2] if self.is_mtl else scores
        predictions_indices = torch.argmax(class_scores, dim=1)

        if not logits:
            probabilities = torch.softmax(class_scores, dim=1)
            if self.is_mtl:
                scores = torch.cat((probabilities, scores[:, -2:]), dim=1)
            else:
                scores = probabilities

        labels = [self.idx_to_class[index.item()] for index in predictions_indices]
        return labels, scores


class FaceNormalizer:
    """Blanks out everything outside the face-oval landmarks of a face image."""

    def __init__(self):
        """Create one static-image face-mesh model and precompute the oval route."""
        self.mp_face_mesh = mediapipe.solutions.face_mesh
        # A single FaceMesh instance is enough.
        self.face_mesh = self.mp_face_mesh.FaceMesh(static_image_mode=True)
        self.routes_idx = self.initialize__()

    def initialize__(self):
        """Order the ``FACEMESH_FACE_OVAL`` edges into one connected walk.

        Returns:
            List of ``[source_index, target_index]`` pairs in which each pair
            starts where the previous one ended. The walk starts at the edge
            that follows the first edge of the set and has as many entries as
            the set has edges.
        """
        edges = list(self.mp_face_mesh.FACEMESH_FACE_OVAL)
        # start index -> end index; the first edge seen for a start index wins
        successor = {}
        for start_idx, end_idx in edges:
            successor.setdefault(start_idx, end_idx)

        routes_idx = []
        current = edges[0][1]
        for _ in range(len(edges)):
            following = successor[current]
            routes_idx.append([current, following])
            current = following

        return routes_idx

    def get_landmarks__(self, input_image: np.ndarray):
        """Return the face-oval outline of ``input_image`` as pixel coordinates.

        Args:
            input_image: Image array. Floating-point images are scaled by 255
                and converted to ``uint8`` before being passed to the mesh model.

        Returns:
            List of ``(x, y)`` integer points: source and target of every
            route edge, in route order.

        Raises:
            TypeError: If the mesh result has no landmarks
                (``multi_face_landmarks`` is ``None``).
        """
        # `np.float` was only an alias of the builtin float and has been
        # removed from NumPy; test for any floating dtype instead.
        if np.issubdtype(input_image.dtype, np.floating):
            input_image = (input_image * 255).astype(np.uint8)

        results = self.face_mesh.process(input_image)
        landmarks = results.multi_face_landmarks[0]

        height, width = input_image.shape[0], input_image.shape[1]
        routes = []
        for source_idx, target_idx in self.routes_idx:
            # landmark coordinates are relative; scale them to pixel positions
            for point in (
                landmarks.landmark[source_idx],
                landmarks.landmark[target_idx],
            ):
                routes.append((int(width * point.x), int(height * point.y)))

        return routes

    @staticmethod
    def normalize_with_landmark_points__(input_image, landmarks):
        """Keep the pixels inside the landmark polygon and zero out the rest.

        Args:
            input_image: Image array indexed as ``[row, column, ...]``.
            landmarks: Sequence of ``(x, y)`` polygon points.

        Returns:
            Array shaped like ``input_image`` that is zero outside the polygon.
        """
        mask = np.zeros((input_image.shape[0], input_image.shape[1]))
        mask = cv2.fillConvexPoly(mask, np.array(landmarks, dtype=np.int32), 1)
        mask = mask.astype(bool)

        out = np.zeros_like(input_image)
        out[mask] = input_image[mask]
        return out

    def normalize_faces_image(self, input_images):
        """Apply the face-oval mask to every image in ``input_images``."""
        normalized_faces_images = [
            self.normalize_with_landmark_points__(
                input_image, self.get_landmarks__(input_image)
            )
            for input_image in input_images
        ]
        return normalized_faces_images


class EmotionRepresentationExtractor:
    """Pipeline that turns an image into per-face emotion features.

    The four stages (detection, alignment, normalisation, recognition) are
    injected through the chainable ``set_*`` methods. The intermediate results
    of the latest ``extract_representation`` call stay available through the
    getters until ``clear`` is called.
    """

    def __init__(
        self,
    ):
        self.face_detection_model: FaceDetection = None
        self.face_alignment_model: FaceAlignment = None
        self.face_normalizer_model: FaceNormalizer = None
        self.face_emotion_recognition_model: FaceEmotionRecognizer = None

        # initialise the cached intermediates in one place
        self.clear()

    def set_face_detection_model(self, face_detection_model):
        """Set the detection stage; returns ``self`` for chaining."""
        self.face_detection_model = face_detection_model
        return self

    def set_face_alignment_model(self, face_alignment_model):
        """Set the alignment stage; returns ``self`` for chaining."""
        self.face_alignment_model = face_alignment_model
        return self

    def set_face_normalizer_model(self, face_normalizer_model):
        """Set the normalisation stage; returns ``self`` for chaining."""
        self.face_normalizer_model = face_normalizer_model
        return self

    def set_face_emotion_recognition_model(self, face_emotion_recognition_model):
        """Set the recognition stage; returns ``self`` for chaining."""
        self.face_emotion_recognition_model = face_emotion_recognition_model
        return self

    def extract_representation(self, input_image):
        """Run the full pipeline on one image.

        Args:
            input_image: Image passed unchanged to the detection stage.

        Returns:
            Tuple ``(predictions, scores, representations)`` as produced by the
            recognition stage for the faces found in the image.
        """
        faces, _detections = self.face_detection_model.extract_faces(
            input_image, return_detections_information=True
        )
        eyes = self.face_detection_model.get_eyes_coordinates()
        (
            rotation_angles,
            rotation_directions,
        ) = self.face_alignment_model.compute_alignment_rotation_(eyes)
        rotated_faces = self.face_alignment_model.apply_rotation_on_images(
            faces, rotation_angles
        )
        normalized_rotated_faces = self.face_normalizer_model.normalize_faces_image(
            rotated_faces
        )

        # scale by 255 and cast to uint8 before handing the faces to the recogniser
        normalized_rotated_faces_255 = [
            (image * 255).astype(np.uint8) for image in normalized_rotated_faces
        ]

        recognizer = self.face_emotion_recognition_model
        representations = recognizer.extract_representations_from_faces(
            normalized_rotated_faces_255
        )
        predictions, scores = recognizer.predict_emotions_from_representations(
            representations
        )

        # cache the intermediates only after every stage has succeeded
        self.faces = faces
        self.rotation_angles = rotation_angles
        self.rotation_directions = rotation_directions
        self.rotated_faces = rotated_faces
        self.normalized_rotated_faces = normalized_rotated_faces_255

        return predictions, scores, representations

    def get_rotations_information(self):
        """Return ``(rotation_angles, rotation_directions)`` of the last run."""
        return self.rotation_angles, self.rotation_directions

    def get_faces(self):
        """Return the face crops of the last run."""
        return self.faces

    def get_rotated_faces(self):
        """Return the rotated face crops of the last run."""
        return self.rotated_faces

    def get_normalized_rotated_faces(self):
        """Return the masked, rotated uint8 faces of the last run."""
        return self.normalized_rotated_faces

    def clear(self):
        """Forget all cached intermediates."""
        self.faces = None
        self.normalized_rotated_faces = None
        self.rotated_faces = None
        self.rotation_angles = None
        self.rotation_directions = None

    def store_embeddings(self, file, embeddings):
        """Pickle ``embeddings`` to ``file`` under the key ``"embeddings"``."""
        # Imported here because this cell does not import pickle itself.
        import pickle

        with open(file, "wb") as file_out:
            pickle.dump(
                {"embeddings": embeddings}, file_out, protocol=pickle.HIGHEST_PROTOCOL
            )

    def load_embeddings(self, file):
        """Load embeddings written by ``store_embeddings``.

        NOTE(review): ``pickle.load`` can execute arbitrary code; only open
        files that this notebook wrote itself.
        """
        import pickle

        with open(file, "rb") as file_in:
            stored_data = pickle.load(file_in)
            stored_embeddings = stored_data["embeddings"]

        return stored_embeddings

# Text Embedding


In [None]:
from transformers import AutoTokenizer, AutoModel, pipeline
from transformers import RobertaForSequenceClassification
import torch
import pickle


class TextEmbeddingExtractor:
    """Sentence embeddings and sentiment labels from a sequence-classification model."""

    def __init__(
        self,
        model_name="pysentimiento/robertuito-sentiment-analysis",
        batch_size=250,
        show_progress_bar=True,
        to_tensor=True,
        max_length=128,
        device=None,
    ):
        """Load the tokenizer, the model and a sentiment-analysis pipeline.

        Args:
            model_name: Hugging Face model identifier.
            batch_size: Stored on the instance; not used by this class.
            show_progress_bar: Stored on the instance; not used by this class.
            to_tensor: Stored on the instance; not used by this class.
            max_length: Token limit applied when encoding sentences.
            device: Device for the model and its inputs. ``None`` selects
                ``cuda:0`` when CUDA is available and the CPU otherwise.
        """
        self.model_name = model_name

        # Resolve the device here instead of reading a notebook-level global.
        if device is None:
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.device = torch.device(device)

        self.batch_size = batch_size
        self.show_progress_bar = show_progress_bar
        self.to_tensor = to_tensor

        self.max_length = max_length

        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

        self.model = RobertaForSequenceClassification.from_pretrained(
            self.model_name, num_labels=3, output_hidden_states=True
        ).to(self.device)

        # Tell the pipeline which GPU the model lives on, so the inputs it
        # builds are placed on the same device as the model.
        pipeline_kwargs = {}
        if self.device.type == "cuda":
            pipeline_kwargs["device"] = (
                self.device.index if self.device.index is not None else 0
            )

        self.generator = pipeline(
            task="sentiment-analysis",
            model=self.model,
            tokenizer=self.tokenizer,
            **pipeline_kwargs,
        )

    def extract_embedding(
        self,
        input_batch_sentences,
    ):
        """Return the last-layer hidden state of the first token of each sentence.

        Args:
            input_batch_sentences: Sentences accepted by the tokenizer.

        Returns:
            Tensor with one row per sentence, taken from position 0 of the
            final hidden-state layer.
        """
        encoded_input = self.tokenizer(
            input_batch_sentences,
            padding=True,
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        ).to(self.device)

        with torch.no_grad():
            model_output = self.model(**encoded_input)
            # Take the final layer by position from the end, so the code does
            # not depend on the number of layers of the chosen model.
            last_layer_hidden_states = model_output["hidden_states"][-1]
            cls_hidden_state = last_layer_hidden_states[:, 0, :]

        return cls_hidden_state

    def get_labels(self, input_batch_sentences):
        """Return the sentiment-analysis pipeline output for the sentences."""
        return self.generator(input_batch_sentences)

    @staticmethod
    def store_embeddings(file, embeddings):
        """Pickle ``embeddings`` to ``file`` under the key ``"embeddings"``."""
        with open(file, "wb") as file_out:
            pickle.dump(
                {"embeddings": embeddings}, file_out, protocol=pickle.HIGHEST_PROTOCOL
            )

    @staticmethod
    def load_embeddings(file):
        """Load embeddings written by ``store_embeddings``.

        NOTE(review): ``pickle.load`` can execute arbitrary code; only open
        files that this notebook wrote itself.
        """
        with open(file, "rb") as file_in:
            stored_data = pickle.load(file_in)
            stored_embeddings = stored_data["embeddings"]

        return stored_embeddings

# Dataset

In [None]:
import os, cv2, torch, ast
import pandas as pd
import numpy as np
from pathlib import Path
from torch.utils.data import Dataset


class MSCTDDataSet(Dataset):
    """MSCTD dataset yielding a face embedding, the raw text and the sentiment label."""

    # Length of the placeholder vector returned when no face feature is available.
    FACE_EMBEDDING_DIM = 1280
    # Fill value of that placeholder vector.
    MISSING_FACE_VALUE = -100

    def __init__(self, data_size, base_path="data/", dataset_type="train"):
        """
        Args:
            data_size (int): maximum number of leading lines read from each file
            base_path (str): path to data folder
            dataset_type (str): split name used in the file names
                (``english_<split>.txt``, ...) and as the sub-folder of
                ``images/`` (e.g. ``"train"``, ``"val"``, ``"test"``)
        """
        base_path = Path(base_path)
        self.text_path = base_path / f"english_{dataset_type}.txt"
        self.image_index_path = base_path / f"image_index_{dataset_type}.txt"
        self.sentiment_path = base_path / f"sentiment_{dataset_type}.txt"
        self.image_dir = base_path / "images" / dataset_type
        self.data_size = data_size
        self.data_info = self.read_info()
        self.image_pad = 10
        # Pick the device locally so the class also works on CPU-only machines
        # and does not depend on a notebook-level global.
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.face_embedding_extractor = self.get_face_embedding_extractor()

    def get_face_embedding_extractor(self):
        """Assemble the detection, alignment, normalisation and recognition pipeline."""
        fd = FaceDetection("MTCNN", minimum_confidence=0.95)
        fa = FaceAlignment()
        fn = FaceNormalizer()
        model_name = "enet_b0_8_best_afew"
        fer = FaceEmotionRecognizer(self.device, model_name)
        fre = (
            EmotionRepresentationExtractor()
            .set_face_detection_model(fd)
            .set_face_alignment_model(fa)
            .set_face_normalizer_model(fn)
            .set_face_emotion_recognition_model(fer)
        )
        return fre

    def read_info(self):
        """Read the first ``data_size`` lines of the text, image-index and sentiment files.

        Returns:
            DataFrame with the columns ``text``, ``image`` and ``sentiment``.
            The row-wise construction followed by a transpose pads a shorter
            file with missing values instead of failing.
        """
        with open(self.text_path) as f:
            texts = [line.strip() for line in f]
        with open(self.image_index_path) as f:
            images = [ast.literal_eval(line.strip()) for line in f]
        with open(self.sentiment_path) as f:
            sentiments = [int(line.strip()) for line in f]

        limit = self.data_size
        df = pd.DataFrame(
            [texts[:limit], images[:limit], sentiments[:limit]],
            index=["text", "image", "sentiment"],
        ).transpose()
        return df

    def __len__(self):
        return self.data_info.shape[0]

    def get_face_features(self, idx):
        """Return the feature vector of the first face found in ``<idx>.jpg``.

        Failures surface as exceptions (for example ``TypeError`` when the
        image cannot be read); ``__getitem__`` turns the expected ones into a
        placeholder vector.
        """
        img_name = self.image_dir / f"{idx}.jpg"
        # reverse the channel axis of the image returned by cv2.imread
        image = cv2.imread(str(img_name))[:, :, ::-1]
        (
            _predictions,
            _scores,
            representations,
        ) = self.face_embedding_extractor.extract_representation(image)
        # The feature is an input of the downstream model, not something to
        # back-propagate through, so drop any autograd history.
        return representations[0].detach()

    def get_sentiment(self, sentiment):
        """Return the sentiment label unchanged."""
        return sentiment

    def get_text(self, text):
        """Return the text unchanged."""
        return text

    def __getitem__(self, idx):
        """Return ``{"face", "text", "sentiment"}`` for row ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        data = self.data_info.iloc[idx]
        try:
            face = self.get_face_features(idx)
        except (TypeError, RuntimeError):
            # No usable face (unreadable image, no detection, no landmarks):
            # substitute a constant vector so that batches can still be stacked.
            # NOTE(review): RuntimeError also hides unrelated failures such as
            # running out of GPU memory.
            face = torch.full(
                (self.FACE_EMBEDDING_DIM,),
                float(self.MISSING_FACE_VALUE),
                device=self.device,
            )
        sentiment = self.get_sentiment(data["sentiment"])
        text = self.get_text(data["text"])
        sample = {"face": face, "text": text, "sentiment": sentiment}

        return sample

# Data Loader

In [None]:
from torch.utils.data import DataLoader


class MSCTDDataLoader:
    """Wraps a batch iterable, adds text embeddings and moves batches to a device."""

    def __init__(
        self,
        dl,
        device,
        tokenizer=None,
        text_len=512,
        text_embedding_extractor=None,
    ):
        """
        Args:
            dl: Iterable of batch dictionaries that contain a ``"text"`` entry.
            device: Device every yielded batch is moved to.
            tokenizer: Unused; kept for interface compatibility.
            text_len: Unused; kept for interface compatibility.
            text_embedding_extractor: Object with an ``extract_embedding``
                method. Pass one shared instance to several loaders to avoid
                loading the text model once per loader. ``None`` creates a new
                ``TextEmbeddingExtractor``.
        """
        self.dl = dl
        self.device = device
        if text_embedding_extractor is None:
            text_embedding_extractor = TextEmbeddingExtractor()
        self.text_embedding_extractor = text_embedding_extractor

    def __iter__(self):
        """Yield each batch with an added ``"text_embedding"`` entry, on ``self.device``."""
        for batch in self.dl:
            batch["text_embedding"] = self.text_embedding_extractor.extract_embedding(
                batch["text"]
            )
            yield to_device(batch, self.device)

    def __len__(self):
        """Return the number of batches of the wrapped iterable."""
        return len(self.dl)


def to_device(data, device):
    """Recursively move everything that has a ``.to`` method onto ``device``.

    Args:
        data: A tensor-like object, or an arbitrarily nested list, tuple or
            dict of such objects. Strings, numbers, ``None`` and any other
            value without a ``.to`` method are returned unchanged.
        device: Target passed to every ``.to`` call.

    Returns:
        The same structure with moved leaves. Lists and tuples both come back
        as lists.
    """
    if isinstance(data, (list, tuple)):
        return [to_device(item, device) for item in data]
    if isinstance(data, dict):
        return {key: to_device(value, device) for key, value in data.items()}
    if hasattr(data, "to"):
        return data.to(device)
    # plain Python values (str, int, None, ...) have nothing to move
    return data

In [None]:
import torch
from torch import nn


class SimpleDenseNetwork(nn.Module):
    """Three-layer fully connected classifier that returns raw class logits.

    The output is deliberately left unnormalised: ``nn.CrossEntropyLoss``,
    which this notebook trains with, applies the log-softmax itself. Putting a
    ReLU and a softmax on top of the last layer would clip the logits and
    normalise them twice.
    """

    def __init__(self, n_classes, embedding_dimension):
        """
        Args:
            n_classes: Number of output classes (width of the last layer).
            embedding_dimension: Size of each input vector.
        """
        super(SimpleDenseNetwork, self).__init__()

        self.n_classes = n_classes
        self.embedding_dimension = embedding_dimension

        self.fc = nn.Sequential(
            nn.Linear(
                in_features=self.embedding_dimension,
                out_features=512,
            ),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=512, out_features=128),
            nn.ReLU(inplace=True),
            # the output width follows n_classes instead of a hard-coded 3
            nn.Linear(in_features=128, out_features=self.n_classes),
        )

    def forward(self, input_batch):
        """Map a ``(batch, embedding_dimension)`` tensor to ``(batch, n_classes)`` logits."""
        return self.fc(input_batch)

# Train

In [None]:
# Run configuration read as globals by the training and data-loading cells.
BATCH_SIZE = 32  # samples per batch of the torch DataLoader
num_workers = 1  # NOTE(review): not used by any cell shown in this notebook
EPOCHS = 1  # number of passes made by train_model
# Input width of the dense network.
# NOTE(review): presumably 768 (text embedding) + 1280 (face embedding) - confirm.
embedding_dimension = 2048
learning_rate = 0.1  # SGD learning rate used in train_model
momentum = 0.9  # SGD momentum used in train_model
data_size = 1000  # number of leading lines read per file by the dataset

In [None]:
import torch.optim as optim
from tqdm import trange
from datetime import datetime
from torch.utils.data import DataLoader
from transformers import AutoTokenizer


def train_epoch(epoch_index, model, dataloader, loss_fn, optimizer, report_every=1000):
    """Train ``model`` for one pass over ``dataloader``.

    Each batch must provide ``"text_embedding"`` and ``"face"`` tensors, which
    are concatenated along dimension 1 to form the model input, and a
    ``"sentiment"`` tensor of targets.

    Args:
        epoch_index: Zero-based epoch number, used only in the progress report.
        model: Module to optimise.
        dataloader: Iterable of batch dictionaries.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.
        optimizer: Optimiser that updates ``model``.
        report_every: Number of batches averaged per progress report.

    Returns:
        Mean loss of the last completed reporting window. When the epoch is
        shorter than one window, the mean loss over all of its batches is
        returned instead of a meaningless ``0.0``.
    """
    running_loss = 0.0
    last_loss = 0.0
    batches_in_window = 0
    window_completed = False

    for data_pair_index, batch in enumerate(dataloader):
        print("--------------", data_pair_index, "-------------")
        text_embedding = batch["text_embedding"]
        face_embedding = batch["face"]
        labels = batch["sentiment"]
        optimizer.zero_grad()

        outputs = model(torch.cat((text_embedding, face_embedding), 1))

        loss = loss_fn(outputs, labels)
        loss.backward()

        optimizer.step()

        # Gather data and report
        running_loss += loss.item()
        batches_in_window += 1
        if batches_in_window == report_every:
            last_loss = running_loss / report_every  # loss per batch
            print("  batch {} loss: {}".format(data_pair_index + 1, last_loss))
            tb_x = epoch_index * len(dataloader) + data_pair_index + 1
            print("Loss/train", last_loss, tb_x)
            running_loss = 0.0
            batches_in_window = 0
            window_completed = True

    if not window_completed and batches_in_window:
        last_loss = running_loss / batches_in_window

    return last_loss


def train_model(model, epochs, train_dataloader, val_dataloader):
    """Alternate one training pass and one validation pass for ``epochs`` epochs.

    A cross-entropy criterion and an SGD optimiser are created here; the
    optimiser reads the notebook-level ``learning_rate`` and ``momentum``.

    Args:
        model: Module to train; updated in place.
        epochs: Number of train/validate rounds.
        train_dataloader: Batches handed to ``train_epoch``.
        val_dataloader: Batches handed to ``validate``.

    Returns:
        The same ``model`` object after training.
    """
    criterion = nn.CrossEntropyLoss()
    sgd = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    for epoch_number in trange(epochs):
        # training phase
        model.train()
        train_epoch(epoch_number, model, train_dataloader, criterion, sgd)

        # evaluation phase
        model.eval()
        validate(model, val_dataloader, criterion)

    return model

In [None]:
# Create the classifier that the training cell below expects under the name
# `model`; with this line commented out a fresh kernel has no `model` at all.
model = SimpleDenseNetwork(n_classes=3, embedding_dimension=embedding_dimension).to(device=device)

In [None]:
# Build the "val" split: dataset -> torch DataLoader (batching) -> MSCTDDataLoader
# (adds text embeddings and moves each batch to `device`).
val_dataset = MSCTDDataSet(data_size, project_path + "data/", "val")
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
val_dataloader = MSCTDDataLoader(val_dataloader, device)

# Same construction for the "test" split, limited to its first 5000 lines.
# Each MSCTDDataSet builds its own face pipeline, which is why the recorded
# output below prints the checkpoint path and transforms twice.
test_dataset = MSCTDDataSet(5000, project_path + "data/", "test")
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
test_dataloader = MSCTDDataLoader(test_dataloader, device)

/root/.hsemotions/enet_b0_8_best_afew.pt Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
/root/.hsemotions/enet_b0_8_best_afew.pt Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


In [None]:
# Fit the classifier. The "val" loader is passed as training data and the "test"
# loader as validation data; no loader for the "train" split is built in this notebook.
# NOTE(review): train_model calls validate(), which is defined in a later cell,
# so that cell has to be executed before this one.
model = train_model(model, EPOCHS, val_dataloader, test_dataloader)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


-------------- 0 -------------


  input = module(input)


-------------- 1 -------------
-------------- 2 -------------
-------------- 3 -------------
-------------- 4 -------------


# Evaluating

In [None]:
import evaluate

# Metric objects from the `evaluate` library. validate() uses them as
# module-level accumulators: it adds every batch and then calls compute().
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")


def validate(model, dataloader, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and per-class precision.

    Predictions are accumulated in the module-level ``accuracy`` and
    ``precision`` metric objects, whose results are printed at the end.

    Args:
        model: Module to evaluate; the caller is responsible for ``model.eval()``.
        dataloader: Iterable of batches with ``"text_embedding"``, ``"face"``
            and ``"sentiment"`` entries.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar tensor.

    Returns:
        Mean loss per batch (``0.0`` when no batch was seen).
    """
    running_loss = 0.0
    batch_count = 0

    # Evaluation needs no gradients; skipping them saves memory and time.
    with torch.no_grad():
        for data_pair_index, batch in enumerate(dataloader):
            print("--------------", data_pair_index, "-------------")
            text_embedding = batch["text_embedding"]
            face_embedding = batch["face"]
            labels = batch["sentiment"]

            logits = model(torch.cat((text_embedding, face_embedding), 1))
            predicted_classes = logits.argmax(dim=1)
            accuracy.add_batch(predictions=predicted_classes, references=labels)
            precision.add_batch(predictions=predicted_classes, references=labels)

            running_loss += loss_fn(logits, labels).item()
            batch_count += 1

    print(accuracy.compute())
    print(precision.compute(average=None))

    return running_loss / batch_count if batch_count else 0.0

In [None]:
# Report accuracy and per-class precision of the trained model on the "test" loader.
validate(model, test_dataloader, nn.CrossEntropyLoss())

-------------- 0 -------------
-------------- 1 -------------
-------------- 2 -------------
-------------- 3 -------------
-------------- 4 -------------
-------------- 5 -------------
-------------- 6 -------------
-------------- 7 -------------
-------------- 8 -------------
-------------- 9 -------------
-------------- 10 -------------
-------------- 11 -------------
-------------- 12 -------------
-------------- 13 -------------
-------------- 14 -------------
-------------- 15 -------------
-------------- 16 -------------
-------------- 17 -------------
-------------- 18 -------------
-------------- 19 -------------
-------------- 20 -------------
-------------- 21 -------------
-------------- 22 -------------
-------------- 23 -------------
-------------- 24 -------------
-------------- 25 -------------
-------------- 26 -------------
-------------- 27 -------------
-------------- 28 -------------
-------------- 29 -------------
-------------- 30 -------------
-------------- 31 

  _warn_prf(average, modifier, msg_start, len(result))
