In [1]:
import os
import sys

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import pandas as pd
from easydict import EasyDict

# Make the `data` and `src` packages imported below resolvable.
# The path is resolved to an absolute location once, so a later change of
# working directory cannot break imports, and it is appended only when it is
# missing, so re-running this cell does not pile up duplicate sys.path entries.
# NOTE(review): "../../../.." is presumably the repository root relative to
# the notebook's working directory — confirm.
REPO_ROOT = os.path.abspath("../../../..")
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)
from data.classification.toxic_comments.load import ToxicComtDataLoader
from src.graphs.models.toxic_comment import ToxicComtModel
from src.graphs.models.word_embedding import WordEmbedding

In [2]:
# Width of the pretrained word vectors. It is defined once because three
# settings below have to agree on it: the GloVe file that is loaded, the
# embedding layer's `embedding_dim`, and the sequence encoder's `input_size`
# (later cells feed the embedding output straight into the model).
EMBEDDING_DIM = 50

# Central configuration for this notebook. Each top-level section is handed
# to the component that consumes it (model, embedding, data loader).
CONFIG = EasyDict({
    "model": {
        "sequence_encoder": {
            "cls": "rnn",
            "input_size": EMBEDDING_DIM,
            "hidden_size": 32,
            "num_layers": 3
        },
        "output_layer": {
            # NOTE(review): n_factor equals the encoder hidden_size above;
            # presumably they must match — confirm against ToxicComtModel.
            "n_factor": 32,
            "n_target": 6
        }
    },
    "embedding": {
        "path_dir": "../../../../data/word_embedding/glove",
        "path_file": "glove.6B.{}d.txt".format(EMBEDDING_DIM),
        # NOTE(review): 0 looks like a placeholder that is replaced once the
        # pretrained vectors are loaded — confirm against WordEmbedding.
        "num_embeddings": 0,
        "embedding_dim": EMBEDDING_DIM
    },
    "data": {
        "path_dir": "../../../../data/classification/toxic_comments",
        "path_file": "sample.csv",
        "pct_train": 0.7,
        "batch_size": 4,
        "min_freq": 4
    },
    "train": {
        "lr": 0.01
    }
})

In [3]:
# Build the project data loader from the "data" section of CONFIG.
data_cfg = CONFIG.data
dataloader = ToxicComtDataLoader(data_cfg)

In [4]:
# Pull one batch from the training loader to smoke-test the pipeline.
# The batch must unpack into exactly two items, used below as inputs and
# targets.
first_batch = next(iter(dataloader.loader_train))
batch_x, batch_y = first_batch

In [5]:
# Take the vocabulary exposed by the data loader and load the pretrained
# vectors named in the "embedding" section of CONFIG (file name first,
# directory second).
embedding_cfg = CONFIG.embedding
vocab = dataloader.vocab
vocab.load_vectors(embedding_cfg.path_file, embedding_cfg.path_dir)


In [6]:
# Create the embedding layer from the "embedding" config section, then
# initialise it from the vocabulary's pretrained vectors.
embedding_cfg = CONFIG.embedding
temp_embedding = WordEmbedding(embedding_cfg)
# NOTE(review): the return value is not captured, so this relies on
# WordEmbedding.from_pretrained updating the instance in place — confirm.
# (torch.nn.Embedding.from_pretrained, by contrast, is a classmethod that
# returns a new module.)
temp_embedding.from_pretrained(vocab.vocab)

In [7]:
# Instantiate the classifier from the "model" section of CONFIG.
model_cfg = CONFIG.model
model = ToxicComtModel(model_cfg)

In [8]:
# Forward pass on the sample batch: embed the inputs, run the model, and
# display the difference between the model output and the targets.
embedded_x = temp_embedding(batch_x)
outputs = model(embedded_x)
outputs - batch_y

tensor([[0.4576, 0.4975, 0.5437, 0.4790, 0.4731, 0.4822],
        [0.4576, 0.4975, 0.5437, 0.4790, 0.4731, 0.4822],
        [0.4576, 0.4975, 0.5437, 0.4790, 0.4731, 0.4822],
        [0.4406, 0.5411, 0.5712, 0.5074, 0.4722, 0.4849]],
       grad_fn=<SubBackward0>)

In [None]:
# torch.stack expects a sequence of tensors and raises TypeError when handed
# a single Tensor. The residual cell earlier in this notebook subtracts
# batch_y from the model output and gets a tensor back, so batch_y is
# evidently already a Tensor; only stack when it is still a list or tuple.
batch_y if torch.is_tensor(batch_y) else torch.stack(batch_y)