In [1]:
import sys
sys.path.append("../../vendor/graph4nlp")

In [2]:
import os
import time
import yaml

import requests
from bs4 import BeautifulSoup
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader

from graph4nlp.pytorch.data.dataset import Text2LabelDataset
from graph4nlp.pytorch.modules.evaluation.accuracy import Accuracy
from graph4nlp.pytorch.modules.graph_construction import ConstituencyBasedGraphConstruction
from graph4nlp.pytorch.modules.graph_construction import DependencyBasedGraphConstruction
from graph4nlp.pytorch.modules.graph_construction.embedding_construction import WordEmbedding
from graph4nlp.pytorch.modules.graph_embedding import GAT
from graph4nlp.pytorch.modules.graph_embedding import GraphSAGE
from graph4nlp.pytorch.modules.loss.general_loss import GeneralLoss
from graph4nlp.pytorch.modules.prediction.classification.graph_classification import FeedForwardNN
from graph4nlp.pytorch.modules.utils import constants as Constants
from graph4nlp.pytorch.modules.utils.logger import Logger
from graph4nlp.pytorch.modules.utils.generic_utils import EarlyStopping

Using backend: pytorch


## Step 1: Define Dataset
Retrieve data for training and testing. Visit the movie URL and gather all critic reviews (test set) and about 4 times as many audience reviews (train set). The requests package is used to get the HTML content of the original URLs. BeautifulSoup is used to extract information from the HTML documents. Selenium is used for navigating the website (e.g., clicking buttons and retrieving updated HTML)

This demo uses *The Day After Tomorrow* for the movie because it already has a decently even split of positive and negative reviews. This will make it easier to prune the data until it well-balanced.

### Data Layout
The audience reviews are used for training since there are more, so they go into `train.txt`. The critic reviews are used for testing, so they go into `test.txt`.
Each review will be on its own line, followed by a tab and either "POS" or "NEG"

In [3]:
class RTDataset(Text2LabelDataset):
    # Define raw and processed file names to prevent NotImplementedError being raised
    @property
    def raw_file_names(self):
        return {"train": "train.txt", "test": "test.txt"}
    
    @property
    def processed_file_names(self):
        return {"vocab": "vocab.pt", "data": "data.pt"}
    
    def __init__(self, root_dir, topology_builder=None, topology_subdir=None, graph_type='static',
                 pretrained_word_emb_name="840B", pretrained_word_emb_url=None,
                 edge_strategy=None, merge_strategy='tailhead', max_word_vocab_size=None,
                 min_word_vocab_freq=1, word_emb_size=None, **kwargs):
        super(RTDataset, self).__init__(root_dir=root_dir, topology_builder=topology_builder,
                                          topology_subdir=topology_subdir, graph_type=graph_type,
                                          edge_strategy=edge_strategy, merge_strategy=merge_strategy,
                                          max_word_vocab_size=max_word_vocab_size,
                                          min_word_vocab_freq=min_word_vocab_freq,
                                          pretrained_word_emb_name=pretrained_word_emb_name,
                                          pretrained_word_emb_url=pretrained_word_emb_url,
                                          word_emb_size=word_emb_size, **kwargs)
    
    # Find and click the button that leads to the next page of reviews, and return the HTML source of that page
    def _click_next_button(self, driver, button_xpath):
        # Make sure the button is loaded on the page before trying to find it
        next_button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, button_xpath)))
        next_button.click()
        return BeautifulSoup(driver.page_source, "lxml")

    # Takes in the filepath and an array of strings (each element is a line), and writes each element to the file
    def _write_dataset_to_file(self, filepath, text):
        # Encode the text as UTF-8
        with open(filepath, "w", encoding="utf-8") as f:
            f.writelines(text)
            
    # Balances a train or test file to have equal amounts of positive and negative layers
    def _balance_file(self, filepath, num_pos, total):
        num_neg = total - num_pos
        
        # Read all the lines from the current file
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
            
        # Now, only write back the lines that do not make the set unbalanced
        with open(filepath, "w", encoding="utf-8") as f:
            for line in lines:
                if (num_pos > num_neg and "\tPOS" in line):
                    num_pos -= 1
                elif (num_neg > num_pos and "\tNEG" in line):
                    num_neg -= 1
                else:
                    f.write(line)

    def download(self):
        critic_url = "https://www.rottentomatoes.com/m/day_after_tomorrow/reviews"
        critic_req = requests.get(critic_url)
        critic_soup = BeautifulSoup(critic_req.content, "lxml")
        critic_num_pages = 11
        critic_total = 0 # The total number of critic reviews
        critic_pos = 0 # The number of positive critic reviews
        critic_data = []

        audience_url = "https://www.rottentomatoes.com/m/day_after_tomorrow/reviews?type=user"
        audience_req = requests.get(audience_url)
        audience_soup = BeautifulSoup(audience_req.content, "lxml")
        audience_num_pages = 200
        audience_total = 0 # The total number of audience reviews taken
        audience_pos = 0 # The number of positive audience reviews
        audience_data = []

        driver = Chrome(executable_path=r"../../bin/chromedriver_win32/chromedriver.exe")    
        driver.get(critic_url)

        # All of the review information, including its positive or negative tag, is included in the review_container
        # Positive or negative review is determined by fresh or rotten icon
        # Review is under class review_text
        for page in range(critic_num_pages):
            for div in critic_soup.find_all("div", attrs={"class": "col-xs-16 review_container"}):
                text = div.find("div", attrs={"class": "the_review"}).text.strip().replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')

                if text:
                    review = "POS" if div.find("div", attrs={"class": "review_icon icon small fresh"}) else "NEG"
                    critic_total += 1

                    if review == "POS":
                        critic_pos += 1

                    critic_data.append(text + '\t' + review + '\n')

            # Navigate to the next page of reviews
            if page < critic_num_pages - 1:
                critic_soup = self._click_next_button(driver, "//*[@id='content']/div/div/div/nav[1]/button[2]")

        critic_freshness = critic_pos / critic_total

        driver.get(audience_url)

        # For audience reviews, the stars are each icons, either filled, empty, or half, so keep a star count. Then, if the review has >= 3.5 stars, it is positive
        for page in range(audience_num_pages):
            for div in audience_soup.find_all("div", attrs={"class": "audience-reviews__review-wrap"}):
                text = div.find("p", attrs={"class": "audience-reviews__review js-review-text clamp clamp-8 js-clamp"}).text.strip().replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')

                if text:
                    score = len(div.find_all("span", attrs={"class": "star-display__filled"})) + 0.5 * len(div.find_all("span", attrs={"class": "star-display__half"}))
                    review = "POS" if score >= 3.5 else "NEG"
                    audience_total += 1

                    if review == "POS":
                        audience_pos += 1

                    audience_data.append(text + '\t' + review + '\n')

            if page < audience_num_pages - 1:
                audience_soup = self._click_next_button(driver, "//*[@id='content']/div/div/nav[3]/button[2]")

        audience_freshness = audience_pos / audience_total

        self._write_dataset_to_file(self.raw_dir + "/test.txt", critic_data)
        self._write_dataset_to_file(self.raw_dir + "/train.txt", audience_data)
        self._balance_file(self.raw_dir + "/test.txt", critic_pos, critic_total)
        self._balance_file(self.raw_dir + "/train.txt", audience_pos, audience_total)

        # Write a stats file containing the actual stats retrieved from the web scrape (to be compared against the model's guesses)
        with open(self.root + "actual_stats.txt", "w", encoding="utf-8") as f:
            f.write("## Critic Stats ##\n")
            f.write("Total Critic Reviews: {}\n".format(critic_total))
            f.write("Positive Critic Reviews: {}\n".format(critic_pos))
            f.write("Negative Critic Reviews: {}\n".format(critic_total - critic_pos))
            f.write("Critic Freshness: {:.0%}\n\n".format(critic_freshness))

            f.write("## Audience Stats ##\n")
            f.write("Total Audience Reviews: {}\n".format(audience_total))
            f.write("Positive Audience Reviews: {}\n".format(audience_pos))
            f.write("Negative Audience Reviews: {}\n".format(audience_total - audience_pos))
            f.write("Audience Freshness: {:.0%}\n".format(audience_freshness))

## Step 2: Define the Model
The model inherits from Pytorch's nn.Module class. The dataset's vocabulary as well as the configuration data will be passed into it. The configuration data is used to define many parameters of the model, such as the graph type, GNN, learning rate, number of hidden layers, and many more. Note that some values, such as the number of classes in the dataset are added to the configuration automatically and are not defined by the user.

The `__init()__` method uses the configuration information to build the graph topology and GNN. It also attaches a feedforward neural network and a loss function.

The `forward()` method generates logits (output tensors), and it also returns loss if that is required. This function is does not have to be called manually. This method calculates the gradient descent based on the graph topology, and passes this to the feedforward network to generate logits. The loss is calculated based on the difference between the ouput tensors and target tensors.

In [4]:
class SentimentAnalyzer(nn.Module):
    def __init__(self, vocab, config):
        super(SentimentAnalyzer, self).__init__()
        self.config = config
        self.vocab = vocab
        embedding_style = {'single_token_item': True if config['graph_type'] != 'ie' else False,
                            'emb_strategy': config.get('emb_strategy', 'w2v_bilstm'),
                            'num_rnn_layers': 1,
                            'bert_model_name': config.get('bert_model_name', 'bert-base-uncased'),
                            'bert_lower_case': True
                           }
        
        if config["graph_type"] == "constituency":
            self.graph_topology = ConstituencyBasedGraphConstruction(embedding_style=embedding_style,
                                                                     vocab=vocab.in_word_vocab,
                                                                     hidden_size=config["num_hidden"],
                                                                     word_dropout=config["word_dropout"],
                                                                     rnn_dropout=config["rnn_dropout"],
                                                                     fix_word_emb=not config["no_fix_word_emb"],
                                                                     fix_bert_emb=not config.get("no_fix_bert_emb", False))
        
        elif config["graph_type"] == "dependency":
            self.graph_topology = DependencyBasedGraphConstruction(embedding_style=embedding_style,
                                                                     vocab=vocab.in_word_vocab,
                                                                     hidden_size=config["num_hidden"],
                                                                     word_dropout=config["word_dropout"],
                                                                     rnn_dropout=config["rnn_dropout"],
                                                                     fix_word_emb=not config["no_fix_word_emb"],
                                                                     fix_bert_emb=not config.get("no_fix_bert_emb", False))
            
        else:
            raise RuntimeError("Unknown/unsupported graph_type: {}".format(config["graph_type"]))
        
        
        if "w2v" in self.graph_topology.embedding_layer.word_emb_layers:
            self.word_emb = self.graph_topology.embedding_layer.word_emb_layers["w2v"].word_emb_layer
        else:
            self.word_emb = WordEmbedding(self.vocab.in_word_vocab.embeddings.shape[0],
                                          self.vocab.in_word_vocab.embeddings.shape[1],
                                          pretrained_word_emb=self.vocab.inword_vocab.embeddings,
                                          fix_emb=not config["no_fix_word_emb"]
                                         ).word_emb_layer
        
        if config["gnn"] == "gat":
            heads = [config['gat_num_heads']] * (config['gnn_num_layers'] - 1) + [config['gat_num_out_heads']]
            self.gnn = GAT(config['gnn_num_layers'],
                        config['num_hidden'],
                        config['num_hidden'],
                        config['num_hidden'],
                        heads,
                        direction_option=config['gnn_direction_option'],
                        feat_drop=config['gnn_dropout'],
                        attn_drop=config['gat_attn_dropout'],
                        negative_slope=config['gat_negative_slope'],
                        residual=config['gat_residual'],
                        activation=F.elu)
        
        elif config["gnn"] == "graphsage":
            self.gnn = GraphSAGE(config["gnn_num_layers"],
                                 config["num_hidden"],
                                 config["num_hidden"],
                                 config["num_hidden"],
                                 config["graphsage_aggregate_type"],
                                 direction_option=config["gnn_direction_option"],
                                 feat_drop=config["gnn_dropout"],
                                 bias=True,
                                 norm=None,
                                 activation=F.relu,
                                 use_edge_weight=False)
        else:
            raise RuntimeError("Unknown/unsupported gnn type: {}".format(config["gnn"]))
            
        self.analyzer = FeedForwardNN(2 * config["num_hidden"] if config["gnn_direction_option"] == "bi_sep" else config["num_hidden"],
                                        config["num_classes"],
                                        [config["num_hidden"]],
                                        graph_pool_type=config["graph_pooling"],
                                        dim=config["num_hidden"],
                                        use_linear_proj=config["max_pool_linear_proj"])
        
        self.loss = GeneralLoss("CrossEntropy")
        
    def forward(self, graph_list, tgt=None, require_loss=True):
        batch_gradient_descent = self.graph_topology(graph_list)
        
        self.gnn(batch_gradient_descent)
        
        self.analyzer(batch_gradient_descent)
        logits = batch_gradient_descent.graph_attributes["logits"]
        
        if require_loss:
            loss = self.loss(logits, tgt)
            return logits, loss
        else:
            return logits

## Step 3: Define Model Handler
The `ModelHandler`  will control the training, evaluation, and testing of the model, and it will provide some utilities to facilitate this. The `ModelHandler` contains the dataset and the model. It defines methods for loading the dataset and creating data loaders for the train, validation, and test sets. It builds the optimizer for the model and defines the evaluation metric. It also defines the `train()`, `evaluate()`, and `test()` functions for the model.

The `ModelHandler` also creates a logger to write the performance history of the model to a file. Additionally, it provides training utilites such as an `EarlyStopper`, which will stop the training if the model has not improved since some patience threshold, as well as the ability to reduce the learning rate if the model has not improved recently.

In [5]:
class ModelHandler:
    def __init__(self, config):
        super(ModelHandler, self).__init__()
        self.config = config
        
        # Create a logger at the directory specified in the config file.
        # The logger will write config and model performance information into a log file
        self.logger = Logger(self.config["out_dir"], config={k:v for k, v in self.config.items() if k != "device"}, overwrite=True)
        self.logger.write(self.config["out_dir"]) # Log config information
        
        self._build_dataloader()
        self._build_model()
        self._build_optimizer()
        self._build_evaluation()
        
    def _build_dataloader(self):
        if self.config["graph_type"] == "constituency":
            topology_builder = ConstituencyBasedGraphConstruction
            graph_type = "static"
            merge_strategy = "tailhead"
        elif self.config["graph_type"] == "dependency":
            topology_builder = DependencyBasedGraphConstruction
            graph_type = "static"
            merge_strategy = "tailhead"
        else:
            raise RuntimeError("Unknown/unsupported graph_type: {}".format(self.config["graph_type"]))
        
        topology_subdir = "{}_graph".format(self.config["graph_type"])
        
        dataset = RTDataset(root_dir="../../data",
                            pretrained_word_emb_name=self.config.get("pretrained_word_emb_name", "840B"),
                            merge_strategy=merge_strategy,
                            seed=self.config["seed"],
                            thread_number=4,
                            port=9000,
                            timeout=15000,
                            word_emb_size=300,
                            graph_type=graph_type, topology_builder=topology_builder,
                            topology_subdir=topology_subdir,
                            dynamic_graph_type=None,
                            dynamic_init_topology_builder=None,
                            dynamic_init_topology_aux_args={"dummy_param": 0})
        
        self.train_dataloader = DataLoader(dataset.train, batch_size=self.config["batch_size"], 
                                           shuffle=True,
                                           num_workers=self.config["num_workers"],
                                           collate_fn=dataset.collate_fn)
        
        # Create a validation set if one is specified. If not, the validation set is the same as the test set
        if hasattr(dataset, "val") == False:
            dataset.val = dataset.test
        self.val_dataloader = DataLoader(dataset.val, batch_size=self.config["batch_size"],
                                         shuffle=False,
                                         num_workers=self.config["num_workers"],
                                         collate_fn=dataset.collate_fn)
        
        self.test_dataloader = DataLoader(dataset.test, batch_size=self.config["batch_size"],
                                         shuffle=False,
                                         num_workers=self.config["num_workers"],
                                         collate_fn=dataset.collate_fn)
        
        self.vocab = dataset.vocab_model
        self.config["num_classes"] = dataset.num_classes
        self.num_train = len(dataset.train)
        self.num_val = len(dataset.val)
        self.num_test = len(dataset.test)
        print("Train size: {}, Val size: {}, Test size: {}"
              .format(self.num_train, self.num_val, self.num_test))
        self.logger.write("Train size: {}, Val size: {}, Test size: {}"
              .format(self.num_train, self.num_val, self.num_test))
        
    # Build the semantic analyzer and put it on the GPU if it is available
    def _build_model(self):
        self.model = SentimentAnalyzer(self.vocab, self.config).to(self.config["device"])
        
    # Define the optimzer and helpers for the optimzer.
    # The stopper allows the model to stop training if it has not improved in a while
    # The scheduler allows the learning rate to be decreased if the model plateaus
    def _build_optimizer(self):
        parameters = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = optim.Adam(parameters, lr=self.config["lr"])
        self.stopper = EarlyStopping(os.path.join(self.config["out_dir"], Constants._SAVED_WEIGHTS_FILE), patience=self.config["patience"])
        self.scheduler = ReduceLROnPlateau(self.optimizer, mode="max", factor=self.config["lr_reduce_factor"],
                                           patience=self.config["lr_patience"], verbose=True)
    
    # Since the model can only be right or wrong, accuracy is the best performance metric
    def _build_evaluation(self):
        self.metric = Accuracy(["accuracy"])
    
    def train(self):
        dur = []
        for epoch in range(self.config["epochs"]):
            self.model.train()
            train_loss = []
            train_acc = []
            t0 = time.time()
            for i, data in enumerate(self.train_dataloader):
                tgt = data['tgt_tensor'].to(self.config['device'])
                data["graph_data"] = data["graph_data"].to(self.config["device"])
                logits, loss = self.model(data["graph_data"], tgt, require_loss=True)
                
                # Add graph regularization loss if available
                # Regularizing the graph introduces more loss (the regularization factor)
                if data["graph_data"].graph_attributes.get("graph_reg", None) is not None:
                    loss = loss + data["graph_data"].graph_attributes["graph_reg"]
                
                # Backpropgation step
                # Zero the gradients, take the derivative of the loss, and step the optimizer
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                train_loss.append(loss.item())
                
                # Make a prediction based on the logits calculated earlier
                predict = torch.max(logits, dim=-1)[1].cpu()
                train_acc.append(self.metric.calculate_scores(ground_truth=tgt.cpu(), predict=predict.cpu(), zero_division=0)[0])
                dur.append(time.time() - t0)
                
            val_acc = self.evaluate(self.val_dataloader)
            self.scheduler.step(val_acc)
            print("Epoch: [{} / {}] | Time: {:.2f}s | Loss: {:.4f} | Train Acc: {:.2%} | Val Acc: {:.2%}"
                  .format(epoch + 1, self.config["epochs"], np.mean(dur), np.mean(train_loss), np.mean(train_acc), val_acc))
            self.logger.write("Epoch: [{} / {}] | Time: {:.2f}s | Loss: {:.4f} | Train Acc: {:.2%} | Val Acc: {:.2%}"
                  .format(epoch + 1, self.config["epochs"], np.mean(dur), np.mean(train_loss), np.mean(train_acc), val_acc))
            
            if self.stopper.step(val_acc, self.model):
                break
        
        return self.stopper.best_score
    
    # Validation step of the model
    # Used within model training at the end of each epoch
    def evaluate(self, dataloader):
        self.model.eval()
        with torch.no_grad():
            pred_collect = []
            tgt_collect = []
            
            for i, data in enumerate(dataloader):
                tgt = data['tgt_tensor'].to(self.config['device'])
                data["graph_data"] = data["graph_data"].to(self.config["device"])
                logits = self.model(data["graph_data"], require_loss=False)
                pred_collect.append(logits)
                tgt_collect.append(tgt)
            
            pred_collect = torch.max(torch.cat(pred_collect, 0), dim=-1)[1].cpu()
            tgt_collect = torch.cat(tgt_collect, 0).cpu()
            score = self.metric.calculate_scores(ground_truth=tgt_collect, predict=pred_collect, zero_division=0)[0]
            
            return score
        
    def test(self):
        # Restore the best saved model
        self.stopper.load_checkpoint(self.model)
        
        t0 = time.time()
        acc = self.evaluate(self.test_dataloader)
        dur = time.time() - t0
        print("Test examples: {} | Time: {:.2f}s | Test Acc: {:.2%}"
              .format(self.num_test, dur, acc))
        self.logger.write("Test examples: {} | Time: {:.2f}s | Test Acc: {:.2%}"
              .format(self.num_test, dur, acc))
        
        return acc

## Step 4: Configure and Run the Model
Open the configuration file, which will be fed to the model through the `ModelHandler`. The configuration file describes the parameters of how the model should be built. So, to get a different model, the only thing that needs to change is to provide a different configuration file.RNG seeds for numpy and PyTorch are set so that the model training is deterministic (will have the same result every time). Also, add to the configuration the device the model should run on.

The model will detect whether raw or processed data already exists. If there is no raw or processed data, `download()` method of `RTDataset` will be called automatically. If there is raw data but no processed data, StanfordCoreNLP needs to be running at the same time as this notebook with the same port and timeout that is defined in `RTDataset` (in this case, port 9000 with timeout=15000). To run StanfordCoreNLP (v4.2.2) with Java 8, navigate to the folder that contains StanfordCoreNLP and run this command:

    java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000

Finally, run the `train()` and `test()` functions and see the how the model performs.

In [6]:
# Configure
config_file = "../../config/graph4nlp/graphsage_bi_fuse_static_constituency.yaml"
config = yaml.load(open(config_file, "r"), Loader=yaml.FullLoader)

# Set all RNG seeds to the same seed to ensure a deterministic model
np.random.seed(config["seed"])
torch.manual_seed(config["seed"])

if not config["no_cuda"] and torch.cuda.is_available():
    print("[ Using CUDA ]")
    config["device"] = torch.device("cuda" if config["gpu"] < 0 else "cuda:%d" % config["gpu"])
    torch.cuda.manual_seed(config["seed"])
    torch.cuda.manual_seed_all(config["seed"])
    torch.backends.cudnn.deterministic = True
    cudnn.benchmark = False
else:
    print("[ Using CPU ]")
    config["device"] = torch.device("cpu")
    
print("\n" + config["out_dir"])

runner = ModelHandler(config)
t0 = time.time()

val_acc = runner.train()
test_acc = runner.test()

runtime = time.time() - t0
print('Total runtime: {:.2f}s'.format(runtime))
runner.logger.write('Total runtime: {:.2f}s\n'.format(runtime))
runner.logger.close()

print('val acc: {}, test acc: {}'.format(val_acc, test_acc))

[ Using CPU ]

../../out/rotten_tomatoes/graphsage_bi_fuse_constituency_ckpt
Building vocabs...


.vector_cache/glove.840B.300d.zip: 2.18GB [30:39, 1.18MB/s]                                                            
100%|████████████████████████████████████████████████████████████████████▉| 2196016/2196017 [03:14<00:00, 11288.61it/s]


Pretrained word embeddings hit ratio: 0.9436234263820471
Using pretrained word embeddings
[ Initialized word embeddings: (3654, 300) ]
Saving vocab model to ../../data\processed\constituency_graph\vocab.pt
Loading pre-built vocab model stored in ../../data\processed\constituency_graph\vocab.pt
Train size: 1771, Val size: 160, Test size: 160
[ Fix word embeddings ]
Epoch: [1 / 500] | Time: 52.68s | Loss: 0.6468 | Train Acc: 60.11% | Val Acc: 53.75%
Saved model to ../../out/rotten_tomatoes/graphsage_bi_fuse_constituency_ckpt\params.saved
Epoch: [2 / 500] | Time: 51.93s | Loss: 0.5775 | Train Acc: 70.23% | Val Acc: 52.50%
EarlyStopping counter: 1 out of 10
Epoch: [3 / 500] | Time: 50.83s | Loss: 0.5498 | Train Acc: 72.87% | Val Acc: 52.50%
EarlyStopping counter: 2 out of 10
Epoch     4: reducing learning rate of group 0 to 1.0000e-03.
Epoch: [4 / 500] | Time: 51.11s | Loss: 0.5368 | Train Acc: 74.30% | Val Acc: 50.62%
EarlyStopping counter: 3 out of 10
Epoch: [5 / 500] | Time: 51.14s | Lo

## Results
Constituency: 69.5% accuracy

Dependency: 62.2% accuracy