In [2]:
import os
import json
import spacy

import utils.dataloader as dl

from absa import Predictor
from security import Authorization

# Load the spaCy English pipeline and the predictor for the model stored
# under model/en-laptops-absa.
nlp = spacy.load('en_core_web_sm')
pred = Predictor(os.path.join('model', 'en-laptops-absa'))

# The JSON web token is a credential and must never be stored in the notebook.
# It is read from the environment instead (the variable name is a convention
# of this notebook). The token that used to be hard-coded here has been
# exposed and should be revoked or rotated.
key = os.environ.get('ABSA_TOKEN')
if not key:
    raise RuntimeError(
        "Set the ABSA_TOKEN environment variable to a valid JSON web token "
        "before running this notebook."
    )

Initializing Predictor
Loading model model/en-laptops-absa
Config loaded from model/en-laptops-absa/config.json
Aspects loaded from model/en-laptops-absa/aspects.jsonl
Config loaded from model/en-laptops-absa/config.json


# Train the ABSA laptop model on aspects from the SemEval data

1. Train the model:
    - Prerequisite: have the aspect file available.
    
    

In [4]:
# SemEval-2015 laptop training data (XML); the path is relative to the
# notebook's working directory.
filename = '../SemEval-2015-ABSA/absa_15_laptops_train_data.xml'

In [10]:
# Unpack the values returned by the SemEval loader.
# NOTE(review): the structure of each returned value is defined in
# utils.dataloader and is not visible here — confirm before relying on it.
sentences, aspect_category_sentiments, (idx2aspectlabel, idx2sentilabel), cats = dl.semeval_to_aspectsentiment_hr(filename)

# Arguments for `train`, named after its parameters.
model_folder = os.path.join('model', 'en-laptops-absa')
documents = sentences
target = 'model/en-laptops-absa_bs32_ep20_nlw0.5_lr2e-5'
aspects = cats
token = key

# Kept as the last expression of the cell: Jupyter only displays the value of
# the final expression, so anywhere else this would be a silent no-op.
len(cats)

In [None]:
# `train` has five required parameters, so a bare `train()` raises TypeError.
# Pass the values prepared in the previous cell by keyword.
# The cell that defines `train` must be executed before this one.
train(model_folder=model_folder, documents=documents, aspects=aspects,
      target=target, token=token)

In [None]:
def train(model_folder, documents, aspects, target, token,
          batchsize=32, encrypt=True, state_callback=None,
          stop_callback=None, epochs=20, validation_documents=None, seed=None,
          none_label_weight=0.5, learning_rate=2e-5, save_interval=None):
    """Trains a model with given annotated documents and saves it to ``target``.

    NOTE(review): the helpers used below (``get_model_dir``, ``fix_seed``, the
    handler classes, ``Classifier``, ``DEFAULT_SENTIMENT_LABELS``, ...) are not
    imported in the visible notebook cells — confirm they are in scope.

    Args:
        model_folder (str): A path or name of the base model that should be used.
        documents (list): An annotated list of documents.
        aspects: Aspects stored in the model config under ``do_aspects`` and
            passed to the validation score calculation.
        target (string): A path where the model should be saved.
        token (string): A JSON web token which is validated.
        batchsize (int): The batchsize used for training. If *None*, an
            ``AutoBatchsizeSelector`` handler is added.
        encrypt (boolean): A flag whether to encrypt the model or not, default *True*.
        state_callback: Callback that is executed regularly and gives progress and score params back.
        stop_callback: Callback handed to ``CustomTrainingStopper`` as
            ``stop_training_callback``; no stopper is added when *None*.
        epochs (int): The maximum number of epochs for training.
        validation_documents (list): An annotated list of documents. Only used
            when ``state_callback`` is given as well.
        seed: If not *None*, passed to ``fix_seed`` before training.
        none_label_weight: Forwarded to ``get_text_aspect_pairs`` for the
            training documents.
        learning_rate: Passed to the optimizer handler as ``lr``.
        save_interval (int): Number of global steps after which a model checkpoint is saved.
            NOTE(review): not referenced anywhere in this function body.

    Raises:
        ValueError: If the token is not verified for ``absa/train``; the message
            comes from the authorization response.
    """
    # Authorize and Check
    authorization_response = Authorization.authorize(token, functionality='absa/train')
    if not authorization_response['verified']:
        raise ValueError(authorization_response['message'])
    model_folder = get_model_dir(model_folder, token=token)

    # Preconditions
    if seed is not None:
        fix_seed(seed)

    # Load and Train Model
    handlers = [Decrypter(), ShuffleData(),
                Train(), TorchOptimizer(optclass=Ranger, lr=learning_rate),
                LearningRateReporterAndChanger(),
                Encrypter(encrypt)]
    if batchsize is None:
        # NOTE(review): on this path `maxbatchsize` is always None — confirm
        # that the selector treats None as "no upper bound".
        handlers.append(AutoBatchsizeSelector(basemodel='bert-base-uncased', maxbatchsize=batchsize, verbose=True))
    if stop_callback is not None:
        handlers.append(CustomTrainingStopper(stop_training_callback=stop_callback))
    if state_callback is not None:
        handlers.append(ProgressReporter(reporter=state_callback))

    X, Y = get_text_aspect_pairs(documents=documents, labels=DEFAULT_SENTIMENT_LABELS,
                                 none_label_weight=none_label_weight)
    dataset_size = len(X)

    # Validation-based handlers need both validation data and a reporter.
    if validation_documents is not None and state_callback is not None:
        X_test, Y_test = get_text_aspect_pairs(documents=validation_documents, labels=DEFAULT_SENTIMENT_LABELS)

        # Custom evaluator for classification framework
        def evaluator(Y_true, Y_pred):
            return Metrics().calc_score_absa(X=X_test, Y_true=Y_true, Y_pred=Y_pred, labels=DEFAULT_SENTIMENT_LABELS,
                                             aspects=aspects
                                             )

        handlers.append(ScoreReporter(reporter=state_callback, evaluator=evaluator, X=X_test, Y=Y_test))
        # The keeper was previously constructed and immediately discarded, so
        # it never took part in training; register it like the other handlers.
        handlers.append(BestModelKeeper(metricname='val_loss', maximize=False, verbose=True))
        handlers.append(
            EarlyStopping(patience=dataset_size * 2, lr_reduction_patience=dataset_size, lr_reduction_factor=0.1,
                          metricname='val_loss', verbose=True, epsilon=0.0001, maximize=False))

    ch = CallbackHandler(handlers)
    absa = Classifier(model=model_folder, num_labels=len(DEFAULT_SENTIMENT_LABELS), device=get_device(), ch=ch)

    # Record the aspects and the label mappings in the model config.
    # Written through __dict__ exactly as before to keep the original semantics.
    absa.model.config.__dict__['do_aspects'] = aspects
    absa.model.config.__dict__['id2label'] = dict(enumerate(DEFAULT_SENTIMENT_LABELS))
    absa.model.config.__dict__['label2id'] = {label: index for index, label in enumerate(DEFAULT_SENTIMENT_LABELS)}

    absa.model.train()
    absa(X, Y, batchsize=batchsize, verbose=True, epochs=epochs)

    # Save Model and Aspects
    absa.save(target)
    # NOTE(review): the original ended with a dangling "Save to config" comment
    # and no code after it — confirm whether a step is missing here.
