In [16]:
# %pip (rather than !pip) installs into the environment of the running kernel,
# so the packages are importable by the cells below.
# NOTE(review): versions are unpinned — pin them once known, for reproducibility.
%pip install luigi transformers



In [17]:
import os
import pickle
import luigi
import pandas as pd
from sklearn.metrics import f1_score
import bert
import trainer
import utils

In [18]:
# Directory for pipeline artifacts; LoadData writes its data.pkl here.
artifact_directory = "artifacts"

In [19]:
class LoadData(luigi.Task):
    """Luigi task that snapshots the raw training CSV as a pickled DataFrame.

    Reads ``train.csv`` from the working directory and writes the resulting
    DataFrame to ``<artifact_directory>/data.pkl``.
    """

    def output(self):
        """Return the local target holding the pickled DataFrame."""
        return luigi.LocalTarget(os.path.join(artifact_directory, "data.pkl"))

    def run(self):
        """Load ``train.csv`` and pickle it to the output path."""
        data = pd.read_csv("train.csv")
        destination = self.output().path

        # The file is written with the builtin open(), which does not create
        # missing parent directories, so make sure the artifact folder exists.
        os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)

        # Luigi considers the task complete as soon as the output file exists.
        # Dump to a sibling temp file and rename it into place so that an
        # interrupted run never leaves a truncated data.pkl behind.
        partial_path = destination + ".tmp"
        try:
            with open(partial_path, "wb") as outfile:
                pickle.dump(data, outfile)
            os.replace(partial_path, destination)
        finally:
            # Only present here if the dump or the rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print("Data loading done!")

In [20]:
class TrainModel(luigi.Task):
    """Luigi task that fits the BERT classifier on the data produced by LoadData.

    The model is saved under ``artifact_directory`` and that directory path is
    written to ``model_path.txt`` so downstream tasks can locate the model.
    """

    def requires(self):
        """Depend on the pickled training data."""
        return LoadData()

    def run(self):
        """Build the model, hand it to the K-fold trainer and save it."""
        classes = ["Computer Science", "Physics", "Mathematics", "Statistics", "Quantitative Biology",
                   "Quantitative Finance"]
        # The pickle is the file written by LoadData in this same pipeline;
        # never point this at an untrusted file (unpickling can execute code).
        with open(self.input().path, "rb") as in_file:
            data = pd.read_pickle(in_file)
        data["text"] = data["TITLE"] + " ." + data["ABSTRACT"]
        Y = data[classes].values

        version = "final_1"
        processors = [utils.replace_latex_math_with, utils.to_corpus, utils.lemmatize_sentence]
        model = bert.BertToSingleLayerNeuralNetwork(config=bert.ModelConfig)
        model.version = version
        model.build(processors=processors)

        trainer_methodology = trainer.KFoldTrainer(config=trainer.TrainerConfig, model=model)
        # NOTE(review): initialize() is presumably what runs the training —
        # confirm against the trainer module, which is not shown here.
        trainer_methodology.initialize(x=data["text"], y=Y)

        # Save next to the other artifacts by using the shared constant rather
        # than repeating the literal, so changing artifact_directory moves the
        # data and the model together.
        model.save(artifact_directory)
        with self.output().open("w") as file:
            file.write(artifact_directory)

    def output(self):
        """Return the marker file that stores the saved model's directory."""
        return luigi.LocalTarget('model_path.txt')

In [21]:
class EvaluateModel(luigi.Task):
    """Luigi task that reloads the trained model and prints its macro F1 score.

    The output file ``eval.txt`` contains the model directory (not the score),
    because DeployModel reads the directory from this task's output.
    """

    def requires(self):
        """Depend on the trained model."""
        return TrainModel()

    def run(self):
        """Rebuild the model, load the saved weights and score the predictions."""
        version = "final_1"
        processors = [utils.replace_latex_math_with, utils.to_corpus, utils.lemmatize_sentence]
        model = bert.BertToSingleLayerNeuralNetwork(config=bert.ModelConfig)
        model.version = version
        model.build(processors=processors)

        with open(self.input().path, "r") as input_file:
            # Strip so a trailing newline in the marker file cannot corrupt
            # the directory path.
            directory = input_file.read().strip()
        print("loading model from ", directory)
        model.load(directory=directory)

        # NOTE(review): this scores on train.csv, the same file LoadData feeds
        # into training, so the number is not a held-out estimate.
        validation_dataset = pd.read_csv("train.csv")
        validation_dataset["text"] = validation_dataset["TITLE"] + " ." + validation_dataset["ABSTRACT"]
        y_pred = model.predict(validation_dataset["text"])

        classes = ["Computer Science", "Physics", "Mathematics", "Statistics", "Quantitative Biology",
                   "Quantitative Finance"]
        Y_val = validation_dataset[classes].values
        # Threshold the predicted scores at 0.5 to get one 0/1 label per class.
        y_pred_binary = (y_pred > 0.5).astype(int)

        macro_f1 = f1_score(Y_val, y_pred_binary, average='macro')
        print(macro_f1)

        # Pass on the directory that was actually evaluated instead of a
        # hard-coded literal, so the next task loads the same model.
        with self.output().open("w") as file:
            file.write(directory)

    def output(self):
        """Return the marker file signalling that evaluation has finished."""
        return luigi.LocalTarget('eval.txt')

In [22]:
class DeployModel(luigi.Task):
    """Luigi task that reloads the evaluated model and re-saves it under ``model``.

    Writes the directory the model was loaded from to ``deploy.txt`` as the
    completion marker.
    """

    def requires(self):
        """Depend on the evaluation step, whose output names the model directory."""
        return EvaluateModel()

    def run(self):
        """Rebuild the model, load the saved weights and save them to ``model``."""
        version = "final_1"
        processors = [utils.replace_latex_math_with, utils.to_corpus, utils.lemmatize_sentence]
        model = bert.BertToSingleLayerNeuralNetwork(config=bert.ModelConfig)
        model.version = version
        model.build(processors=processors)

        with open(self.input().path, "r") as input_file:
            # Strip so a trailing newline in the marker file cannot corrupt
            # the directory path.
            directory = input_file.read().strip()
        print("loading model from ", directory)
        model.load(directory=directory)
        model.save("model")

        # Record the directory the deployed model came from, taken from the
        # upstream marker rather than a hard-coded literal.
        with self.output().open("w") as file:
            file.write(directory)

    def output(self):
        """Return the marker file signalling that deployment has finished."""
        return luigi.LocalTarget('deploy.txt')

In [23]:
# Run the pipeline up to DeployModel using Luigi's in-process local scheduler.
# Tasks whose output files already exist are treated as complete and skipped.
luigi.build([DeployModel()], local_scheduler=True)

DEBUG: Checking if DeployModel() is complete
DEBUG: Checking if EvaluateModel() is complete
INFO: Informed scheduler that task   DeployModel__99914b932b   has status   PENDING
DEBUG: Checking if TrainModel() is complete
INFO: Informed scheduler that task   EvaluateModel__99914b932b   has status   PENDING
INFO: Informed scheduler that task   TrainModel__99914b932b   has status   DONE
INFO: Done scheduling tasks
INFO: Running Worker with 1 processes
DEBUG: Asking scheduler for work...
DEBUG: Pending tasks: 2
INFO: [pid 59] Worker Worker(salt=033068874, workers=1, host=05ee1e0ab1b7, username=root, pid=59) running   EvaluateModel()
Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- T

loading model from  artifacts


INFO: [pid 59] Worker Worker(salt=033068874, workers=1, host=05ee1e0ab1b7, username=root, pid=59) done      EvaluateModel()
DEBUG: 1 running tasks, waiting for next task to finish
INFO: Informed scheduler that task   EvaluateModel__99914b932b   has status   DONE
DEBUG: Asking scheduler for work...
DEBUG: Pending tasks: 1
INFO: [pid 59] Worker Worker(salt=033068874, workers=1, host=05ee1e0ab1b7, username=root, pid=59) running   DeployModel()


0.1745849829935736


Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


loading model from  artifacts


INFO: [pid 59] Worker Worker(salt=033068874, workers=1, host=05ee1e0ab1b7, username=root, pid=59) done      DeployModel()
DEBUG: 1 running tasks, waiting for next task to finish
INFO: Informed scheduler that task   DeployModel__99914b932b   has status   DONE
DEBUG: Asking scheduler for work...
DEBUG: Done
DEBUG: There are no more tasks to run at this time
INFO: Worker Worker(salt=033068874, workers=1, host=05ee1e0ab1b7, username=root, pid=59) was stopped. Shutting down Keep-Alive thread
INFO: 
===== Luigi Execution Summary =====

Scheduled 3 tasks of which:
* 1 complete ones were encountered:
    - 1 TrainModel()
* 2 ran successfully:
    - 1 DeployModel()
    - 1 EvaluateModel()

This progress looks :) because there were no failed tasks or missing dependencies

===== Luigi Execution Summary =====



True

In [2]:
# Runs index.py through the shell; index.py is not part of this notebook.
# NOTE(review): the saved output shows this run was interrupted (^C) — confirm
# the cell is still needed.
!python index.py

^C
