In [1]:
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

def import_sys():
    import sys
    sys.path.append('..')
import_sys()

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

In [2]:
import errudite
print(errudite)

<module 'errudite' from '/Users/gyuhoshim/errudite/errudite/__init__.py'>


In [3]:
import pandas as pd
from tqdm import tqdm

from overrides import overrides

from errudite.io import DatasetReader
from errudite.utils import normalize_file_path, accuracy_score
from errudite.targets.instance import Instance
from errudite.targets.target import Target
from errudite.targets.label import Label, PredefinedLabel



In [4]:
# !pip install transformers==4.3.0

In [5]:
# !pip install sentencepiece

In [6]:
# !pip install accelerate

In [7]:
# !pip install  xlrd==1.1.0

In [8]:
# !pip install --upgrade ipywidgets

In [9]:
#Flan_T5

import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
from typing import List, Dict
import sys
sys.path.append('..')
from errudite.predictors.predictor import Predictor
from errudite.targets.label import Label, PredefinedLabel

class Predictor_FLAN_T5(Predictor):
    def __init__(self, name: str, 
    model_path: str=None,
    model_online_path: str=None,
    description: str='',
    model_type: str=None) -> None:
        model = None
        Predictor.__init__(self, name, description, model, ['accuracy'])
        
    def predict_essay(self, essay : str) -> Dict[str,str]:
        # Load data into pandas DataFrame
        data_df = pd.read_excel("StudentEssays.xlsx")

        # Initialize T5 tokenizer and model
        tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
        model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

        # Move the model to the CUDA device if available
        if torch.cuda.is_available():
            model.to("cuda")

        # Define a list of concepts to predict
        concepts_to_predict = ["potential energy", "kinetic energy", "Law of Conservation of Energy"]

        # Define possible outcome labels
        outcome_labels = ["Acceptable", "Unacceptable", "Insufficient", "Not Found"]

        # Create a list to store predictions as dictionaries
        predictions_list = []

        text = essay  # Assuming the text content is in column 'Essay'

        # Initialize predictions dictionary for this row
        predictions = {}

        # Iterate through each concept to predict
        for concept in concepts_to_predict:
            # Define a template for classification
            template = f"According to the following essay, is the student's definition of {concept} Acceptable, Unacceptable, Insufficient, or Not Found? Only use one of these labels for outputs\n{text}"
            # Prepare the input by replacing placeholders
            formatted_input = template
            # Tokenize and classify the text
            input_ids = tokenizer(formatted_input, return_tensors="pt", padding=True, truncation=True).input_ids.to("cuda" if torch.cuda.is_available() else "cpu")
            outputs = model.generate(input_ids, max_length=128)
            decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)  # Remove special tokens

            # Store the prediction in the dictionary
            predictions[concept] = next((label for label in outcome_labels if label.lower() in decoded_output.lower()), "Unknown")

            if predictions[concept] == "Unknown":
              print(len(decoded_output))
              with open('output.txt', 'w') as f:
                f.write(decoded_output)

        return predictions

INFO:pytorch_pretrained_bert.modeling:Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
INFO:pytorch_transformers.modeling_bert:Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
INFO:pytorch_transformers.modeling_xlnet:Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
INFO:allennlp.common.registrable:instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
INFO:allennlp.common.registrable:instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
INFO:allennlp.common.registrable:instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
INFO:allennlp.common.registrable:instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>


## 1. DatasetReader

In [10]:
@DatasetReader.register("STE")
class STEReader(DatasetReader):
    def __init__(self, cache_folder_path: str=None) -> None:
        super().__init__(cache_folder_path)
        # overwrite the primary evaluation method and metric name
        Label.set_task_evaluator(accuracy_score, 'accuracy')
        
    @overrides
    def _read(self, file_path: str, lazy: bool, sample_size: int):
        """
        Returns a list containing all the instances in the specified dataset.
 
        Parameters
        ----------
        file_path : str
            The path of the input data file.
        lazy : bool, optional
            If ``lazy==True``, only run the tokenization, does not compute the linguistic
            features like POS, NER. By default False
        sample_size : int, optional
            If sample size is set, only load this many of instances, by default None
        
        Returns
        -------
        List[Instance]
            The instance list.
        """
        instances = []
        essays = []
        logger.info("Reading instances from lines in file at: %s", file_path)
        df = pd.read_excel(normalize_file_path(file_path), sep='\t')
        for idx, row in tqdm(df.iterrows()):
            if lazy:
                essays.append(row['Essay'])
            else:
                instance = self._text_to_instance(f'q:{idx}', row)
                if instance is not None:
                    instances.append(instance)
                if sample_size and idx > sample_size:
                    break
        if lazy:
            return { "Essays": essays }
        else:
            return instances
    
    @overrides
    def _text_to_instance(self, id: str, row) -> Instance:
        # The function that transfers raw text to instance.
        essay = Target(qid=row['Essay_ID'], text=row['Essay'], vid=0, metas={'type': 'essays'})        # label
        groundtruth_PE = PredefinedLabel(
            model='groundtruth', 
            qid=row['Essay_ID'], 
            text=row['PE'], 
            vid=0, 
        )
        groundtruth_KE = PredefinedLabel(
            model='groundtruth', 
            qid=row['Essay_ID'], 
            text=row['KE'], 
            vid=0, 
        )
        groundtruth_LCE = PredefinedLabel(
            model='groundtruth', 
            qid=row['Essay_ID'], 
            text=row['LCE'], 
            vid=0, 
        )
        return self.create_instance(row['Essay_ID'], 
            essay=essay, 
            groundtruth_PE=groundtruth_PE,
            groundtruth_KE=groundtruth_KE,
            groundtruth_LCE=groundtruth_LCE)

In [11]:
from errudite.io import DatasetReader

cache_folder_path = "./ste_caches"
reader = DatasetReader.by_name("STE")(cache_folder_path=cache_folder_path)

INFO:errudite.utils.file_utils:Errudite cache folder selected: ./ste_caches


In [12]:
reader.read("ReadyForAudit.xlsx")

INFO:errudite.io.dataset_reader:Reading instances from lines in file at: ReadyForAudit.xlsx
INFO:__main__:Reading instances from lines in file at: ReadyForAudit.xlsx
76it [00:02, 28.42it/s]


[Instance [InstanceKey(qid='L24-RCFinal proposal, L3b8-23_RCFinal proposal, & L3b8-08_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-21_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L23-RCFinal proposal, L22-RCFinal proposal, L3b8-22_RCFinal proposal, L3b8-20_RCFinal proposal, & L3b8-19_RCFinal', vid=0)],
 Instance [InstanceKey(qid='L21-RCFinal proposal & L3b8-24_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-17_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-16_RCFinal proposal & UG_ENGM-RC74', vid=0)],
 Instance [InstanceKey(qid='L3b8-15_RCFinal proposal & UG_ENGM-RC63 ', vid=0)],
 Instance [InstanceKey(qid='L3b8-14_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-13_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-11_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-02_RCFinal proposal & L3b7-16_RCFinal', vid=0)],
 Instance [InstanceKey(qid='L216_RCFinal Proposal  & L215_RCFinal Proposal', vid=0)],

## 2. Predictor

In [13]:
from typing import List, Dict
import sys
sys.path.append('..')
from errudite.predictors.predictor import Predictor
from errudite.utils.evaluator import accuracy_score  #t
from errudite.targets.label import Label, PredefinedLabel

In [14]:
@Predictor.register("STE_FLAN_T5")
class PredictorSTE(Predictor_FLAN_T5):
    def __init__(self, name: str, 
        model_path: str=None,
        model_online_path: str=None,
        description: str='') -> None:
        Predictor_FLAN_T5.__init__(self, name, model_path, model_online_path, description)
        # import sys
        # sys.path.append('..')
        # from errudite.utils.evaluator import accuracy_score
        # Second, from the metrics above, pick one that's primary, and it will be used 
        # to compute `is_incorrect()` in any label target object: primary metric < 1.
        Label.set_task_evaluator(
            # the evaluation function that accepts pred and groundtruths, 
            # and return a dict of metrics: { metric_name: metric_score }. 
            # This is saved as Label.task_evaluation_func.
            task_evaluation_func=accuracy_score, 
            # The primary task metric name, ideally a key of task_evaluation_func ‘s return.
            task_primary_metric='accuracy')

    # the raw prediction function, returning the output of the model in a json format.
    def predict(self, essay: str) -> Dict[str, str]:
        predicted = self.predict_essay(essay)
        return predicted

    @classmethod
    # the class method that takes `Target` inputs, and output a `Label` object.
    def model_predict(cls, 
        predictor: Predictor, 
        essay: Target, 
        groundtruth_PE: Label, groundtruth_KE: Label, groundtruth_LCE: Label) -> 'Label':
        answer = None
        if not predictor:
            return answer
        predicted = predictor.predict(essay.get_text())
        if not predicted:
            return None
        answer_PE = PredefinedLabel(
            model=predictor.name, 
            qid=essay.qid,
            text=predicted['potential energy'], 
            vid=max([essay.vid]))
        answer_KE = PredefinedLabel(
            model=predictor.name, 
            qid=essay.qid,
            text=predicted['kinetic energy'], 
            vid=max([essay.vid]))
        answer_LCE = PredefinedLabel(
            model=predictor.name, 
            qid=essay.qid,
            text=predicted['Law of Conservation of Energy'], 
            vid=max([essay.vid]))
        answer_PE.compute_perform(groundtruths=groundtruth_PE)
        answer_KE.compute_perform(groundtruths=groundtruth_KE)
        answer_LCE.compute_perform(groundtruths=groundtruth_LCE)
        return answer_PE, answer_KE, answer_LCE



In [15]:
# read the raw data!
instances = reader.read(
    # The path of the input data file. We are using the first 100 rows from the SNLI dev set.
    file_path='ReadyForAudit.xlsx', 
    # If sample size is set, only load this many of instances, by default None.
    sample_size=10)

INFO:errudite.io.dataset_reader:Reading instances from lines in file at: ReadyForAudit.xlsx
INFO:__main__:Reading instances from lines in file at: ReadyForAudit.xlsx
10it [00:00, 24.81it/s]


In [16]:
from errudite.predictors import Predictor
model_path = "FLAN_T5_Essay"
predictor = Predictor.by_name("STE_FLAN_T5")(
    name ='FLAN_T5', 
    description='Prediction created by FLAN_T5')

In [17]:
from tqdm import tqdm 
logger.info("Running predictions....")
for instance in tqdm(instances):
    prediction_PE, prediction_KE, prediction_LCE = Predictor.by_name("STE_FLAN_T5").model_predict(
        predictor, 
        essay = instance.essay,
        groundtruth_PE = instance.groundtruth_PE, groundtruth_KE = instance.groundtruth_KE, groundtruth_LCE = instance.groundtruth_LCE)
    # set the prediction
    instance.set_entries(prediction_PE = prediction_PE, prediction_KE = prediction_KE, prediction_LCE = prediction_LCE)

INFO:__main__:Running predictions....
100%|██████████| 12/12 [02:20<00:00, 11.76s/it]


In [18]:
print(instances[2].get_entry('essay'), "\n")
print(instances[2].is_incorrect(model='FLAN_T5'), "\n")
  
instances[2].show_instance()

[Target] [InstanceKey(qid='L23-RCFinal proposal, L22-RCFinal proposal, L3b8-22_RCFinal proposal, L3b8-20_RCFinal proposal, & L3b8-19_RCFinal', vid=0)]
The initial drop must be the highest point of your roller coaster. It will be where all of the energy must be created in the form of Potential energy. A height of 90m would be just right because the initial drop can help the car to go over the hills with enough potential energy. I propose that we should make the roller coasters drop 90 meters tall because the roller coaster needs a lot of KE to go through the rest of the roller coaster. During initial drop, the increasing height increases potential energy at the top of hill and kinetic energy at the bottom of hill. When we increase the initial drop height the car has a faster speed so it'll be able to travel farther.

The hill of the roller coaster is another critical part. It must be a smaller height than the initial drop to have enough energy to go up the hill. The hill can be .80 mete

In [19]:
instances

[Instance [InstanceKey(qid='L24-RCFinal proposal, L3b8-23_RCFinal proposal, & L3b8-08_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-21_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L23-RCFinal proposal, L22-RCFinal proposal, L3b8-22_RCFinal proposal, L3b8-20_RCFinal proposal, & L3b8-19_RCFinal', vid=0)],
 Instance [InstanceKey(qid='L21-RCFinal proposal & L3b8-24_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-17_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-16_RCFinal proposal & UG_ENGM-RC74', vid=0)],
 Instance [InstanceKey(qid='L3b8-15_RCFinal proposal & UG_ENGM-RC63 ', vid=0)],
 Instance [InstanceKey(qid='L3b8-14_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-13_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-11_RCFinal proposal', vid=0)],
 Instance [InstanceKey(qid='L3b8-02_RCFinal proposal & L3b7-16_RCFinal', vid=0)],
 Instance [InstanceKey(qid='L216_RCFinal Proposal  & L215_RCFinal Proposal', vid=0)]]

In [20]:
predictor.evaluate_performance(instances)
print({"predictor": predictor.name, "perform": predictor.perform })

AttributeError: 'NoneType' object has no attribute 'perform'