In [2]:
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

def import_sys():
    import sys
    sys.path.append('..')
import_sys()

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name


In [3]:
import pandas as pd
from tqdm import tqdm

from overrides import overrides

from errudite.io import DatasetReader
from errudite.utils import normalize_file_path, accuracy_score
from errudite.targets.instance import Instance
from errudite.targets.target import Target
from errudite.targets.label import Label, PredefinedLabel



In [5]:
CU_result=pd.read_csv("relationship_pred_vs_actuals.csv")

In [16]:
essay=list(CU_result["Essay"])

In [18]:
CU0_df=CU_result[["CU0_Actual","CU0_Predicted"]]
CU5_df=CU_result[["CU5_Actual","CU5_Predicted"]]

In [19]:
CU_result

Unnamed: 0,Essay,CU0_Actual,CU0_Predicted,CU5_Actual,CU5_Predicted
0,We are going to gave are roller coaster drop h...,0,0,0,0
1,My recommendation for the roller coaster would...,0,1,1,1
2,I think the drop height of the roller coaster ...,1,1,1,1
3,"First, I would recommend that the roller coast...",1,1,1,1
4,We started at a release height of 2 then we te...,1,1,1,1
5,My recommendation is we start our roller coast...,1,1,1,1
6,We should start the roller coaster at 5 meters...,1,1,1,1
7,We are starting our rollercoaster drop height ...,1,1,1,0
8,My recommendation is to start with the height ...,0,1,0,1
9,The drop height when changes the potential ene...,1,1,1,0


In [53]:
from typing import List
from allennlp.data.instance import Instance
@DatasetReader.register("STE_relationship")
class STEReader_relationship(DatasetReader):
    # ... (previous code)
    @overrides
    def _read(self, file_path: str, lazy: bool, sample_size: int) -> List[Instance]:
        """
        Returns a list containing all the instances in the specified dataset.

        Parameters
        ----------
        file_path : str
            The path of the input data file.
        lazy : bool
            If ``lazy==True``, only run the tokenization, does not compute the linguistic
            features like POS, NER. By default False
        sample_size : int
            If sample size is set, only load this many of instances, by default None.

        Returns
        -------
        List[Instance]
            The instance list.
        """
        instances = []
        logger.info("Reading instances from lines in file at: %s", file_path)
        
        # Read data from the CSV file
        df = pd.read_csv(normalize_file_path(file_path))

        for idx, row in tqdm(df.iterrows()):
            instance = self._text_to_instance(f'q:{idx}', row)
            if instance is not None:
                instances.append(instance)
            if sample_size and idx >= sample_size:
                break

        return instances

    @overrides
    def _text_to_instance(self, id: str, row) -> Instance:
        # Extract information from DataFrame columns
        essay_text = row['Essay']
        cu0_actual = row['CU0_Actual']
        cu0_predicted = row['CU0_Predicted']
        cu5_actual = row['CU5_Actual']
        cu5_predicted = row['CU5_Predicted']

        # Create instance with extracted information
        essay = Target(qid=id, text=essay_text, vid=0, metas={'type': 'essays'})
        groundtruth_cu0 = PredefinedLabel(model='groundtruth', qid=id, text=cu0_actual, vid=0)
        predict_cu0=PredefinedLabel(model='groundtruth', qid=id, text=cu0_predicted, vid=0)
        groundtruth_cu5 = PredefinedLabel(model='groundtruth', qid=id, text=cu5_actual, vid=0)
        predict_cu5=PredefinedLabel(model='groundtruth', qid=id, text=cu5_predicted, vid=0)
        return self.create_instance(
            id,
            essay=essay,
            groundtruth_cu0=groundtruth_cu0,
            predict_cu0=predict_cu0,
            groundtruth_cu5=groundtruth_cu5,
            predict_cu5=predict_cu5
        )





In [54]:
from errudite.io import DatasetReader

cache_folder_path = "./ste_caches"
reader = DatasetReader.by_name("STE_relationship")(cache_folder_path=cache_folder_path)

INFO:errudite.utils.file_utils:Errudite cache folder selected: ./ste_caches


In [55]:
# read the raw data!
instances = reader.read(
    # The path of the input data file. We are using the first 100 rows from the SNLI dev set.
    file_path='relationship_pred_vs_actuals.csv', 
    # If sample size is set, only load this many of instances, by default None.
    sample_size=175)

INFO:errudite.io.dataset_reader:Reading instances from lines in file at: relationship_pred_vs_actuals.csv
INFO:__main__:Reading instances from lines in file at: relationship_pred_vs_actuals.csv
175it [00:09, 19.50it/s]


In [63]:
instances[1].show_instance()

[Instance] [InstanceKey(qid='q:1', vid=0)]
[essay]	My recommendation for the roller coaster would be that for the initial drop to be at the tallest it can to get a good momentum to keep your ride going. Following that your hill shouldn't be taller than your initial drop a the start because then the car wont be able to make it over. Lastly your mass shouldn't be huge but not small so right in the middle so the car can do the hills and all the stuff you want it to safely.

The greater amount of height there was more total energy released in the end, But with friction the total amount of energy would go down.
As the car goes down and comes to the stop PE goes all the way to 0 meanwhile KE goes up to a higher number.
The PE and KE depend on the height of the drop because the more the height is the more the PE will be a the top of the ride and the more the KE will be at the bottom of the ride. 
[groundtruth_cu0]	0	groundtruth	{}
[predict_cu0]	1	groundtruth	{}
[groundtruth_cu5]	1	groundtruth

In [64]:
from typing import List, Dict
import sys
sys.path.append('..')


In [65]:
import traceback
from typing import Union, List
from spacy.tokens import Doc, Span, Token
def import_sys():
    import sys
    sys.path.append('..')
import_sys()
from errudite.utils.helpers import convert_doc
from errudite.utils.check import DSLValueError
import logging
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

In [66]:
# ---------
# Build the instance store hash
from errudite.targets.instance import Instance
instance_hash, instance_hash_rewritten, qid_hash = Instance.build_instance_hashes(instances)

## Evaluate Performance

In [106]:
def eval_performance(group):

    cu0_count=0
    cu5_count=0
    len_group=0
    for key in group.get_instances():
        len_group+=1
        instance =Instance.get(key)
        if(int(instance.get_entry("groundtruth_cu0").label)== int(instance.get_entry("predict_cu0").label)):
            cu0_count+=1 
        if(int(instance.get_entry("groundtruth_cu5").label)== int(instance.get_entry("predict_cu5").label)):
            cu5_count+=1 
    cu0_acc=cu0_count/len_group
    cu5_acc=cu5_count/len_group
    print(f'cu0 accuracy: {cu0_acc}\ncu5_accuracy: {cu5_acc}')

In [108]:
eval_performance(len_group)

cu0 accuracy: 0.9242424242424242
cu5_accuracy: 0.8636363636363636


In [85]:

count=0
for iter_instance in count_group_list:
    if(int(iter_instance.get_entry("groundtruth_cu0").label)== int(iter_instance.get_entry("predict_cu0").label)):
           count+=1 
            
print(count/len(count_group_list))

0.9242424242424242


## Group - Length of the Essay

In [113]:
from errudite.builts import Group
from errudite.builts import Attribute


# Create an attribute based on the location function
attr = Attribute.create(
    name="len_entities",
    description="length entities in the essay",
    cmd="length(essay)"
)

import numpy as np

data=list(attr.get_instances().values())
lower_quartile = np.percentile(data, 25)
upper_quartile = np.percentile(data, 75)
median=np.percentile(data, 50)


print("Lower Quartile :", lower_quartile)
print("median :",median)
print("Upper Quartile :", upper_quartile)



INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(length):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: len_entities


Lower Quartile : 251.5
median : 361.0
Upper Quartile : 464.0


In [114]:

len_group = Group.create(
    # The name of the attribute
    name="length",
    # the description of the attribute
    description="length greater than 10",
    # All the previously created attributes and groups 
    # can be used and queried, as long as we serve the 
    # stored attributes and groups as part of the inputs.
    cmd="length(essay) >= 248.5",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)

len_group

INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](>=):[FuncOp(length):[ArgOp:essay]+[], 248.5]
INFO:errudite.builts.group:Created group: length


[Group](length): 
	CMD	: length(essay) >= 248.5
	COUNT	: 132

In [115]:
eval_performance(len_group)

cu0 accuracy: 0.9242424242424242
cu5_accuracy: 0.8636363636363636


## Group - Quantitative Value

In [110]:
from errudite.build_blocks import PrimFunc
from errudite.build_blocks.prim_funcs.linguistic import linguistic

@PrimFunc.register()
def contains_quantity(docs: Union['Target', Span]) -> bool:
    """
    Detect the presence of quantity entities in the essay.
    quantity entity: measurements or counts.
    """
    # Use the linguistic function to extract entity types
    #print(docs)
    entities = linguistic(docs, label='ent_type')
    # print(entities)
    # print(docs)
    contains='QUANTITY' in entities

    # Check if 'bottom' or 'top' is present in the extracted entity types
    #print(contains)
    return contains


In [111]:
from errudite.builts import Attribute, Group

# Create an attribute based on the location function
attr = Attribute.create(
    name="quantity_entities",
    description="Presence of quantity entities in the essay",
    cmd="contains_quantity(essay)"
)

# Create a group that checks for the presence of location entities
quantity_group = Group.create(
    name="quantity",
    description="quantity entity detected",
    cmd="attr:quantity_entities == TRUE",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)
quantity_group

INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(contains_quantity):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: quantity_entities
INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](==):[[BuildBlockOp](attr):quantity_entities, True]
INFO:errudite.builts.group:Created group: quantity


<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class '

[Group](quantity): 
	CMD	: attr:quantity_entities == TRUE
	COUNT	: 138

In [112]:
eval_performance(quantity_group)

cu0 accuracy: 0.927536231884058
cu5_accuracy: 0.8478260869565217


In [116]:
from errudite.build_blocks import PrimFunc
from errudite.build_blocks.prim_funcs.linguistic import linguistic

@PrimFunc.register()
def num_quantity(docs: Union['Target', Span]) -> bool:
    """
    Detect the number of quantity entities in the essay.
    quantity entity: measurements or counts.
    """
    # Use the linguistic function to extract entity types
    #print(docs)
    entities = linguistic(docs, label='ent_type')
    count = entities.count('QUANTITY')

    return count

from errudite.builts import Attribute, Group

# Create an attribute based on the location function
attr = Attribute.create(
    name="quantity_entities",
    description="number of quantity entities in the essay",
    cmd="num_quantity(essay)"
)

import numpy as np

data=list(attr.get_instances().values())
lower_quartile = np.percentile(data, 25)
upper_quartile = np.percentile(data, 75)
median=np.percentile(data, 50)


print("Lower Quartile :", lower_quartile)
print("median :",median)
print("Upper Quartile :", upper_quartile)



INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(num_quantity):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: quantity_entities


<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class '

In [117]:

# Create a group that checks for the presence of location entities
num_quantity_group = Group.create(
    name="num_quantity",
    description="quantity entity detected",
    cmd="attr:quantity_entities > 4",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)

num_quantity_group


INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](>):[[BuildBlockOp](attr):quantity_entities, 4.0]
INFO:errudite.builts.group:Created group: num_quantity


[Group](num_quantity): 
	CMD	: attr:quantity_entities > 4
	COUNT	: 100

In [118]:
eval_performance(num_quantity_group)

cu0 accuracy: 0.91
cu5_accuracy: 0.83


In [119]:
from errudite.build_blocks import PrimFunc
from errudite.build_blocks.prim_funcs.linguistic import linguistic

@PrimFunc.register()
def contains_ordinal(docs: Union['Target', Span]) -> bool:
    """
    Detect the presence of ordinal entities in the essay.
    ordinal entity: measurements or counts.
    """
    # Use the linguistic function to extract entity types
    entities = linguistic(docs, label='ent_type')
    # print(entities)
    # print(docs)
    contains='ORDINAL' in entities
    # Check if 'bottom' or 'top' is present in the extracted entity types
    #print(contains)
    return contains


In [120]:
from errudite.builts import Attribute, Group

# Create an attribute based on the location function
attr = Attribute.create(
    name="ordinal_entities",
    description="Presence of ordinal entities in the essay",
    cmd="contains_ordinal(essay)"
)

# Create a group that checks for the presence of ordinal entities
ordinal_group = Group.create(
    name="ordinal",
    description="ordinal entity detected",
    cmd="attr:ordinal_entities == TRUE",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)
quantity_group

INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(contains_ordinal):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: ordinal_entities
INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](==):[[BuildBlockOp](attr):ordinal_entities, True]
INFO:errudite.builts.group:Created group: ordinal


<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class 'spacy.tokens.doc.Doc'>
<class '

[Group](quantity): 
	CMD	: attr:quantity_entities == TRUE
	COUNT	: 138

In [121]:
eval_performance(quantity_group)

cu0 accuracy: 0.927536231884058
cu5_accuracy: 0.8478260869565217


## Group - Number of Adjectives

In [122]:
from errudite.build_blocks import PrimFunc
from errudite.build_blocks.prim_funcs.linguistic import STRING
import spacy
nlp = spacy.load("en_core_web_sm")


@PrimFunc.register()
def num_adjectives(target: 'Target') -> int:
    """
    Count the number of adjectives in a given target.
    """
    try:
        # Access the tokens associated with the Target
        #print(STRING(target))
        #tokens = STRING(target).tokens
        doc = nlp(STRING(target))
        adjectives = [token for token in doc if token.pos_ == "ADJ"]
        return len(adjectives)
    except Exception as e:
        ex = Exception(f"Unknown exception from [num_adjectives]: {e}")
        raise ex




In [123]:
from errudite.builts import Attribute, Group

# Assuming you have already defined the PrimFunc num_adjectives

# Create an attribute that counts the adjectives among essay targets
attr = Attribute.create(
    name="num_adjectives_in_essay",
    description="Number of adjectives among essay targets",
    cmd="num_adjectives(essay)"
)


# Create a group to check if there are more than 5 adjectives in the essay
adj_count_group = Group.create(
    name="adjective_count_group",
    description="Group for counting adjectives in the essay",
    cmd="attr:num_adjectives_in_essay > 10",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)

# You can now use the 'group' to check if there are more than 5 adjectives in your essay targets.
adj_count_group

INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(num_adjectives):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: num_adjectives_in_essay
INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](>):[[BuildBlockOp](attr):num_adjectives_in_essay, 10.0]
INFO:errudite.builts.group:Created group: adjective_count_group


[Group](adjective_count_group): 
	CMD	: attr:num_adjectives_in_essay > 10
	COUNT	: 155

In [124]:
eval_performance(adj_count_group)

cu0 accuracy: 0.9225806451612903
cu5_accuracy: 0.8580645161290322


## Group - Number of Verbs

In [125]:
from errudite.build_blocks import PrimFunc
from errudite.build_blocks.prim_funcs.linguistic import STRING
import spacy
nlp = spacy.load("en_core_web_sm")


@PrimFunc.register()
def num_verbs(target: 'Target') -> int:
    """
    Count the number of verbs in a given target.
    """
    try:
        # Access the tokens associated with the Target
        #print(STRING(target))
        #tokens = STRING(target).tokens
        doc = nlp(STRING(target))
        adjectives = [token for token in doc if token.pos_ == "VERB"]
        return len(adjectives)
    except Exception as e:
        ex = Exception(f"Unknown exception from [num_adjectives]: {e}")
        raise ex




In [126]:
from errudite.builts import Attribute, Group

# Assuming you have already defined the PrimFunc num_adjectives

# Create an attribute that counts the adjectives among essay targets
attr = Attribute.create(
    name="num_verbs_in_essay",
    description="Number of verbs among essay targets",
    cmd="num_verbs(essay)"
)


# Create a group to check if there are more than 20 verbs in the essay
verb_count_group = Group.create(
    name="verb_count_group",
    description="Group for counting verbs in the essay",
    cmd="attr:num_verbs_in_essay > 40",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)

# You can now use the 'group' to check if there are more than 5 adjectives in your essay targets.
verb_count_group

INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(num_verbs):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: num_verbs_in_essay
INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](>):[[BuildBlockOp](attr):num_verbs_in_essay, 40.0]
INFO:errudite.builts.group:Created group: verb_count_group


[Group](verb_count_group): 
	CMD	: attr:num_verbs_in_essay > 40
	COUNT	: 131

In [127]:
eval_performance(verb_count_group)

cu0 accuracy: 0.9236641221374046
cu5_accuracy: 0.8549618320610687


## Group - Number of Nouns

In [128]:
from errudite.build_blocks import PrimFunc
from errudite.build_blocks.prim_funcs.linguistic import STRING
import spacy
nlp = spacy.load("en_core_web_sm")


@PrimFunc.register()
def num_verbs(target: 'Target') -> int:
    """
    Count the number of verbs in a given target.
    """
    try:
        # Access the tokens associated with the Target
        #print(STRING(target))
        #tokens = STRING(target).tokens
        doc = nlp(STRING(target))
        adjectives = [token for token in doc if token.pos_ == "NOUN"]
        return len(adjectives)
    except Exception as e:
        ex = Exception(f"Unknown exception from [num_adjectives]: {e}")
        raise ex






In [129]:
from errudite.builts import Attribute, Group

# Assuming you have already defined the PrimFunc num_adjectives

# Create an attribute that counts the adjectives among essay targets
attr = Attribute.create(
    name="num_verbs_in_essay",
    description="Number of verbs among essay targets",
    cmd="num_verbs(essay)"
)


# Create a group to check if there are more than 20 verbs in the essay
verb_count_group = Group.create(
    name="verb_count_group",
    description="Group for counting verbs in the essay",
    cmd="attr:num_verbs_in_essay > 50",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)

# You can now use the 'group' to check if there are more than 5 adjectives in your essay targets.
verb_count_group

INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(num_verbs):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: num_verbs_in_essay
INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](>):[[BuildBlockOp](attr):num_verbs_in_essay, 50.0]
INFO:errudite.builts.group:Created group: verb_count_group


[Group](verb_count_group): 
	CMD	: attr:num_verbs_in_essay > 50
	COUNT	: 126

In [130]:
eval_performance(verb_count_group)

cu0 accuracy: 0.9365079365079365
cu5_accuracy: 0.873015873015873


## Group - Contains Location Entity

In [155]:
from errudite.build_blocks import PrimFunc
from errudite.build_blocks.prim_funcs.linguistic import STRING
import spacy
nlp = spacy.load("en_core_web_sm")


@PrimFunc.register()
def contains_loc(target: 'Target') -> int:
    """
    Detect the presence of location entities ('bottom' or 'top') in the essay.
    """
    try:
        # Access the tokens associated with the Target
        target_str=STRING(target)
        #tokens = STRING(target).tokens
        if "bottom" in target_str or "top" in target_str:
            return True
    except Exception as e:
        ex = Exception(f"Unknown exception from [num_adjectives]: {e}")
        raise ex





In [156]:
from errudite.builts import Attribute, Group

# Assuming you have already defined the PrimFunc num_adjectives

# Create an attribute that counts the adjectives among essay targets
attr = Attribute.create(
    name="contains_loc_in_essay",
    description="Number of locations among essay targets",
    cmd="contains_loc(essay)"
)


# Create a group to check if there are more than 5 adjectives in the essay
contains_loc_group = Group.create(
    name="adjective_count_group",
    description="Group for counting locations in the essay",
    cmd="attr:contains_loc_in_essay==TRUE ",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)

# You can now use the 'group' to check if there are more than 5 adjectives in your essay targets.
contains_loc_group

INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(contains_loc):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: contains_loc_in_essay
INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](==):[[BuildBlockOp](attr):contains_loc_in_essay, True]
INFO:errudite.builts.group:Created group: adjective_count_group


[Group](adjective_count_group): 
	CMD	: attr:contains_loc_in_essay==TRUE 
	COUNT	: 57

In [158]:
eval_performance(contains_loc_group)

{'predictor': 'FLAN_T5', 'perform': {'accuracy': 0.05263157894736842, 'accuracy_PE_Acceptable': 0.0, 'accuracy_PE_Unacceptable': 'None', 'accuracy_PE_Insufficient': 0.825, 'accuracy_PE_NotFound': 0.0, 'accuracy_KE_Acceptable': 'None', 'accuracy_KE_Unacceptable': 'None', 'accuracy_KE_Insufficient': 0.7, 'accuracy_KE_NotFound': 0.07407407407407407, 'accuracy_LCE_Acceptable': 0.2631578947368421, 'accuracy_LCE_Unacceptable': 'None', 'accuracy_LCE_Insufficient': 0.0, 'accuracy_LCE_NotFound': 0.5185185185185185}}


In [134]:
from errudite.build_blocks import PrimFunc
from errudite.build_blocks.prim_funcs.linguistic import STRING
import spacy
nlp = spacy.load("en_core_web_sm")


@PrimFunc.register()
def num_assertions(target: 'Target') -> int:
    """
    Count the number of verbs in a given target.
    """
    try:
        # Access the tokens associated with the Target
        #print(STRING(target))
        #tokens = STRING(target).tokens
        doc = nlp(STRING(target))
        # Filter statements starting with "I believe" or "I think"
        filtered_statements = [sent.text for sent in doc.sents if sent.text.lower().startswith("i believe") or sent.text.lower().startswith("i think")]
        #print(filtered_statements)
        return filtered_statements
    except Exception as e:
        ex = Exception(f"Unknown exception from [  ]: {e}")
        raise ex
        
        
        








In [135]:
from errudite.builts import Attribute, Group

# Assuming you have already defined the PrimFunc num_adjectives

# Create an attribute that counts the adjectives among essay targets
attr = Attribute.create(
    name="contains_loc_in_essay",
    description="Number of adjectives among essay targets",
    cmd="num_assertions(essay)"
)


# Create a group to check if there are more than 5 adjectives in the essay
contains_loc_group = Group.create(
    name="adjective_count_group",
    description="Group for checking assertions in the essay",
    cmd="attr:contains_loc_in_essay==TRUE ",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)

# You can now use the 'group' to check if there are more than 5 adjectives in your essay targets.
contains_loc_group

INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(num_assertions):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: contains_loc_in_essay
INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](==):[[BuildBlockOp](attr):contains_loc_in_essay, True]
INFO:errudite.builts.group:Created group: adjective_count_group


[Group](adjective_count_group): 
	CMD	: attr:contains_loc_in_essay==TRUE 
	COUNT	: 0

In [131]:
eval_performance(contains_loc_group)

{'predictor': 'FLAN_T5', 'perform': {'accuracy': 0.045454545454545456, 'accuracy_PE_Acceptable': 0.0, 'accuracy_PE_Unacceptable': 'None', 'accuracy_PE_Insufficient': 0.6363636363636364, 'accuracy_PE_NotFound': 0.0, 'accuracy_KE_Acceptable': 'None', 'accuracy_KE_Unacceptable': 'None', 'accuracy_KE_Insufficient': 0.6666666666666666, 'accuracy_KE_NotFound': 0.15384615384615385, 'accuracy_LCE_Acceptable': 0.25, 'accuracy_LCE_Unacceptable': 'None', 'accuracy_LCE_Insufficient': 0.0, 'accuracy_LCE_NotFound': 0.6153846153846154}}


In [40]:
import numpy as np

data = [4, 1, 1, 3, 3, 1, 4, 1, 4, 5, 0, 2, 1, 3, 0, 1, 0, 3, 4, 1, 0, 3, 0, 5, 0, 2, 3, 0, 0, 1, 4, 2, 0, 0, 8, 1, 1, 2, 0, 3, 0, 1, 4, 0, 0, 0, 1, 0, 1, 2, 2, 3, 2, 6, 2, 2, 1, 2, 2, 1, 3, 8, 2, 2, 11, 6, 3, 3, 2, 10, 2, 1, 1, 3, 2, 0]

# Calculate the lower quartile (Q1)
lower_quartile = np.percentile(data, 25)
upper_quartile = np.percentile(data, 75)
median=np.percentile(data, 50)


print("Lower Quartile :", lower_quartile)
print("median :",median)

print("Upper Quartile :", upper_quartile)

Lower Quartile : 1.0
median : 2.0
Upper Quartile : 3.0


In [41]:
import spacy

# Load the SpaCy English model
nlp = spacy.load("en_core_web_sm")

# Example text
text = "I don't believe that understanding potential energy is not crucial. However, I think kinetic energy is equally important."

# Process the text using SpaCy
doc = nlp(text)

# Identify negations
negations = [token.text for token in doc if token.dep_ == "neg"]

# Count the number of negations
num_negations = len(negations)

# Print the results
print("Negations:", negations)
print("Number of negations:", num_negations)


Negations: ["n't", 'not']
Number of negations: 2


In [89]:
from errudite.build_blocks import PrimFunc
from errudite.build_blocks.prim_funcs.linguistic import STRING
import spacy
nlp = spacy.load("en_core_web_sm")


@PrimFunc.register()
def num_negations(target: 'Target') -> int:
    """
    Count the number of verbs in a given target.
    """
    try:
        # Access the tokens associated with the Target
        #print(STRING(target))
        #tokens = STRING(target).tokens
        doc = nlp(STRING(target))
        # Identify negations
        negations = [token.text for token in doc if token.dep_ == "neg"]
        # Count the number of negations
        num_negations = len(negations)
        return num_negations
    except Exception as e:
        ex = Exception(f"Unknown exception from [  ]: {e}")
        raise ex
        
        
    



In [96]:
from errudite.builts import Attribute, Group

# Assuming you have already defined the PrimFunc num_adjectives

# Create an attribute that counts the adjectives among essay targets
attr = Attribute.create(
    name="contains_negation_in_essay",
    description="Number of nagations among essay targets",
    cmd="num_negations(essay)"
)


# Create a group to check if there are more than 5 adjectives in the essay
contains_negation_group = Group.create(
    name="contains_negation_group",
    description="Group for checking negations in the essay",
    cmd="attr:contains_negation_in_essay  > 3 ",
    attr_hash=Attribute.store_hash(),
    group_hash=Group.store_hash()
)

# You can now use the 'group' to check if there are more than 5 adjectives in your essay targets.
contains_negation_group

INFO:errudite.build_blocks.wrapper:Parsed: FuncOp(num_negations):[ArgOp:essay]+[]
INFO:errudite.builts.attribute:Created attr: contains_negation_in_essay
INFO:errudite.build_blocks.wrapper:Parsed: [BinOp](>):[[BuildBlockOp](attr):contains_negation_in_essay, 3.0]
INFO:errudite.builts.group:Created group: contains_negation_group


[Group](contains_negation_group): 
	CMD	: attr:contains_negation_in_essay  > 3 
	COUNT	: 14

In [97]:
eval_performance(contains_negation_group)

{'predictor': 'FLAN_T5', 'perform': {'accuracy': 0.0, 'accuracy_PE_Acceptable': 'None', 'accuracy_PE_Unacceptable': 'None', 'accuracy_PE_Insufficient': 0.5, 'accuracy_PE_NotFound': 0.0, 'accuracy_KE_Acceptable': 'None', 'accuracy_KE_Unacceptable': 'None', 'accuracy_KE_Insufficient': 0.5, 'accuracy_KE_NotFound': 0.25, 'accuracy_LCE_Acceptable': 0.5555555555555556, 'accuracy_LCE_Unacceptable': 'None', 'accuracy_LCE_Insufficient': 0.0, 'accuracy_LCE_NotFound': 0.375}}
