## Set up environment

In [109]:
from dotenv import load_dotenv

# Load environment variables from a .env file before any project code runs.
# `override=True` asks python-dotenv to let values from the file replace
# variables that are already set in the process environment.
# The boolean result is bound to `_` so the cell does not echo it.
_ = load_dotenv(override=True)

In [110]:
from common.context import TagPredictionContext

# Identifiers of this baseline run; they are reused when the evaluation
# report is saved at the end of the notebook.
EXPERIMENT_ID = "2024-10-24-baseline-data-events"
RUN_ID = "on-events-dataset-v1"

# Tags evaluated in this experiment. Sorted explicitly so the order stays
# alphabetical even if the literal below is later edited out of order.
TAGS_IN_SCOPE = sorted(
    [
        "boulders",
        "dheqfailure",
        "dircontrol",
        "harddrilling",
        "highrop",
        "holecleaning",
        "lostcirculation",
        "lowrop",
        "packoff",
        "shallowgas",
        "shallowwater",
        "stuck",
        "stuckpipe",
        "surfeqfailure",
        "tighthole",
        "wait",
        "wellborebreathing",
        "wellborestability",
        "wellcontrol",
    ]
)

# Shared run context consumed by the tag expansion, the evaluator and the
# report writer in the cells below.
# NOTE(review): with `with_notags=True` the evaluator output contains
# expected__notags / actual__notags columns -- presumably rows without any
# in-scope tag are scored as a synthetic "notags" class; confirm in
# common.evaluation.
CONTEXT = TagPredictionContext(
    description="Evaluate performance of the existing solution to set a baseline",
    experiment_id=EXPERIMENT_ID,
    run_id=RUN_ID,
    tags_in_scope=TAGS_IN_SCOPE,
    with_notags=True,
)

## Fetch datasets

In [111]:
import pandas as pd
from common.datasets import load_input_dataset

# Reviewed events dataset used as input for this run.
dataset_name = "reviewed_distributed_events.csv"

# Columns that the loader is asked to convert to Python sets:
# "tags" is later used as the predicted tags and "Reviewed tags" as the
# ground truth when the tags are expanded.
set_valued_columns = ["tags", "Reviewed tags"]

dataset_df = load_input_dataset(
    dataset_name,
    columns_to_convert_to_sets=set_valued_columns,
)

# Record the dataset on the run context so it is available to the report.
CONTEXT.used_datasets = [dataset_name]

dataset_df

Unnamed: 0,id,scope,title,description,extendedCategory,tags,Are tags correct?,Reviewed tags,Comments
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_50aa...,Subsurface,Lossesin Heimdal,Heimdal 3754m dynamic mud losses at 6m3/hrs wi...,lostcirculation,{lostcirculation},YES,{lostcirculation},
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_3806...,Subsurface,Fault Related Losses,Fault was drilled. Mud losses of 14m3/hr o...,lostcirculation,{lostcirculation},YES,{lostcirculation},
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_154d...,Subsurface,Stringers,Set TD early due to not able to penetrate a st...,harddrilling,"{wellborebreathing, harddrilling, lostcirculat...",NO,"{wellborebreathing, harddrilling, wellborestab...",
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7fea...,Subsurface,Losses in Balder Tuff,Observed indications off losses at 4353 m MD B...,lostcirculation,"{lostcirculation, wellcontrol}",YES,"{lostcirculation, wellcontrol}",
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_aab3...,Subsurface,Losses,Drilled horizontally through Balder / UDC sand...,lostcirculation,"{lostcirculation, wellborestability}",YES,"{lostcirculation, wellborestability}",
...,...,...,...,...,...,...,...,...,...
334,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d483...,Subsurface,Kickoff,"Original hole, Mudline 129.5 m Categor...",other,{},YES,{},
335,a1f86f80-135e-458b-aafc-3af30d2476f2_main_daa4...,Subsurface,Kickoff,Kicked off 2/8-F-7 T3 Category: Direct...,other,{},YES,{},
336,a1f86f80-135e-458b-aafc-3af30d2476f2_main_2f0c...,Subsurface,Kickoff,Kicked off 2/8-N-5 T4 Category: Direct...,other,{},YES,{},
337,a1f86f80-135e-458b-aafc-3af30d2476f2_main_b543...,Subsurface,Kickoff,"Original hole, Mudline 110.3 m Categor...",other,{},YES,{},


## Apply the model

In [112]:
# No model is applied in this experiment: the dataset already carries the tags
# produced by the existing regex-based tagging rules in its "tags" column.
from common.assessment import expand_tags

# Select the needed columns strictly. `DataFrame.filter(items=...)` silently
# skips labels that are missing (e.g. a "Text" column, which this dataset does
# not have -- its free text lives in "title" / "description"), so a schema
# mismatch would go unnoticed. Plain indexing raises KeyError instead.
# `.copy()` hands an independent frame to `expand_tags`.
assessed_columns = ["id", "tags", "Reviewed tags", "Comments"]
selected_df = dataset_df[assessed_columns].copy()

# Turn the two set-valued columns into one boolean
# expected__<tag> / actual__<tag> column pair per tag in scope.
assessed_df = expand_tags(
    selected_df,
    tags_in_scope=CONTEXT.tags_in_scope,
    ground_truth_tags_column="Reviewed tags",
    predicted_tags_column="tags",
)
assessed_df

Unnamed: 0,id,Comments,expected__boulders,actual__boulders,expected__dheqfailure,actual__dheqfailure,expected__dircontrol,actual__dircontrol,expected__harddrilling,actual__harddrilling,...,expected__tighthole,actual__tighthole,expected__wait,actual__wait,expected__wellborebreathing,actual__wellborebreathing,expected__wellborestability,actual__wellborestability,expected__wellcontrol,actual__wellcontrol
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_50aa...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_3806...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_154d...,,False,False,False,False,False,False,True,True,...,False,False,False,False,True,True,True,False,False,False
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7fea...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,True
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_aab3...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
334,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d483...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
335,a1f86f80-135e-458b-aafc-3af30d2476f2_main_daa4...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
336,a1f86f80-135e-458b-aafc-3af30d2476f2_main_2f0c...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
337,a1f86f80-135e-458b-aafc-3af30d2476f2_main_b543...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [113]:
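# NOTE(review): persisting the assessed dataset is disabled for this run (the
# call below is commented out). Re-enable it or remove this cell once it is
# decided whether the assessed dataset should be saved.
# from common.datasets import save_assessed_dataset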


# save_assessed_dataset(
#     assessed_df,
#     context=CONTEXT,
#     experiment_id=EXPERIMENT_ID,
#     run_id=RUN_ID,
# )

## Evaluate predicted tags

In [114]:
from common.evaluation import TagMatchingEvaluator

# Build the evaluator over the expanded expected__/actual__ columns; the tag
# scope and the notags switch both come from the shared run context so that
# evaluation stays consistent with the tag expansion above.
evaluator = TagMatchingEvaluator(
    tags_in_scope=CONTEXT.tags_in_scope,
    with_notags=CONTEXT.with_notags,
    assessed_df=assessed_df,
)

In [115]:
# Row-level evaluation: displays the assessed frame extended with
# expected__notags / actual__notags and per-row precision, recall, f1 and
# true_positives columns (see the output of this cell).
# NOTE(review): the method name refers to DDRs, while the rows here come from
# the events dataset -- presumably each row is treated as one "DDR"; confirm.
evaluator.eval_individual_ddrs()

Unnamed: 0,id,Comments,expected__boulders,actual__boulders,expected__dheqfailure,actual__dheqfailure,expected__dircontrol,actual__dircontrol,expected__harddrilling,actual__harddrilling,...,expected__wellborestability,actual__wellborestability,expected__wellcontrol,actual__wellcontrol,expected__notags,actual__notags,precision,recall,f1,true_positives
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_50aa...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,1.00,1.00,1.00,1
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_3806...,,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,1.00,1.00,1.00,1
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_154d...,,False,False,False,False,False,False,True,True,...,True,False,False,False,False,False,0.75,0.75,0.75,3
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7fea...,,False,False,False,False,False,False,False,False,...,False,False,True,True,False,False,1.00,1.00,1.00,2
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_aab3...,,False,False,False,False,False,False,False,False,...,True,True,False,False,False,False,1.00,1.00,1.00,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
334,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d483...,,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,1.00,1.00,1.00,1
335,a1f86f80-135e-458b-aafc-3af30d2476f2_main_daa4...,,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,1.00,1.00,1.00,1
336,a1f86f80-135e-458b-aafc-3af30d2476f2_main_2f0c...,,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,1.00,1.00,1.00,1
337,a1f86f80-135e-458b-aafc-3af30d2476f2_main_b543...,,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,1.00,1.00,1.00,1


In [116]:
# Tag-level evaluation: one row per tag with precision, recall, f1,
# true_positives and the positive / negative counts in the ground truth.
evaluator.eval_per_tag()

Unnamed: 0,tag,precision,recall,f1,true_positives,positives_in_ground_truth,negatives_in_ground_truth
0,boulders,1.0,1.0,1.0,27,27,312
1,dheqfailure,1.0,0.540541,0.701754,20,37,302
2,dircontrol,1.0,0.842105,0.914286,16,19,320
3,harddrilling,1.0,0.962264,0.980769,51,53,286
4,highrop,0.916667,1.0,0.956522,11,11,328
5,holecleaning,1.0,1.0,1.0,28,28,311
6,lostcirculation,0.943396,0.961538,0.952381,50,52,287
7,lowrop,0.979592,0.813559,0.888889,48,59,280
8,packoff,1.0,0.833333,0.909091,35,42,297
9,shallowgas,1.0,1.0,1.0,21,21,318


In [117]:
# Summary table: precision, recall and f1 averaged per row ("Average per DDR")
# and per tag ("Average per Tag").
evaluator.average_metrics()

Unnamed: 0,Type,precision,recall,f1
0,Average per DDR,0.902409,0.893707,0.886489
1,Average per Tag,0.944756,0.868729,0.856932


## Save evaluation report

In [118]:
from common.datasets import save_evaluation_report

# Persist the evaluation report for this run. The experiment and run ids are
# passed explicitly in addition to the context that was built from the same
# values, together with the input frame, the assessed frame and the evaluator.
save_evaluation_report(
    context=CONTEXT,
    experiment_id=EXPERIMENT_ID,
    run_id=RUN_ID,
    evaluator=evaluator,
    dataset_df=dataset_df,
    assessed_df=assessed_df,
)