## Set up environment

In [241]:
from dotenv import load_dotenv

# Load settings from a .env file before anything reads os.environ. The return
# value is bound to `_` so the cell does not echo it.
# NOTE(review): override=True presumably lets values from the .env file replace
# variables that are already set in the process environment - confirm against
# the python-dotenv documentation.
_ = load_dotenv(override=True)

In [242]:
from common.context import LLMTagPredictionContext

# Identifiers under which this run is recorded.
EXPERIMENT_ID = "2024-10-24-try-llm-for-extended-category"
RUN_ID = "on-events-dataset-v1--1-example-per-category"

# Sampling knobs: number of few-shot examples taken per category, and the seed
# stored on the context and used when the dataset is shuffled.
EXAMPLE_COUNT = 1
RANDOM_SEED = 45

# NOTE(review): the description below reads as if it was carried over from a
# baseline run - confirm it still describes this LLM experiment.
CONTEXT = LLMTagPredictionContext(
    description="Evaluate performance of the existing solution to set a baseline",
    experiment_id=EXPERIMENT_ID,
    run_id=RUN_ID,
    # Extended categories the model may choose from, kept in alphabetical order.
    tags_in_scope=sorted(
        {
            "boulders",
            "harddrilling",
            "holecleaning",
            "lostcirculation",
            "other",
            "packoff",
            "shallowgas",
            "stuck",
            "tighthole",
            "waterflow",
            "wellborestability",
            "wellcontrol",
        }
    ),
    with_notags=True,
    random_seed=RANDOM_SEED,
)

## Fetch datasets

In [243]:
from common.datasets import load_input_dataset

# File name of the reviewed events dataset used for both the few-shot examples
# and the evaluation set.
dataset_name = "reviewed_distributed_events.csv"

# Load the dataset; the two tag columns are requested as sets (they render as
# {...} in the frames displayed below).
dataset_df = load_input_dataset(
    dataset_name, columns_to_convert_to_sets=["tags", "Reviewed tags"]
)

# Record which dataset this run used on the run context.
CONTEXT.used_datasets = [dataset_name]

In [244]:
import pandas as pd


# Shuffle the whole dataset with the run's seed so that "the first rows" of a
# category below are a reproducible random pick.
shuffled_dataset_df = dataset_df.sample(
    frac=1,
    random_state=CONTEXT.random_seed,
).reset_index(drop=True)

# Only events whose description is longer than 40 characters are eligible as
# few-shot examples.
examples = shuffled_dataset_df.loc[
    shuffled_dataset_df["description"].str.len().gt(40)
]

# For every category in scope (in that order) keep at most EXAMPLE_COUNT rows.
examples_per_category = [
    examples.loc[examples["extendedCategory"].eq(ext_category)].iloc[:EXAMPLE_COUNT]
    for ext_category in CONTEXT.tags_in_scope
]

# Stack the per-category picks into one frame with a fresh 0..n-1 index.
examples = pd.concat(examples_per_category, ignore_index=True)
# Show "Reviewed tags" as a sorted list instead of a set.
examples["Reviewed tags"] = examples["Reviewed tags"].apply(sorted)

# Display view: one row per event id, ordered by id. The prompt and the
# hold-out split further down use `examples`, not this deduplicated frame.
examples_df = examples.drop_duplicates(subset="id").sort_values("id")
examples_df

Unnamed: 0,id,scope,title,description,extendedCategory,tags,Are tags correct?,Reviewed tags,Comments
9,a1f86f80-135e-458b-aafc-3af30d2476f2_main_11ea...,Subsurface,Shallow water flow (kick),"After KO and sliding from 550m MD, significant...",waterflow,"{wellcontrol, shallowwater}",YES,"[shallowwater, wellcontrol]",
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_35a8...,Subsurface,MudLoss,"25% losses (FWR) Category: Hydraulics,...",lostcirculation,{lostcirculation},YES,[lostcirculation],
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_3bf2...,Subsurface,Boulders,Slow drilling and vibrations. Local Lessons Le...,boulders,{boulders},NO,"[boulders, harddrilling, lowrop, wellborestabi...",
7,a1f86f80-135e-458b-aafc-3af30d2476f2_main_6cf3...,Subsurface,Stuck pipe due to poor hole cleaning,High ROP through unconsolidated sands resulted...,stuck,"{stuckpipe, highrop, holecleaning}",YES,"[highrop, holecleaning, stuckpipe]",
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7079...,Subsurface,Optimising of ROP,"Drilling 23""x17"" section it was found that ROP...",holecleaning,"{highrop, holecleaning}",YES,"[highrop, holecleaning]",
11,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7754...,Subsurface,12 bbl gain in Active,12 bbl gain in Active at 3299m. Flowchecked – ...,wellcontrol,{wellcontrol},YES,[wellcontrol],
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_8864...,Subsurface,"Reduced ROP // negative drill break in Lange, ...",Drilled with an ROP as expected until half way...,harddrilling,"{harddrilling, lowrop}",YES,"[harddrilling, lowrop]",
6,a1f86f80-135e-458b-aafc-3af30d2476f2_main_8ad3...,Subsurface,Kick - shallow water flow and gas,Observed water flow with associated gas from 8...,shallowgas,"{wellcontrol, shallowgas, shallowwater}",YES,"[shallowgas, shallowwater, wellcontrol]",
10,a1f86f80-135e-458b-aafc-3af30d2476f2_main_c6aa...,Subsurface,Packoffs,Packoffs- due to poor hole cleaning when using...,wellborestability,"{holecleaning, wellborestability}",NO,"[holecleaning, packoff, wellborestability]",
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d483...,Subsurface,Kickoff,"Original hole, Mudline 129.5 m Categor...",other,{},YES,[],


In [245]:

# Hold out the few-shot examples: every dataset row whose id appears among the
# examples is excluded, so events shown in the prompt are never scored.
test_dataset_df = dataset_df.loc[~dataset_df["id"].isin(examples["id"])]
test_dataset_df

Unnamed: 0,id,scope,title,description,extendedCategory,tags,Are tags correct?,Reviewed tags,Comments
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_50aa...,Subsurface,Lossesin Heimdal,Heimdal 3754m dynamic mud losses at 6m3/hrs wi...,lostcirculation,{lostcirculation},YES,{lostcirculation},
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_3806...,Subsurface,Fault Related Losses,Fault was drilled. Mud losses of 14m3/hr o...,lostcirculation,{lostcirculation},YES,{lostcirculation},
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_154d...,Subsurface,Stringers,Set TD early due to not able to penetrate a st...,harddrilling,"{harddrilling, wellborebreathing, lostcirculat...",NO,"{harddrilling, wellborebreathing, lostcirculat...",
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7fea...,Subsurface,Losses in Balder Tuff,Observed indications off losses at 4353 m MD B...,lostcirculation,"{wellcontrol, lostcirculation}",YES,"{wellcontrol, lostcirculation}",
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_aab3...,Subsurface,Losses,Drilled horizontally through Balder / UDC sand...,lostcirculation,"{lostcirculation, wellborestability}",YES,"{lostcirculation, wellborestability}",
...,...,...,...,...,...,...,...,...,...
333,a1f86f80-135e-458b-aafc-3af30d2476f2_main_9fdf...,Subsurface,Positive drilling break when drilling into Kai...,Possible positive drilling break at Top Kai. T...,other,{},YES,{},
335,a1f86f80-135e-458b-aafc-3af30d2476f2_main_daa4...,Subsurface,Kickoff,Kicked off 2/8-F-7 T3 Category: Direct...,other,{},YES,{},
336,a1f86f80-135e-458b-aafc-3af30d2476f2_main_2f0c...,Subsurface,Kickoff,Kicked off 2/8-N-5 T4 Category: Direct...,other,{},YES,{},
337,a1f86f80-135e-458b-aafc-3af30d2476f2_main_b543...,Subsurface,Kickoff,"Original hole, Mudline 110.3 m Categor...",other,{},YES,{},


## Apply the model

In [246]:
# Base instructions of the system prompt: names the task, lists the allowed
# categories (comma-joined from CONTEXT.tags_in_scope) and asks for a two-line
# reply whose second line is the chosen category. The worked examples are
# appended to this string further down in the same cell.
SYSTEM_PROMPT = f"""
You will be given a description of a drilling event, and your task is to determine the extended category of this event.
Available extended categories are: {', '.join(CONTEXT.tags_in_scope)}

Respond in 2 lines:
1. Think about the extended category that best describes the event. Refer to the examples.
2. Write a single word: the extended category that best applies to the event.
"""


def format_event(item):
    """Render one event as the text block shown to the model.

    `item` is anything indexable by the keys "scope", "title" and
    "description" (a dict or a DataFrame row). The result has three parts
    separated by newlines: a "Scope: ..." line, a "Title: ..." line and the
    description itself.
    """
    scope_line = f"Scope: {item['scope']}"
    title_line = f"Title: {item['title']}"
    description = f"{item['description']}"
    return "\n".join((scope_line, title_line, description))

# Append one worked example per row of `examples`, numbered from 0: the
# formatted event followed by its ground-truth extended category.
for idx, (_, row) in enumerate(examples.iterrows()):
    SYSTEM_PROMPT += f"\n## Example event {idx}\n{format_event(row)}\nExtended category: {row['extendedCategory']}\n"

# Record the final prompt (instructions + examples) on the run context.
CONTEXT.llm_system_prompt = SYSTEM_PROMPT

# Show the complete prompt and its length in characters.
print(SYSTEM_PROMPT)
print(len(SYSTEM_PROMPT))


You will be given a description of a drilling event, and your task is to determine the extended category of this event.
Available extended categories are: boulders, harddrilling, holecleaning, lostcirculation, other, packoff, shallowgas, stuck, tighthole, waterflow, wellborestability, wellcontrol

Respond in 2 lines:
1. Think about the extended category that best describes the event. Refer to the examples.
2. Write a single word: the extended category that best applies to the event.

## Example event 0
Scope: Subsurface
Title: Boulders
Slow drilling and vibrations. Local Lessons Learned: See FWR. Category: Wellbore stability. Sub-category: Wellbore stability
Extended category: boulders

## Example event 1
Scope: Subsurface
Title: Reduced ROP // negative drill break in Lange, Spekk and Melke Fm
Drilled with an ROP as expected until half way into the Lange formation, where the ROP started decreasing. above Top Spekk, the ROP dropped to 2m/hr and the next 300m
Extended category: harddril

In [247]:
# Sampling temperature recorded for this run.
TEMPERATURE = 0

# Stored on the run context alongside the other run settings.
# NOTE(review): the ask_openai call in get_predicted_category does not pass a
# temperature argument, so this value is only recorded here - confirm that the
# helper's own default matches it.
CONTEXT.llm_temperature = TEMPERATURE

In [248]:
from concurrent.futures import ThreadPoolExecutor
import os
import string
from tqdm.auto import tqdm
from common.llm import ask_openai


# Helpers that query the LLM for one event and parse its two-line answer.
def _normalize_category(raw, valid_categories):
    """Reduce the model's answer line to a bare, lower-case category name.

    The text is repeatedly lower-cased, trimmed of surrounding whitespace,
    punctuation and digits, and relieved of a leading "extended category"
    label until nothing changes any more. This copes with answers such as
    "2. Extended category: Stuck.".

    A result that is not in `valid_categories` is reported on stdout and still
    returned, so that unexpected answers stay visible in the results.
    """
    cleaned = raw
    while True:
        candidate = (
            cleaned.lower().strip().strip(string.punctuation).strip(string.digits)
        )
        if candidate.startswith("extended category"):
            candidate = candidate[len("extended category"):]
        if candidate == cleaned:
            # Fixed point reached: another pass would not change the text.
            break
        cleaned = candidate

    if cleaned not in valid_categories:
        print('Bad category:', cleaned)
    return cleaned


def get_predicted_category(text):
    """Ask the LLM for the extended category of one formatted event.

    Parameters
    ----------
    text : str
        Event text sent as the user prompt; the module-level SYSTEM_PROMPT is
        sent as the system prompt.

    Returns
    -------
    tuple
        ``(category, justification)``, always exactly two strings so that the
        results can be assigned to two DataFrame columns:

        * normal reply - the last line (or, for a single-line reply, the last
          word) normalized into a category name, and everything before it as
          the justification;
        * reply with nothing before the category, or an empty reply - the
          justification is "(missing)" (and the category is "" when the reply
          is empty);
        * request rejected by the content filter - category "" and a
          placeholder justification.

    Raises
    ------
    Exception
        Any error raised by ask_openai other than a content-filter rejection
        is re-raised unchanged. A missing AZURE_OPENAI_* environment variable
        raises KeyError.
    """
    try:
        response = ask_openai(
            azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
            api_key=os.environ["AZURE_OPENAI_KEY"],
            api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            deployment_name=os.environ["AZURE_OPENAI_DEPLOYMENT_ID"],
            system_prompt=SYSTEM_PROMPT,
            prompt=text,
        )
    except Exception as e:
        if "content management policy. Please modify your prompt" in str(e):
            print(e)
            # Running into the content filter: keep the two-element shape of
            # the normal return value so one filtered row cannot break the
            # two-column assignment of the collected results.
            return "", "(blocked by content filter)"
        raise

    # NOTE(review): the return type of ask_openai is not visible here; a None
    # reply is treated like an empty one.
    response = (response or "").strip()

    # The category is expected on the last line; a single-line reply falls
    # back to its last whitespace-separated word.
    if '\n' in response:
        items = response.rsplit('\n', 1)
    else:
        items = response.rsplit(None, 1)

    if len(items) == 2:
        justification, category = items
    else:
        justification = "(missing)"
        # rsplit(None, 1) yields [] for an empty reply; avoid indexing into it.
        category = items[0] if items else ""

    return _normalize_category(category, set(CONTEXT.tags_in_scope)), justification


# Work on a copy so that adding prediction columns below leaves
# test_dataset_df unchanged.
assessed_df = test_dataset_df.copy()


def parallel_apply(df, func, num_threads: int):
    """Apply `func` to every row of `df` on a pool of `num_threads` threads.

    Each row is first rendered to text with `format_event`; `func` is called
    with that text. A tqdm progress bar sized to ``len(df)`` tracks the work,
    and the results are returned as a list in row order.
    """
    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        event_texts = df.apply(lambda row: format_event(row.to_dict()), axis=1)
        progress = tqdm(pool.map(func, event_texts), total=len(df))
        return list(progress)


# Query the model once per row (3 worker threads) and store the results in two
# new columns. The two-column assignment relies on every result being a
# (category, justification) pair.
assessed_df[["predictedExtendedCategory", "justification"]] = parallel_apply(
    assessed_df, get_predicted_category, num_threads=3
)
assessed_df

  0%|          | 0/323 [00:00<?, ?it/s]

100%|██████████| 323/323 [00:51<00:00,  6.25it/s]


Unnamed: 0,id,scope,title,description,extendedCategory,tags,Are tags correct?,Reviewed tags,Comments,predictedExtendedCategory,justification
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_50aa...,Subsurface,Lossesin Heimdal,Heimdal 3754m dynamic mud losses at 6m3/hrs wi...,lostcirculation,{lostcirculation},YES,{lostcirculation},,lostcirculation,1. The event describes significant mud losses ...
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_3806...,Subsurface,Fault Related Losses,Fault was drilled. Mud losses of 14m3/hr o...,lostcirculation,{lostcirculation},YES,{lostcirculation},,lostcirculation,1. The event describes significant mud losses ...
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_154d...,Subsurface,Stringers,Set TD early due to not able to penetrate a st...,harddrilling,"{harddrilling, wellborebreathing, lostcirculat...",NO,"{harddrilling, wellborebreathing, lostcirculat...",,wellborestability,1. The event describes difficulties in drillin...
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7fea...,Subsurface,Losses in Balder Tuff,Observed indications off losses at 4353 m MD B...,lostcirculation,"{wellcontrol, lostcirculation}",YES,"{wellcontrol, lostcirculation}",,lostcirculation,1. The event describes a situation where there...
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_aab3...,Subsurface,Losses,Drilled horizontally through Balder / UDC sand...,lostcirculation,"{lostcirculation, wellborestability}",YES,"{lostcirculation, wellborestability}",,lostcirculation,1. The event describes a situation where drill...
...,...,...,...,...,...,...,...,...,...,...,...
333,a1f86f80-135e-458b-aafc-3af30d2476f2_main_9fdf...,Subsurface,Positive drilling break when drilling into Kai...,Possible positive drilling break at Top Kai. T...,other,{},YES,{},,wellcontrol,1. The event describes a situation where an in...
335,a1f86f80-135e-458b-aafc-3af30d2476f2_main_daa4...,Subsurface,Kickoff,Kicked off 2/8-F-7 T3 Category: Direct...,other,{},YES,{},,other,The event describes a kickoff in directional d...
336,a1f86f80-135e-458b-aafc-3af30d2476f2_main_2f0c...,Subsurface,Kickoff,Kicked off 2/8-N-5 T4 Category: Direct...,other,{},YES,{},,other,The event describes a directional drilling ope...
337,a1f86f80-135e-458b-aafc-3af30d2476f2_main_b543...,Subsurface,Kickoff,"Original hole, Mudline 110.3 m Categor...",other,{},YES,{},,other,1. The event describes a kickoff for a directi...


In [249]:
# Prepare the frame for evaluation: keep only the columns that are needed, then
# let expand_ext_categories compare the ground-truth and predicted category
# columns (the output below gains expected__<category> / actual__<category>
# columns for the categories in scope).
from common.assessment import expand_ext_categories

# Drop everything except identifiers, the event text, both category columns,
# the model's justification and the reviewer comments.
assessed_df = assessed_df.filter(
    items=[
        "id",
        "scope",
        "title",
        "description",
        "extendedCategory",
        "predictedExtendedCategory",
        "justification",
        "Comments",
    ]
)
assessed_df = expand_ext_categories(
    assessed_df,
    categories_in_scope=CONTEXT.tags_in_scope,
    ground_truth_column="extendedCategory",
    predicted_column="predictedExtendedCategory",
)
assessed_df

Unnamed: 0,id,scope,title,description,extendedCategory,predictedExtendedCategory,justification,Comments,expected__boulders,actual__boulders,...,expected__stuck,actual__stuck,expected__tighthole,actual__tighthole,expected__waterflow,actual__waterflow,expected__wellborestability,actual__wellborestability,expected__wellcontrol,actual__wellcontrol
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_50aa...,Subsurface,Lossesin Heimdal,Heimdal 3754m dynamic mud losses at 6m3/hrs wi...,lostcirculation,lostcirculation,1. The event describes significant mud losses ...,,False,False,...,False,False,False,False,False,False,False,False,False,False
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_3806...,Subsurface,Fault Related Losses,Fault was drilled. Mud losses of 14m3/hr o...,lostcirculation,lostcirculation,1. The event describes significant mud losses ...,,False,False,...,False,False,False,False,False,False,False,False,False,False
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_154d...,Subsurface,Stringers,Set TD early due to not able to penetrate a st...,harddrilling,wellborestability,1. The event describes difficulties in drillin...,,False,False,...,False,False,False,False,False,False,False,True,False,False
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7fea...,Subsurface,Losses in Balder Tuff,Observed indications off losses at 4353 m MD B...,lostcirculation,lostcirculation,1. The event describes a situation where there...,,False,False,...,False,False,False,False,False,False,False,False,False,False
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_aab3...,Subsurface,Losses,Drilled horizontally through Balder / UDC sand...,lostcirculation,lostcirculation,1. The event describes a situation where drill...,,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
333,a1f86f80-135e-458b-aafc-3af30d2476f2_main_9fdf...,Subsurface,Positive drilling break when drilling into Kai...,Possible positive drilling break at Top Kai. T...,other,wellcontrol,1. The event describes a situation where an in...,,False,False,...,False,False,False,False,False,False,False,False,False,True
335,a1f86f80-135e-458b-aafc-3af30d2476f2_main_daa4...,Subsurface,Kickoff,Kicked off 2/8-F-7 T3 Category: Direct...,other,other,The event describes a kickoff in directional d...,,False,False,...,False,False,False,False,False,False,False,False,False,False
336,a1f86f80-135e-458b-aafc-3af30d2476f2_main_2f0c...,Subsurface,Kickoff,Kicked off 2/8-N-5 T4 Category: Direct...,other,other,The event describes a directional drilling ope...,,False,False,...,False,False,False,False,False,False,False,False,False,False
337,a1f86f80-135e-458b-aafc-3af30d2476f2_main_b543...,Subsurface,Kickoff,"Original hole, Mudline 110.3 m Categor...",other,other,1. The event describes a kickoff for a directi...,,False,False,...,False,False,False,False,False,False,False,False,False,False


In [250]:
# from common.datasets import save_assessed_dataset


# save_assessed_dataset(
#     assessed_df,
#     context=CONTEXT,
#     experiment_id=EXPERIMENT_ID,
#     run_id=RUN_ID,
# )

## Evaluate predicted tags

In [251]:
from common.evaluation import TagMatchingEvaluator


# Build the evaluator from the expanded frame, using the same category list and
# "no tags" setting that were recorded on the run context.
evaluator = TagMatchingEvaluator(
    assessed_df=assessed_df,
    tags_in_scope=CONTEXT.tags_in_scope,
    with_notags=CONTEXT.with_notags,
)

In [252]:
# Per-category table: precision, recall, F1 and the supporting counts.
evaluator.eval_per_tag()

Unnamed: 0,tag,precision,recall,f1,true_positives,positives_in_ground_truth,negatives_in_ground_truth
0,boulders,0.36,0.9,0.514286,9,10,313
1,harddrilling,0.846154,0.468085,0.60274,22,47,276
2,holecleaning,0.857143,0.666667,0.75,12,18,305
3,lostcirculation,0.864865,1.0,0.927536,32,32,291
4,other,0.825397,0.764706,0.793893,52,68,255
5,packoff,0.190476,1.0,0.32,4,4,319
6,shallowgas,0.625,0.833333,0.714286,10,12,311
7,stuck,0.692308,0.857143,0.765957,18,21,302
8,tighthole,0.904762,0.76,0.826087,19,25,298
9,waterflow,1.0,0.285714,0.444444,2,7,316


In [253]:
# Per-event table: the assessed rows with precision, recall, F1 and
# true-positive count added for each event.
evaluator.eval_individual_ddrs()

Unnamed: 0,id,scope,title,description,extendedCategory,predictedExtendedCategory,justification,Comments,expected__boulders,actual__boulders,...,expected__wellborestability,actual__wellborestability,expected__wellcontrol,actual__wellcontrol,expected__notags,actual__notags,precision,recall,f1,true_positives
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_50aa...,Subsurface,Lossesin Heimdal,Heimdal 3754m dynamic mud losses at 6m3/hrs wi...,lostcirculation,lostcirculation,1. The event describes significant mud losses ...,,False,False,...,False,False,False,False,False,False,1.0,1.0,1.0,1
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_3806...,Subsurface,Fault Related Losses,Fault was drilled. Mud losses of 14m3/hr o...,lostcirculation,lostcirculation,1. The event describes significant mud losses ...,,False,False,...,False,False,False,False,False,False,1.0,1.0,1.0,1
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_154d...,Subsurface,Stringers,Set TD early due to not able to penetrate a st...,harddrilling,wellborestability,1. The event describes difficulties in drillin...,,False,False,...,False,True,False,False,False,False,0.0,0.0,0.0,0
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7fea...,Subsurface,Losses in Balder Tuff,Observed indications off losses at 4353 m MD B...,lostcirculation,lostcirculation,1. The event describes a situation where there...,,False,False,...,False,False,False,False,False,False,1.0,1.0,1.0,1
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_aab3...,Subsurface,Losses,Drilled horizontally through Balder / UDC sand...,lostcirculation,lostcirculation,1. The event describes a situation where drill...,,False,False,...,False,False,False,False,False,False,1.0,1.0,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
333,a1f86f80-135e-458b-aafc-3af30d2476f2_main_9fdf...,Subsurface,Positive drilling break when drilling into Kai...,Possible positive drilling break at Top Kai. T...,other,wellcontrol,1. The event describes a situation where an in...,,False,False,...,False,False,False,True,False,False,0.0,0.0,0.0,0
335,a1f86f80-135e-458b-aafc-3af30d2476f2_main_daa4...,Subsurface,Kickoff,Kicked off 2/8-F-7 T3 Category: Direct...,other,other,The event describes a kickoff in directional d...,,False,False,...,False,False,False,False,False,False,1.0,1.0,1.0,1
336,a1f86f80-135e-458b-aafc-3af30d2476f2_main_2f0c...,Subsurface,Kickoff,Kicked off 2/8-N-5 T4 Category: Direct...,other,other,The event describes a directional drilling ope...,,False,False,...,False,False,False,False,False,False,1.0,1.0,1.0,1
337,a1f86f80-135e-458b-aafc-3af30d2476f2_main_b543...,Subsurface,Kickoff,"Original hole, Mudline 110.3 m Categor...",other,other,1. The event describes a kickoff for a directi...,,False,False,...,False,False,False,False,False,False,1.0,1.0,1.0,1


In [254]:
# Headline numbers: metrics averaged per event ("Average per DDR") and per
# category ("Average per Tag").
evaluator.average_metrics()

Unnamed: 0,Type,precision,recall,f1
0,Average per DDR,0.662539,0.662539,0.662539
1,Average per Tag,0.674149,0.719571,0.632745


## Save evaluation report

In [255]:
from common.datasets import save_evaluation_report

# Hand everything this run produced to the report writer: the run identifiers,
# the full input dataset, the assessed frame, the evaluator and the run context.
# NOTE(review): where and in which format the report is written is decided by
# save_evaluation_report and is not visible in this notebook.
save_evaluation_report(
    experiment_id=EXPERIMENT_ID,
    run_id=RUN_ID,
    dataset_df=dataset_df,
    assessed_df=assessed_df,
    evaluator=evaluator,
    context=CONTEXT,
)