## Set up environment

In [1]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment before anything
# reads os.environ. With override=True, python-dotenv lets values from the .env
# file replace variables that are already set. The result is bound to `_` so it
# is not echoed as cell output.
_ = load_dotenv(override=True)

In [2]:
import os
from common.context import LLMTagPredictionContext


# Identifiers of this experiment run; later cells pass them to the save helpers.
EXPERIMENT_ID = "2024-10-17-try-llm"
RUN_ID = "2-gpt-4o-mini"


# Run configuration shared by the prompt-building, assessment and evaluation
# cells. The tags are written alphabetically here and still passed through
# sorted(), so tags_in_scope is always an alphabetically ordered list.
CONTEXT = LLMTagPredictionContext(
    description="Try asking LLM to assess all tags at once. Give examples.",
    experiment_id=EXPERIMENT_ID,
    run_id=RUN_ID,
    tags_in_scope=sorted(
        (
            "boulders",
            "dheqfailure",
            "dircontrol",
            "harddrilling",
            "highrop",
            "holecleaning",
            "lostcirculation",
            "lowrop",
            "packoff",
            "shallowgas",
            "shallowwater",
            "stuckpipe",
            "surfeqfailure",
            "tighthole",
            "wait",
            "wellborebreathing",
            "wellborestability",
            "wellcontrol",
        )
    ),
    # The model name recorded for the run is the Azure deployment id from the environment.
    llm_model=os.environ["AZURE_OPENAI_DEPLOYMENT_ID"],
    with_notags=True,
)

## Fetch datasets

In [3]:
from common.datasets import load_input_dataset

# Load the reviewed DDR dataset. The "tags" and "Reviewed tags" columns are
# requested as sets (columns_to_convert_to_sets); later cells rely on that for
# membership checks such as `tag in tags`.
dataset_df = load_input_dataset(
    "reviewed_distributed_ddr.csv",
    columns_to_convert_to_sets=["tags", "Reviewed tags"],
)
# Last expression of the cell: shows the loaded frame.
dataset_df

Unnamed: 0,id,Text,phase,code,subCode,tags,Are tags correct?,Reviewed tags,Comments
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_61b0...,Circulated hole with reduced flow due to sand ...,INTCSG1,N,CIR,{shallowwater},YES,{shallowwater},
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_61b0...,"Circulated hole (4400 lpm, 70 rpm, 2-4 kNm) wh...",INTCSG1,N,CIR,{shallowwater},YES,{shallowwater},
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d1ce...,Circulated hole with reduced flow due to sand ...,INTCSG1,N,CIR,{shallowwater},YES,{shallowwater},
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d1ce...,"Circulated hole (4400 lpm, 70 rpm, 2-4 kNm) wh...",INTCSG1,N,CIR,{shallowwater},YES,{shallowwater},
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_e6c0...,Laid down cement head. Moved rig to J-3 slot....,SURF,P,LD,{shallowwater},YES,{shallowwater},
...,...,...,...,...,...,...,...,...,...
1437,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7965...,Meeting with onshore forward plan.\nMeanwhile:...,INTERV,N,SAFETY,{},,{},
1438,a1f86f80-135e-458b-aafc-3af30d2476f2_main_fc63...,Drilled from 3903 m to 3921 m. - WOB = 6 - ...,RES1,P,DRL,{},,{},
1439,a1f86f80-135e-458b-aafc-3af30d2476f2_main_12d5...,CIRC OUT FILL W/ 80 SPM/2000 PSI.,PROD1,C,,{},,{},
1440,a1f86f80-135e-458b-aafc-3af30d2476f2_main_644e...,"Drilled 12-1/4"" hole from 3916m to 3931m with...",INT2,P,DRLDIR,{},,{lostcirculation},


## Apply the model

In [4]:
import pandas as pd
from common.llm import ask_openai

# Few-shot example selection: for every tag in scope, take the two shortest
# reports (longer than 40 characters) whose reviewed tags contain that tag.
# NOTE(review): these rows come from the same dataset that is scored later in
# this notebook, so the examples are also part of the evaluation — confirm
# that this overlap is acceptable.
candidates = dataset_df.sort_values(by="Text", key=lambda col: col.str.len()).loc[
    lambda frame: frame["Text"].str.len() > 40
]


def _shortest_examples_with(tag):
    """Return up to two candidate rows (shortest text first) tagged with `tag`."""
    has_tag = [tag in reviewed for reviewed in candidates["Reviewed tags"]]
    return candidates.loc[has_tag, ["Text", "Reviewed tags"]].head(2)


examples = (
    # One small frame per tag, stacked into a single frame with a fresh index.
    pd.concat(
        [_shortest_examples_with(tag) for tag in CONTEXT.tags_in_scope],
        ignore_index=True,
    )
    # Sets are unhashable; sorted tuples make the rows comparable for de-duplication.
    .assign(
        **{
            "Reviewed tags": lambda frame: frame["Reviewed tags"].apply(
                lambda tags: tuple(sorted(tags))
            )
        }
    )
    # A report that was picked for several tags should appear only once.
    .drop_duplicates()
    .sort_values(by="Text")
)
examples

Unnamed: 0,Text,Reviewed tags
11,"""Circulated hole clean due to poor hole cleani...","(holecleaning,)"
9,(424) Evaluated further directional drilling o...,"(highrop,)"
3,ATTEMPT TO TAKE SURVEYS - NO GO DUE TO MWD FAI...,"(dheqfailure,)"
17,Attempted to go back drilling - nogo. Pack-off...,"(packoff,)"
26,"BACKREAM TO 2700M W/65 SPM/3500 PSI, TIGHT SPO...","(tighthole,)"
23,BHA stopped in LGLM. Jarred up 1 time to get f...,"(stuckpipe,)"
33,CIRCULATED BOTTOMS UP WITH 310 GPM/3000\nPSI/1...,"(wellborestability,)"
25,Changed out RLA motor due to hydraulic leak.,"(surfeqfailure,)"
10,Circulate due to excess cuttings at shakers.,"(holecleaning,)"
6,Cont. reaming F/154m - 181m 10mph 40-120RPM(li...,"(harddrilling,)"


In [5]:
# Instruction header: task description, the list of allowed tags and the
# required answer format.
prompt_header = f"""
You will be given a description of a daily drilling report, and your task is to select which of these tags apply to this report.
Possible tags: {', '.join(CONTEXT.tags_in_scope)}

Respond only with the list of tags that apply to the report, separated by commas.

"""

# One few-shot section per selected example, numbered from 0 in the order of
# the `examples` frame.
example_sections = [
    f"\n## Example report {idx}\nText: {report_text}\n\n## Correct response\n{', '.join(reviewed_tags)}\n"
    for idx, (report_text, reviewed_tags) in enumerate(
        zip(examples["Text"], examples["Reviewed tags"])
    )
]

SYSTEM_PROMPT = prompt_header + "".join(example_sections)

# Record the exact prompt on the run context.
CONTEXT.llm_system_prompt = SYSTEM_PROMPT

# Show the final prompt and its length in characters.
print(SYSTEM_PROMPT)
print(len(SYSTEM_PROMPT))


You will be given a description of a daily drilling report, and your task is to select which of these tags apply to this report.
Possible tags: boulders, dheqfailure, dircontrol, harddrilling, highrop, holecleaning, lostcirculation, lowrop, packoff, shallowgas, shallowwater, stuckpipe, surfeqfailure, tighthole, wait, wellborebreathing, wellborestability, wellcontrol

Respond only with the list of tags that apply to the report, separated by commas.


## Example report 0
Text: "Circulated hole clean due to poor hole cleaning. Circulated with3800 lpm, 136 bar, 120 rpm."

## Correct response
holecleaning

## Example report 1
Text: (424) Evaluated further directional drilling options due to Powerdrive w/PDC not achieving sufficient build-rate and poor azimuthal control (right walk). Circulating with 300 gpm, 700 psi.  High formation gas (max 46,1%) due to high ROP in the reservoir section. BGG 1,5-8%, TG 4%, CG max 12,2%.

## Correct response
highrop

## Example report 2
Text: ATTEMPT TO T

In [6]:
# Sampling temperature for this run, stored on the run context.
# NOTE(review): the ask_openai call in this notebook is not given a temperature
# argument — confirm the value recorded here matches what the helper really uses.
TEMPERATURE = 0

CONTEXT.llm_temperature = TEMPERATURE

In [7]:
from concurrent.futures import ThreadPoolExecutor
import os
from tqdm.auto import tqdm


# Characters removed from both ends of a candidate tag: whitespace plus the
# wrapping punctuation the model occasionally adds around a tag.
_TAG_STRIP_CHARS = " \t\r\n()[]{}\"'`."


def _normalize_tag(raw_tag):
    """Clean one comma-separated fragment of the model response.

    Lower-cases the fragment, trims whitespace, drops a leading "tags:" label
    and finally strips surrounding whitespace, brackets, quotes and periods.
    """
    tag = raw_tag.lower().strip()
    if tag.startswith("tags:"):
        tag = tag[len("tags:") :]
    return tag.strip(_TAG_STRIP_CHARS)


def get_predicted_tags(text: str) -> set:
    """Ask the LLM which in-scope tags apply to one report text.

    Args:
        text: The report text, sent as the user prompt.

    Returns:
        The set of tags from ``CONTEXT.tags_in_scope`` found in the response.
        An empty set is returned when the request is rejected by the content
        filter or when the response is empty.

    Raises:
        Exception: Any error from ``ask_openai`` other than the content-filter
            rejection is re-raised unchanged.
    """
    # NOTE(review): TEMPERATURE / CONTEXT.llm_temperature is not forwarded to
    # ask_openai here — confirm the helper applies the intended temperature.
    try:
        response = ask_openai(
            azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
            api_key=os.environ["AZURE_OPENAI_KEY"],
            api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            deployment_name=os.environ["AZURE_OPENAI_DEPLOYMENT_ID"],
            system_prompt=SYSTEM_PROMPT,
            prompt=text,
        )
    except Exception as e:
        if "content management policy. Please modify your prompt" in str(e):
            print(e)
            # Content filter hit: no prediction. Return a set (not a list) so
            # every row of the "Predicted" column has the same type.
            return set()
        raise

    # Treat a missing/empty response like "no tags" instead of failing on .strip().
    fragments = (response or "").strip().split(",")
    # Keep only known tags; anything else the model produced is discarded.
    return {_normalize_tag(fragment) for fragment in fragments}.intersection(
        CONTEXT.tags_in_scope
    )


# Work on a copy so that dataset_df is not modified when the "Predicted" column is added.
assessed_df = dataset_df.copy()


def parallel_apply(df, func, num_threads: int):
    """Apply ``func`` to every value of ``df["Text"]`` on a thread pool.

    Progress is reported through tqdm. The results are returned as a list in
    the same order as the column values.
    """
    texts = df["Text"]
    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        # pool.map yields results in input order; list() drains it before the pool closes.
        progress = tqdm(pool.map(func, texts), total=len(df))
        return list(progress)


# Query the LLM for every report text (2 worker threads) and store the returned
# tags in a new "Predicted" column. This issues one request per row, so the
# cell takes minutes on the full dataset.
assessed_df["Predicted"] = parallel_apply(
    assessed_df, get_predicted_tags, num_threads=2
)
# Last expression of the cell: shows the frame including the predictions.
assessed_df

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 1442/1442 [06:37<00:00,  3.63it/s] 


Unnamed: 0,id,Text,phase,code,subCode,tags,Are tags correct?,Reviewed tags,Comments,Predicted
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_61b0...,Circulated hole with reduced flow due to sand ...,INTCSG1,N,CIR,{shallowwater},YES,{shallowwater},,"{holecleaning, shallowwater}"
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_61b0...,"Circulated hole (4400 lpm, 70 rpm, 2-4 kNm) wh...",INTCSG1,N,CIR,{shallowwater},YES,{shallowwater},,"{shallowwater, wait}"
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d1ce...,Circulated hole with reduced flow due to sand ...,INTCSG1,N,CIR,{shallowwater},YES,{shallowwater},,"{holecleaning, shallowwater}"
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d1ce...,"Circulated hole (4400 lpm, 70 rpm, 2-4 kNm) wh...",INTCSG1,N,CIR,{shallowwater},YES,{shallowwater},,"{shallowwater, wait}"
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_e6c0...,Laid down cement head. Moved rig to J-3 slot....,SURF,P,LD,{shallowwater},YES,{shallowwater},,{shallowwater}
...,...,...,...,...,...,...,...,...,...,...
1437,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7965...,Meeting with onshore forward plan.\nMeanwhile:...,INTERV,N,SAFETY,{},,{},,"{stuckpipe, wait}"
1438,a1f86f80-135e-458b-aafc-3af30d2476f2_main_fc63...,Drilled from 3903 m to 3921 m. - WOB = 6 - ...,RES1,P,DRL,{},,{},,"{shallowgas, highrop}"
1439,a1f86f80-135e-458b-aafc-3af30d2476f2_main_12d5...,CIRC OUT FILL W/ 80 SPM/2000 PSI.,PROD1,C,,{},,{},,{holecleaning}
1440,a1f86f80-135e-458b-aafc-3af30d2476f2_main_644e...,"Drilled 12-1/4"" hole from 3916m to 3931m with...",INT2,P,DRLDIR,{},,{lostcirculation},,"{lostcirculation, shallowgas}"


In [8]:
# DDR tagging using regex rules is already applied to the dataset in this experiment; this cell expands the reviewed and LLM-predicted tags into per-tag columns for evaluation
from common.assessment import expand_tags

# Ground truth is taken from "Reviewed tags", predictions from "Predicted".
# NOTE(review): the displayed result no longer contains the "Reviewed tags" and
# "Predicted" columns, and the result is bound to the same name, so re-running
# this cell without re-running the prediction cell will presumably fail.
assessed_df = expand_tags(
    assessed_df,
    tags_in_scope=CONTEXT.tags_in_scope,
    ground_truth_tags_column="Reviewed tags",
    predicted_tags_column="Predicted",
)
# Last expression of the cell: shows the expanded frame.
assessed_df

Unnamed: 0,id,Text,phase,code,subCode,tags,Are tags correct?,Comments,expected__boulders,actual__boulders,...,expected__tighthole,actual__tighthole,expected__wait,actual__wait,expected__wellborebreathing,actual__wellborebreathing,expected__wellborestability,actual__wellborestability,expected__wellcontrol,actual__wellcontrol
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_61b0...,Circulated hole with reduced flow due to sand ...,INTCSG1,N,CIR,{shallowwater},YES,,False,False,...,False,False,False,False,False,False,False,False,False,False
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_61b0...,"Circulated hole (4400 lpm, 70 rpm, 2-4 kNm) wh...",INTCSG1,N,CIR,{shallowwater},YES,,False,False,...,False,False,False,True,False,False,False,False,False,False
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d1ce...,Circulated hole with reduced flow due to sand ...,INTCSG1,N,CIR,{shallowwater},YES,,False,False,...,False,False,False,False,False,False,False,False,False,False
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d1ce...,"Circulated hole (4400 lpm, 70 rpm, 2-4 kNm) wh...",INTCSG1,N,CIR,{shallowwater},YES,,False,False,...,False,False,False,True,False,False,False,False,False,False
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_e6c0...,Laid down cement head. Moved rig to J-3 slot....,SURF,P,LD,{shallowwater},YES,,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1437,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7965...,Meeting with onshore forward plan.\nMeanwhile:...,INTERV,N,SAFETY,{},,,False,False,...,False,False,False,True,False,False,False,False,False,False
1438,a1f86f80-135e-458b-aafc-3af30d2476f2_main_fc63...,Drilled from 3903 m to 3921 m. - WOB = 6 - ...,RES1,P,DRL,{},,,False,False,...,False,False,False,False,False,False,False,False,False,False
1439,a1f86f80-135e-458b-aafc-3af30d2476f2_main_12d5...,CIRC OUT FILL W/ 80 SPM/2000 PSI.,PROD1,C,,{},,,False,False,...,False,False,False,False,False,False,False,False,False,False
1440,a1f86f80-135e-458b-aafc-3af30d2476f2_main_644e...,"Drilled 12-1/4"" hole from 3916m to 3931m with...",INT2,P,DRLDIR,{},,,False,False,...,False,False,False,False,False,False,False,False,False,False


In [9]:
from common.datasets import save_assessed_dataset

# Persist the assessed frame together with the run context under this
# experiment/run id. The value echoed as cell output appears to be the target
# location — confirm against the helper.
save_assessed_dataset(
    assessed_df,
    context=CONTEXT,
    experiment_id=EXPERIMENT_ID,
    run_id=RUN_ID,
)

2_assessed_datasets/2024-10-17-try-llm-2-gpt-4o-mini


## Evaluate predicted tags

In [10]:
from common.evaluation import TagMatchingEvaluator

# Build the evaluator on the expanded frame, using the same tag scope and
# "notags" setting that were configured on the run context.
evaluator = TagMatchingEvaluator(
    assessed_df=assessed_df,
    tags_in_scope=CONTEXT.tags_in_scope,
    with_notags=CONTEXT.with_notags,
)

In [11]:
# Per-report view: the displayed frame carries precision, recall, f1 and true_positives for each DDR row.
evaluator.eval_individual_ddrs()

Unnamed: 0,id,Text,phase,code,subCode,tags,Are tags correct?,Comments,expected__boulders,actual__boulders,...,expected__wellborestability,actual__wellborestability,expected__wellcontrol,actual__wellcontrol,expected__notags,actual__notags,precision,recall,f1,true_positives
0,a1f86f80-135e-458b-aafc-3af30d2476f2_main_61b0...,Circulated hole with reduced flow due to sand ...,INTCSG1,N,CIR,{shallowwater},YES,,False,False,...,False,False,False,False,False,False,0.5,1.0,0.666667,1
1,a1f86f80-135e-458b-aafc-3af30d2476f2_main_61b0...,"Circulated hole (4400 lpm, 70 rpm, 2-4 kNm) wh...",INTCSG1,N,CIR,{shallowwater},YES,,False,False,...,False,False,False,False,False,False,0.5,1.0,0.666667,1
2,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d1ce...,Circulated hole with reduced flow due to sand ...,INTCSG1,N,CIR,{shallowwater},YES,,False,False,...,False,False,False,False,False,False,0.5,1.0,0.666667,1
3,a1f86f80-135e-458b-aafc-3af30d2476f2_main_d1ce...,"Circulated hole (4400 lpm, 70 rpm, 2-4 kNm) wh...",INTCSG1,N,CIR,{shallowwater},YES,,False,False,...,False,False,False,False,False,False,0.5,1.0,0.666667,1
4,a1f86f80-135e-458b-aafc-3af30d2476f2_main_e6c0...,Laid down cement head. Moved rig to J-3 slot....,SURF,P,LD,{shallowwater},YES,,False,False,...,False,False,False,False,False,False,1.0,1.0,1.000000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1437,a1f86f80-135e-458b-aafc-3af30d2476f2_main_7965...,Meeting with onshore forward plan.\nMeanwhile:...,INTERV,N,SAFETY,{},,,False,False,...,False,False,False,False,True,False,0.0,0.0,0.000000,0
1438,a1f86f80-135e-458b-aafc-3af30d2476f2_main_fc63...,Drilled from 3903 m to 3921 m. - WOB = 6 - ...,RES1,P,DRL,{},,,False,False,...,False,False,False,False,True,False,0.0,0.0,0.000000,0
1439,a1f86f80-135e-458b-aafc-3af30d2476f2_main_12d5...,CIRC OUT FILL W/ 80 SPM/2000 PSI.,PROD1,C,,{},,,False,False,...,False,False,False,False,True,False,0.0,0.0,0.000000,0
1440,a1f86f80-135e-458b-aafc-3af30d2476f2_main_644e...,"Drilled 12-1/4"" hole from 3916m to 3931m with...",INT2,P,DRLDIR,{},,,False,False,...,False,False,False,False,False,False,0.5,1.0,0.666667,1


In [12]:
# Per-tag view: precision, recall, f1 and ground-truth positive/negative counts for each tag.
evaluator.eval_per_tag()

Unnamed: 0,tag,precision,recall,f1,true_positives,positives_in_ground_truth,negatives_in_ground_truth
0,boulders,0.846154,1.0,0.916667,66,66,1376
1,dheqfailure,0.723077,0.79661,0.758065,47,59,1383
2,dircontrol,0.494048,1.0,0.661355,83,83,1359
3,harddrilling,0.371795,0.557692,0.446154,58,104,1338
4,highrop,0.059701,0.888889,0.111888,8,9,1433
5,holecleaning,0.254054,0.959184,0.401709,47,49,1393
6,lostcirculation,0.52,0.975,0.678261,117,120,1322
7,lowrop,0.636364,0.205882,0.311111,28,136,1306
8,packoff,0.8125,1.0,0.896552,78,78,1364
9,shallowgas,0.048276,1.0,0.092105,7,7,1435


In [13]:
# Summary view: precision, recall and f1 averaged per DDR and per tag.
evaluator.average_metrics()

Unnamed: 0,Type,precision,recall,f1
0,Average per DDR,0.426549,0.549595,0.462276
1,Average per Tag,0.481531,0.822089,0.543628


## Save evaluation report

In [14]:
from common.datasets import save_evaluation_report

# Hand the input dataset, the assessed frame, the evaluator and the run context
# to the report helper for this experiment/run id.
save_evaluation_report(
    experiment_id=EXPERIMENT_ID,
    run_id=RUN_ID,
    dataset_df=dataset_df,
    assessed_df=assessed_df,
    evaluator=evaluator,
    context=CONTEXT,
)