# Flag Multiple Tumours

In [None]:
import pandas as pd
from dotenv import load_dotenv

from src.helpers import load_config_from_yaml
from src.text_preprocessor import TextPreprocessor
from src.prompt_builder import PromptBuilder
from src.model_request import ModelRequest
from src.extractor_pipeline import ExtractorPipeline
from src.post_processor import PostProcessor
from src.custom_logging import setup_logging
from src.cme_evaluator import CMEEvaluator
from src.load_data import save_eval_df_to_s3, load_dataframe_from_s3, save_dataframe_to_s3
import config.pipeline_config as conf
from config.validation_config import MultiSchema

## Load config

In [None]:
# Load variables from a local .env file first, so that anything they configure
# is already in the environment before the config and S3 calls below run.
# NOTE(review): whether those calls read the environment is not visible here;
# loading up-front is harmless either way.
load_dotenv()

# Define config file path
conf_file_path = "./config/local.yaml"
# Load config
yaml_conf = load_config_from_yaml(file_path=conf_file_path)

# Get bucket name and data name from config.
bucket_name = yaml_conf.get("BUCKET_NAME")
data_name = yaml_conf.get("THE_DATA")

# Model and S3 info:
model_id = yaml_conf.get("MODEL_ID")
model_args = yaml_conf.get("MODEL_ARGS")
output_folder = yaml_conf.get("MULTI_TUMOUR_S3_FOLDER")
cme_prompt_id = yaml_conf.get("MULTI_TUMOUR_PROMPT_MANAGEMENT_ID")
cme_prompt_name = yaml_conf.get("MULTI_TUMOUR_PROMPT_MANAGEMENT_NAME")

# Column names ER_SCORE_1..4 followed by PR_SCORE_1..4, passed to the loader as float_columns.
float_columns = [f"{marker}_SCORE_{i}" for marker in ("ER", "PR") for i in range(1, 5)]

# Load in the record table. Use the .head(x) to only use the first x reports (useful for a quick test).
records = load_dataframe_from_s3(bucket_name, data_name, float_columns).head(5)

print(f"There are {records.shape[0]} records in this dataframe.")

In [None]:
# Post-processor built from the loaded records and the accepted multi-tumour values.
post_processor = PostProcessor(
    records,
    conf.multi_tumour_accepted_values.keys(),
    conf.multi_tumour_accepted_values,
)

# For each reference column in turn: fill missing entries with "0", then run the
# row-wise general mapping with {"y": "1"} on that column.
for reference_col in ("Multiple Tumours", "Multiple Tumours New"):
    records[reference_col] = records[reference_col].fillna("0")
    records = records.apply(
        post_processor.apply_general_mapping,
        axis=1,
        mapping={"y": "1"},
        cols_to_map=[reference_col],
    )

## Edit prompt

In [None]:
# Placeholder prompt strings to edit by hand. In this notebook they are only
# referenced by the commented-out PromptBuilder in the pipeline cell.

# User prompt layout (keeps the leading and trailing newline).
prompt_layout = """
PROMPT HERE
"""

# System prompt.
system_prompt = "SYSTEM PROMPT HERE"

In [None]:
# RUN THE PIPELINE HERE
# Logging set-up: console output disabled, file output enabled.
setup_logging(
    enable_console=False,
    enable_file=True,
    console_log_level=conf.console_log_level,
    log_dir=conf.log_dir,
)

preprocessor = TextPreprocessor()

# Option A - build the prompt from the system_prompt / prompt_layout strings defined in this notebook:
# prompter = PromptBuilder(
#     model_id=model_id,
#     prompt_layout=prompt_layout,
#     system_prompt=system_prompt,
# )

# Option B (active) - use a prompt version from prompt management.
prompter = PromptBuilder(
    model_id=model_id,
    system_prompt=conf.multi_tumour_system_prompt,
    prompt_id=cme_prompt_id,
    prompt_version=conf.multi_tumour_prompt_version,
)

requester = ModelRequest(model_id, model_args, prompter)

extractor_pipeline = ExtractorPipeline(
    config_file_path=conf_file_path,
    preprocessor=preprocessor,
    model_request=requester,
    valid_structure=MultiSchema,
    accepted_values=conf.multi_tumour_accepted_values,
)

# Run the extraction over the loaded records; the result is reused by the evaluation cells.
output_df = extractor_pipeline.run(df=records)

## Evaluation

In [None]:
# DEFINE WHICH COLUMNS TO COMPARE
# Each key is a reference column; its value is the column it is compared against.
original_compare_cols = {
    "Multiple Tumours": "multi_tumour",
    "Multiple Tumours New": "multi_tumour",
}

# Name of the status column passed to the evaluator's status helpers.
status_column = "status"

In [None]:
# SET UP THE EVALUATOR
# Join the reference records to the pipeline output on the report id.
# how="inner" is the pandas default, spelled out here: ids missing from either side are dropped.
eval_df = records.merge(output_df, how="inner", on="PATHOLOGY_ID")

evaluator = CMEEvaluator(
    df=eval_df,
    id_col="PATHOLOGY_ID",
    comparison_dict=original_compare_cols,
    accepted_values=conf.multi_tumour_accepted_values,
)

In [None]:
# evaluator = CMEEvaluator(comparison_dict=original_compare_cols,
#                          accepted_values=conf.multi_tumour_accepted_values,
#                          id_col="PATHOLOGY_ID",
#                          bucket_name=bucket_name,
#                          folder=output_folder,
#                          list_saved=True
#                         )

### First Check the breakdown of statuses

* **valid**: The output parsed as JSON, all of the expected keys are present, and every value is in the accepted value list.
* **partial**: The output parsed as JSON, but some of the expected keys are missing or the value for a given key is not an accepted value.
* **invalid**: The output parsed as JSON, but none of the expected keys are present.
* **validation_failed**: The output could not be parsed as JSON.

We want to maximise the number of valid outputs.

In [None]:
# Breakdown of the statuses (valid / partial / invalid / validation_failed) found in status_column.
evaluator.get_status_summary(status_column)

In [None]:
# Presumably shows the rows whose status is "validation_failed" - confirm against CMEEvaluator.
evaluator.get_validation_failed(status_column)

In [None]:
# Presumably shows the rows whose status is "invalid" - confirm against CMEEvaluator.
evaluator.get_invalid(status_column)

In [None]:
# Presumably summarises values outside the accepted lists for all compared columns - confirm against CMEEvaluator.
evaluator.get_non_accepted_summary_all()

In [None]:
# Print the "model_output" text for id_val=0.
# NOTE(review): not clear from here whether id_val is a PATHOLOGY_ID or a row position - confirm.
evaluator.print_text(text_col="model_output", id_val=0)

### Plots

In [None]:
# Presumably plots overall correctness and the row-wise distribution - confirm against CMEEvaluator.
evaluator.plot_correctness_and_rowwise_distribution()

In [None]:
# Presumably draws the per-metric plots for every compared column - confirm against CMEEvaluator.
evaluator.plot_per_metric_plots_for_all()

### Exploring the specific differences between Actual and Expected

In [None]:
# Reference column to inspect, and the column it is compared against.
actual_column = "Multiple Tumours New"
extracted_column = original_compare_cols[actual_column]

# Change these to the (actual, extracted) pair you want to inspect.
# Earlier in this notebook the reference columns were encoded as "1" (originally "y")
# and "0" (originally missing), so this pair picks rows where the reference is "0"
# but the extracted value is "1".
# NOTE(review): assumes the extracted column uses the same "0"/"1" encoding - confirm.
actual_value = "0"
extracted_value = "1"

# Rows of the evaluator's dataframe that match the chosen pair.
reference_matches = evaluator.df[actual_column] == actual_value
extraction_matches = evaluator.df[extracted_column] == extracted_value
evaluator.df[reference_matches & extraction_matches]

In [None]:
# Print the "preprocessed_REPORT" text for id_val=0.
# NOTE(review): not clear from here whether id_val is a PATHOLOGY_ID or a row position - confirm.
evaluator.print_text(text_col="preprocessed_REPORT", id_val=0)

In [None]:
# Display the dataframe returned by extractor_pipeline.run.
output_df

In [None]:
# Presumably lists the stored versions of the prompt identified by cme_prompt_id - confirm against PromptBuilder.
prompter.list_prompt_versions(cme_prompt_id)

In [None]:
# Describe this run: what was good about it, what changes were made, and why you are saving it.
overall_description = "DESCRIPTION"

In [None]:
# Saves Prompt Version - you will need to make sure the version is the correct one. 
# prompter.save_prompt_version(cme_prompt_id, cme_prompt_name, 1, overall_description)

# Saves Evaluation Outputs
# save_eval_df_to_s3(df=eval_df,
#                    bucket_name=bucket_name,
#                    folder=output_folder,
#                    description=overall_description)