### Imports

In [None]:
import json
from rhino_inference_helpers import (
    load_tokenizer_and_model, 
    read_data, 
    inference, 
    create_dataset, 
    read_output_datasets
)

### Read the radiology reports

In [None]:
# Reads the input report file from /input/dataset.csv, where a dataset has been pre-loaded via FCP
# The result is kept in `reports`, which every later cell in this notebook works on.
reports = read_data()
# Bare expression: shown as the cell output so the loaded data can be inspected.
reports

In [None]:
# Display the first report
# Take the entry at index 0 of the 'report_content' column and strip leading/trailing
# whitespace before printing it.
report = reports['report_content'][0].strip()
print(report)

### Instantiate the Tokenizer and Model Objects

In [None]:
# Point this at the model artifacts directory
# (use a path under /external_data when the artifacts are read from S3).
model_artifacts = "/external_data/MY_MODEL"

# The helper returns the tokenizer and the model as a pair.
# NOTE(review): device_map="cuda" presumably places the model on the GPU - confirm against the helper.
tokenizer, model = load_tokenizer_and_model(
    model_artifacts,
    device_map="cuda",
)

### Run Inference in Q&A Format

In [None]:
# Add your question here (edit the instruction sentences below).
#
# The instruction is identical for every report, so it is built once, outside the loop.
# It is written as adjacent string literals, which Python joins at compile time. The
# earlier form continued a single literal with backslashes, which embedded each
# continuation line's source indentation (long runs of spaces) between the sentences
# of the prompt that was sent to the model.
prompt_prefix = (
    'Extract clinical entities from the text. '
    'Do not extract negative mention of an entity. '
    'Identify current findings and historical mentions: '
    'Format the response as JSON'
    '\n\n'
)

# One raw model response per report, in the same order as reports['report_content'].
responses = []
for i, report in enumerate(reports['report_content']):
    # Instruction first, then a blank line, then the report text.
    input_text = prompt_prefix + report
    responses.append(inference(input_text, tokenizer, model))
    # Progress indicator (1-based report count).
    print(f'Report number {i+1} analyzed')

In [None]:
# Responses post-processing
# Parse every raw model response from a JSON string into a Python object, keeping order.
# json.loads raises json.JSONDecodeError if any response is not valid JSON.
responses_as_json = list(map(json.loads, responses))

# Show the parsed output for the first report
print(responses_as_json[0])

### Create New Dataset for FCP


In [None]:
# Adding new column with desired information from LLM inference
# The parsed responses were produced by iterating reports['report_content'] in order,
# so entry k of responses_as_json is stored alongside report k.
# This modifies `reports` in place by adding a 'responses' column.
reports['responses'] = responses_as_json
# Bare expression: shown as the cell output so the new column can be inspected.
reports

In [None]:
# Save as a new dataset
# Hands the augmented `reports` (including the 'responses' column) to the helper.
# NOTE(review): where and in what format the helper writes is not visible here - see rhino_inference_helpers.
create_dataset(reports)

In [None]:
# Verify the output dataset
# Read the saved output back through the helper to confirm what was written.
# NOTE(review): the return type of read_output_datasets() is not visible here; the name
# `output_df` suggests a DataFrame - confirm.

output_df = read_output_datasets()
# Bare expression: shown as the cell output.
output_df


