In [1]:
from radprompter import Prompt, RadPrompter, vLLMClient, OllamaClient, OpenAIClient

prompt = Prompt("SIIM.toml")
MODEL = 'llama3-70b'
INPUT_FILE = '~/Desktop/SIIMCombinedReports.xlsx'
OUTPUT_FILE = '~/Desktop/SIIM_Results-' + MODEL + '.csv'
TEMP_OUT = '~/Desktop/output-' + MODEL + '.csv'

# use this command to port-forward from a server (roqril0006a) running vllm which will then appear on localhost port 10000:
# ssh -N -L localhost:10000:localhost:8000 bje01@roqril006a&

client = vLLMClient(
    model="meta-llama/Meta-Llama-3-70B-Instruct",
    base_url="http://localhost:10000/v1",
    temperature=0.0,
    seed=42
)
'''
client = OllamaClient(
    model="llama3",
    base_url="http://localhost:11434/v1",
    temperature=0.0,
    seed=42,
#    hide_blocks=True
)
'''
'''
client = OpenAIClient(
    model="GPT-4o",
    api_key="",
    temperature=0.0,
    seed=42
)
'''
# delete any prior output
import os
if os.path.exists(TEMP_OUT):
    os.remove(TEMP_OUT)


engine = RadPrompter(
    client=client,
    prompt=prompt, 
    hide_blocks=True,
    output_file=TEMP_OUT,
)



In [2]:
# now read in the reports from SIIMCombinedReports.xlsx into a pandas dataframe
import pandas as pd
import numpy as np 

# Load the Excel file into a DataFrame
reports_df = pd.read_excel(INPUT_FILE)
# strip spaces out of the FIndings column
reports_df['Findings'] = reports_df['Findings'].str.replace(' ', '')

reports_df['Report'] = reports_df['Report'].str.replace('\n', '')
reports_df['Report'] = reports_df['Report'].str.replace('_0x000D_', '')
reports_df['Report'] = reports_df['Report'].str.replace('    ', '')
reports_df['Report'] = reports_df['Report'].str.replace('  ', '')

reports_df = reports_df.replace({np.nan: 'No', 'None': 'No'})

reports_df

Unnamed: 0,Report,Findings,ExamClass
0,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST CO...,C1FX,Cervical Spine Fracture
1,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST CO...,C1FX,Cervical Spine Fracture
2,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST CO...,C1FX,Cervical Spine Fracture
3,"EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST, C...","C1FX,C2FX",Cervical Spine Fracture
4,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST CO...,"C1FX,C2FX",Cervical Spine Fracture
...,...,...,...
764,INDICATION: LETHARGY;_x000D__x000D_ACCESSION N...,No,Pneumonia
765,INDICATION: SOB;_x000D__x000D_ACCESSION No:_x0...,No,Pneumonia
766,INDICATION: CHEST PAIN;_x000D__x000D_ACCESSION...,No,Pneumonia
767,INDICATION: BRADYCARDIA;_x000D__x000D_ACCESSIO...,No,Pneumonia


In [3]:
# Splitting the reports_df into separate dataframes based on the 'ExamClass' column

# Creating a dictionary to hold the dataframes for each category
categories = reports_df['ExamClass'].unique()
print (categories)
dfs = {category: reports_df[reports_df['ExamClass'] == category] for category in categories}

# Now dfs dictionary contains separate dataframes for each category in 'ExamClass'
# For example, to access the dataframe for 'Cervical Spine Fracture', you can use dfs['Cervical Spine Fracture']
dfs['Cervical Spine Fracture']

['Cervical Spine Fracture' 'Glioma progression' 'Intracranial hemorrhage'
 'Liver metastases' 'Pneumonia' 'Pulmonary Embolism']


Unnamed: 0,Report,Findings,ExamClass
0,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST CO...,C1FX,Cervical Spine Fracture
1,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST CO...,C1FX,Cervical Spine Fracture
2,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST CO...,C1FX,Cervical Spine Fracture
3,"EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST, C...","C1FX,C2FX",Cervical Spine Fracture
4,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRAST CO...,"C1FX,C2FX",Cervical Spine Fracture
...,...,...,...
646,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRASTCO...,C3FX,Cervical Spine Fracture
647,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRASTCO...,"C6FX,C7FX",Cervical Spine Fracture
648,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRASTCO...,C1FX,Cervical Spine Fracture
649,EXAM: CT CERVICAL SPINE WITHOUT IV CONTRASTCO...,C3FX,Cervical Spine Fracture


In [4]:
# Summing up the number of rows with 'None' and not 'None' in the 'Findings' column for each category

# Initialize a dictionary to store the results
category_summary = {}

# Iterate over each category dataframe
for category, df in dfs.items():
    unique_values = df['Findings'].unique()  # Get unique values in 'Findings' column
    unique_counts = df['Findings'].value_counts()  # Count the number of each unique value
    total_count = len(df)  # Total number of rows
    category_summary[category] = {
        'Unique_Values': unique_values,
        'Unique_Counts': unique_counts,
        'Total': total_count
    }
# Print the results for each column
for category, counts in category_summary.items():
    print(f"Category: {category}")
    for value, count in counts['Unique_Counts'].items():
        print(f"{value}: {count} {count*100//counts['Total']}%")
    print(f"Total: {counts['Total']}")
    print()


Category: Cervical Spine Fracture
No: 84 62%
C2FX: 14 10%
C1FX: 6 4%
C7FX: 6 4%
C6FX: 4 2%
C5FX,C6FX: 3 2%
C6FX,C7FX: 3 2%
C1FX,C2FX: 2 1%
C3FX: 2 1%
C4FX,C5FX: 2 1%
C4FX: 2 1%
C2FX,C7FX: 1 0%
C2FX,C5FX,C6FX: 1 0%
C4FX,C5FX,C7FX: 1 0%
C2FX,C3FX: 1 0%
C2FX,C3FX,C4FX: 1 0%
C5FX: 1 0%
Total: 134

Category: Glioma progression
STABLE: 105 57%
PROGRESSION: 50 27%
IMPROVED: 27 14%
PSEUDOPROGRESSION: 1 0%
Total: 183

Category: Intracranial hemorrhage
No: 62 47%
SDH: 36 27%
SDH,EDH: 5 3%
IPH: 3 2%
SAH: 3 2%
SDH,SAH,IPH: 2 1%
SDH,SAH: 2 1%
SAH,IPH: 2 1%
IVH,SAH: 1 0%
IVH,IPH,SAH,INFARCT: 1 0%
IVH,IPH: 1 0%
SAH,SDH,IVH,IPH: 1 0%
IPH,IVH: 1 0%
SAH,IPH,IVH: 1 0%
SAH,IPH,IVH,SDH: 1 0%
SAH,IVH: 1 0%
SAH,IVH,IPH: 1 0%
IVH,INFARCT: 1 0%
IVH: 1 0%
SDH,IPH: 1 0%
SDH,IPH,FX: 1 0%
IPH,SDH: 1 0%
IPH,SAH,IVH: 1 0%
SDH,SAH,IVH: 1 0%
Total: 131

Category: Liver metastases
Present: 55 52%
No: 50 47%
Total: 105

Category: Pneumonia
Present: 63 54%
No: 53 45%
Total: 116

Category: Pulmonary Embolism
Present: 51 5

In [5]:
# Extract all reports from the 'Report' column and clean them by removing extra whitespace and blank lines
reports = [{'report': report.strip(), 'filename': category} for report, category in zip(reports_df['Report'], reports_df['ExamClass']) if report.strip()]


In [6]:

print ('Doing inference...')
out=engine(reports)


Doing inference...


Processing items:   0%|          | 0/769 [00:20<?, ?it/s]


In [None]:
import pandas as pd

output_df = pd.read_csv(TEMP_OUT, index_col='index')
# rename the colume in output_df from 'filename' to 'ExamClass'
out_df = output_df.rename(columns={'filename': 'ExamClass'}) 

# Delete the column with reports
out_df.drop(columns=['report'], inplace=True, axis=1)
# Merge the 'Findings' column from reports_df into output_df
out_df = out_df.join(reports_df['Findings'])

out_df


FileNotFoundError: [Errno 2] No such file or directory: 'output.csv'

In [None]:

OUTPUT_FILE = '~/Desktop/SIIM_Results-llama3-70b.csv'
# Write the combined dataframe to a CSV fil
out_df.replace('Absent', 'No', inplace=True)
out_df.replace('Present', 'Yes', inplace=True)

if os.path.exists(OUTPUT_FILE):
    os.remove(OUTPUT_FILE)
# Write the combined dataframe to a CSV fil
out_df.to_csv(OUTPUT_FILE)
print('The below should show only results, no reports or other PHI. Please send this file back to BJE@mayo.edu')

out_df

The below should show only results, no reports or other PHI. Please send this file back to BJE@mayo.edu


Unnamed: 0_level_0,Pulmonary Embolism_response,Pneumonia_response,LiverMets_response,C1FX_response,C2FX_response,C3FX_response,C4FX_response,C5FX_response,C6FX_response,C7FX_response,GliomaStatus_response,ExamClass,Findings
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,No,No,No,Yes,No,No,No,No,No,No,No,Cervical Spine Fracture,C1FX
1,No,No,No,Yes,No,No,No,No,No,No,No,Cervical Spine Fracture,C1FX
2,No,No,No,Yes,No,No,No,No,No,No,No,Cervical Spine Fracture,C1FX
3,No,No,No,Yes,Yes,No,No,No,No,No,No,Cervical Spine Fracture,"C1FX,C2FX"
4,No,No,No,Yes,No,No,No,No,No,No,No,Cervical Spine Fracture,"C1FX,C2FX"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
764,No,No,No,No,No,No,No,No,No,No,No,Pneumonia,No
765,No,No,No,No,No,No,No,No,No,No,No,Pneumonia,No
766,No,No,No,No,No,No,No,No,No,No,No,Pneumonia,No
767,No,No,No,No,No,No,No,No,No,No,No,Pneumonia,No
