In [1]:
import pandas as pd
import numpy as np
import openai
import re
import time
import os

In [71]:
# Source CSV of CPT codes that still lack a summary.
input_file_path = 'cpt_code_noSummary.csv'
# Results are written next to the input, with '_GPT_processed' inserted before the extension.
output_file_path = os.path.splitext(input_file_path)[0] + '_GPT_processed.csv'

In [72]:
# Read the input CSV, drop rows with any missing value, and keep only the first
# 5 rows (a small trial batch).
# 'Code' is forced to string: CPT codes are identifiers (e.g. '0012F'), and letting
# pandas infer the dtype can yield a mixed-type column in which an all-digit code
# with a leading zero is parsed as an integer.
df = pd.read_csv(input_file_path, dtype={'Code': str}).dropna().head(5)
df

  df = pd.read_csv(input_file_path).dropna().head(5)


Unnamed: 0,Code,Description
0,44603,No Summary found for this code
1,0012F,No Summary found for this code
2,0014F,No Summary found for this code
3,0015F,No Summary found for this code
4,0582F,No Summary found for this code


In [73]:
# Shared instruction block that is prepended to the per-code request line when the
# prompt is built. It tells the model the task, shows the expected
# "[Description]: ..." answer format, and gives the "Not Sure" fallback answer.
# The backslash at the end of the first instruction line is a line continuation
# inside the literal, so the two instruction sentences are sent as a single line.
PREFIX = '''
###### Instructions ######
I am working with Current Procedural Terminology (CPT) reimbursement code and need a long description on the corresponding CPT code. I need your help to provide a long description for each CPT code. \
Please provide a one-sentence description for each CPT code based on information from MD Clarity, Find-A-Code, or GenHealth.ai as available.

###### Example of Desired Response ######
[Description]: The provider performs anesthesia services for a patient undergoing an intracranial procedure, including electrocoagulation of an intracranial nerve, which means to stop bleeding using electrocautery, or an electrical current.

If you are unsure about the long desceription of the CPT code, please respond with:
[Description]: Not Sure

'''

In [74]:
# Resumable batch job: for every row of `df` that has no 'GPT description' yet, ask
# the chat model for a one-sentence description of the row's CPT code, store the raw
# and cleaned answers, and checkpoint the whole frame to `output_file_path`.

# ---------------------------- credentials ----------------------------
# The API key is read from the environment instead of being embedded in the notebook.
# NOTE(review): a literal key was previously committed in this cell; treat it as
# compromised and revoke it.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

# ---------------------------- load checkpoint or initialise ----------------------------
if os.path.exists(output_file_path):
    # Keep CPT codes as text so a resumed run sends exactly the same code strings.
    df = pd.read_csv(output_file_path, dtype={'Code': str})
    for column in ('GPT description', 'GPT description cleaned'):
        if column not in df.columns:
            df[column] = None
        # A column that is entirely empty is read back as float64; make it object so
        # string answers can be written into it.
        df[column] = df[column].astype(object)
    pending = df.index[df['GPT description'].isna()]
    if len(pending):
        print(f"Resuming from checkpoint, starting at row {pending[0]}...")
    else:
        print("Resuming from checkpoint, all rows already have a description.")
else:
    # If there is no checkpoint, then add new columns to the original DataFrame for storing results
    df['GPT description'] = None
    df['GPT description cleaned'] = None


# ---------------------------- process every unanswered row ----------------------------
n_calls = 0  # number of API calls that returned a response in this run
for index, row in df.iterrows():
    # skip rows that already have an answer (e.g. restored from the checkpoint)
    if pd.notnull(row['GPT description']):
        continue

    #---------------------------- create prompt ----------------------------
    cpt_code = row['Code']
    cpt_code_for_prompt = f'###### Now here is the CPT Code that Need Description: {cpt_code} ######'

    prompt = f'{PREFIX}{cpt_code_for_prompt}'

    # ---------------------------- send message and get response ----------------------------
    message = [
        {
            "role": "system",
            "content": "I am a language model designed to generate detailed descriptions of Current Procedural Terminology (CPT) codes. Please help by providing a precise, one-sentence description for each CPT code given. If the correct description is uncertain, please respond with '[Description]: Not Sure'."
        },
        {"role": "user", "content": prompt}
    ]

    made_call = False  # True only when the API returned a response for this row
    try:
        response = openai.ChatCompletion.create(
                model="gpt-4o-2024-08-06",
                messages=message
            )
        n_calls += 1
        made_call = True

    #---------------------------- parse response ----------------------------
        output = response['choices'][0]['message']['content'].strip()
        # Everything after the "[Description]:" tag is the cleaned description;
        # None when the model did not follow the requested format.
        description = re.search(r'\[Description\]:\s*(.+)', output, re.DOTALL)
        description = description.group(1) if description else None
        print(f"====== output for {cpt_code}, {output} ======")
        print(f"====== cleaned description for {cpt_code}: {description} ======")

        # update dataframe
        df.at[index, 'GPT description'] = output
        df.at[index, 'GPT description cleaned'] = description

    # ---------------------------- error handling and save checkpoint ----------------------------
    except Exception as e:
        # Best effort: the row stays empty and is picked up again on the next run.
        print('Error occured at row: ', index, ' with error: ', e)
        # Sleep for 10 seconds to avoid rate limit
        time.sleep(10)

    # save checkpoint
    df.to_csv(output_file_path, index=False)

    # Pause after every 10th successful call. Checking `made_call` prevents an extra
    # 20-second sleep after a failed call (including failures before the first success).
    if made_call and n_calls % 10 == 0:
        print("Sleeping for 20 seconds to avoid rate limit...")
        time.sleep(20)



In [70]:
# Preview of the full prompt text; `cpt_code_for_prompt` holds whatever the
# processing loop assigned last, so this cell only works after that loop has run.
PREFIX + cpt_code_for_prompt

'\n###### Instructions ######\nI am working with CPT reimbursement code and need a long description on the corresponding CPT code. I need your help to provide a long description for each CPT code. Please provide a one-sentence description for each CPT code based on information from MD Clarity, Find-A-Code, or GenHealth.ai as available.\n\n###### Example of Desired Response ######\n[Description]: The provider performs anesthesia services for a patient undergoing an intracranial procedure, including electrocoagulation of an intracranial nerve, which means to stop bleeding using electrocautery, or an electrical current.\n\nIf you are unsure about the long desceription of the CPT code, please respond with:\n[Description]: Not Sure\n\n###### Now here is the CPT Code that Need Description: 0582F ######'

In [69]:
# Reload the checkpoint file to inspect the results of the trial run.
# 'Code' is read as text so that CPT codes are not coerced to numbers; this frame is
# written back to the same file by the save cell, so any coercion here would be persisted.
df_output = pd.read_csv(output_file_path, dtype={'Code': str})
df_output

Unnamed: 0,Code,Description,GPT description,GPT description cleaned
0,44603,No Summary found for this code,[Description]: The provider performs an open s...,The provider performs an open surgical procedu...
1,0012F,No Summary found for this code,[Description]: Not Sure,Not Sure
2,0014F,No Summary found for this code,[Description]: Not Sure,Not Sure
3,0015F,No Summary found for this code,[Description]: Not Sure,Not Sure
4,0582F,No Summary found for this code,[Description]: Not Sure,Not Sure


In [None]:
# Write the reloaded results back to the output file (without the index column),
# then display them.
df_output.to_csv(path_or_buf=output_file_path, index=False)
df_output