In [14]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize

In [None]:
cpt_file = "cpt_codes_excludeNotFound.csv"
cpt_data = pd.read_csv(cpt_file)\
# clean cpt_data
cpt_data = cpt_data.drop_duplicates(subset=None, keep='first', inplace=False)

# Extract codes and descriptions
cpt_descriptions = cpt_data["Description"].tolist()
cpt_descriptions

['The provider performs anesthesia services for a patient undergoing an intracranial procedure, including electrocoagulation of an intracranial nerve, which means to stop bleeding using electrocautery, or an electrical current.',
 'The provider performs anesthesia services for a patient undergoing a procedure on the facial bones or the skull.',
 'The provider performs anesthesia services for a patient undergoing radical surgery procedures on the facial bones or skull. This may include prognathism, which is a protrusion of the lower jaw.',
 'The provider performs anesthesia services for a patient undergoing intracranial procedures that are not specifically described by another anesthesia code.',
 'The provider performs anesthesia services for a patient undergoing intracranial procedures that involve removing a small portion of skull bone to take out a hematoma or blood clot from within the brain or tissues surrounding it.',
 'The provider performs anesthesia services for a patient under

In [8]:
cpt_data.head()

Unnamed: 0,Code,Description
0,222,The provider performs anesthesia services for ...
1,190,The provider performs anesthesia services for ...
2,192,The provider performs anesthesia services for ...
3,210,The provider performs anesthesia services for ...
4,211,The provider performs anesthesia services for ...


In [6]:
pitch_text = "Behavioral Health Support for Primary Care Integrated, Virtual Mental Health Care for Family Medicine, Pediatrics and OBGYN OUR  MISSION Increasing Access to Mental Health Services in Underresourced Areas  While about a quarter of adults have a mental illness, 50 %  of the US population lives in a mental health shortage area.  [company name] sets out to help patients in areas where support is otherwise unavailable.  THE PROBLEM 25% 80% 50%  of PCP visits involve mental health Patients overwhelmingly bring their mental health concerns to primary care providers instead of going directly to specialty care. PCPs act as the de facto triage point but have limited support.of patients with mental health concerns seek support from primary care PCPs work in 15-minute appointments and typically have limited resources and training, putting a strain both on them and their patients.Of the US population lives in a mental health shortage area Given massive shortages, there are frequently 6 +  month waitlists for patients who need specialty mental health support.Communities Lack Access to Mental Healthcare Patients look to primary care as their front line of support, however providers are poorly equipped Collaborative Care ([name]) [name]  integrates physical and mental health care in the primary care setting through counseling and consulting psychiatry.  There have been 80 +  academic studies *  showing :  [name] is highly effective (2x vs. usual care) 1. [name] reduces cost of care substantially 2. [name] drives up patient and provider satisfaction 3. *See details here THE CARE MODEL [company name] Has Aligned Incentives With Primary Care Clinics Improve Outcomes  & Patient Satisfaction Studies of the [name] Model have shown that it is more than 2x as effective as usual care in driving mental health symptoms to remission.  Moreover, 75 %  of patients who are treated through [name] models are highly satisfied with their care. Activate A New  FFS Revenue Stream Collaborative care can generate significant new revenue for partner practices through FFS reimbursement. [company name] generates ~$ 10k in contrbution margin per engaged PCP / year. Improve Risk Adjustments & Reduce Cost of Care We can drive an additional increase in revenue through refining mental health diagnoses and improving risk adjustments. Additionally, [name] has been proven to reduce patient cost of care by $ 3,400 over 4 years. VALUE PROPOSITION. Thank you For more info, contact : [email]"


In [21]:
pitch_text_summarized = "The primary service is the integration of virtual mental health care into primary care settings such as family medicine, pediatrics, and OBGYN, aimed at increasing access to mental health services in underresourced areas. The key components include counseling and consulting psychiatry delivered through a collaborative care model, which has been shown to be over two times more effective than usual care and is supported by over 80 academic studies. Special considerations highlight that this model not only improves patient and provider satisfaction but also creates new revenue streams for partner practices through fee-for-service reimbursement, and it reduces overall patient care costs."

### TF-IDF Matching
Corpus: combine all CPT descriptions and pitch text

In [None]:
# Corpus: combine all CPT descriptions and pitch text
# Function to perform TF-IDF matching to find the most relevant CPT codes
def find_relevant_cpt_codes_tfidf(pitch_text, cpt_codes_df, top_n=5):
    # Combine extracted text with CPT descriptions for TF-IDF
    corpus = cpt_data['Description'].tolist() + [pitch_text]
    
    # Convert the corpus into a TF-IDF matrix
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(corpus)
    
    # Calculate cosine similarity between the extracted text and all CPT descriptions
    cosine_similarities = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()
    
    # Get the indices of the top matches
    top_indices = cosine_similarities.argsort()[-top_n:][::-1]
    
    # Get the corresponding codes and descriptions for the best matches
    relevant_codes = []
    for idx in top_indices:
        relevant_codes.append({
            'Code': cpt_data.iloc[idx]['Code'],
            'Description': cpt_data.iloc[idx]['Description'],
            'Score': cosine_similarities[idx]
        })
    
    return relevant_codes

relevant_codes = find_relevant_cpt_codes_tfidf(pitch_text, cpt_data)

# Display the relevant CPT codes
for code in relevant_codes:
    print(f"Code: {code['Code']}, Description: {code['Description']}, Score: {code['Score']}")


Code: 99375, Description: Report this service when the provider supervises the care provided by a home health agency, overseeing the plan of care, while the patient is at home or in a facility meant for long–term care. For 99375 the provider should spend 30 minutes or more on a patient’s indirect care., Score: 0.2981006280009036
Code: 96156, Description: The provider assesses psychological, behavioral, emotional, cognitive, and social factors that affect a patient’s physical health, rather than assessing a specific mental health disorder. There are no time limits applied to this code. Report this code for an initial or repeat health behavior assessment., Score: 0.2731864380021739
Code: 99493, Description: A provider performs psychiatric collaborative care management (CoCM) for a patient receiving behavioral health treatment and regular psychiatric interspecialty consultation in collaboration and in conjunction with a patient’s treating (or billing) primary care provider.?Report?99493?f

Corpus: combine description for the CPT in this row and pitch text

In [17]:
import nltk
nltk.download('punkt')


[nltk_data] Downloading package punkt to /Users/apple/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [19]:
def find_relevant_cpt_codes_tfidf(pitch_text, cpt_data, top_n=5):
    # Initialize the Porter Stemmer
    stemmer = PorterStemmer()
    
    # Define a function to stem the text
    def stem_text(text):
        words = word_tokenize(text.lower())  # Tokenize and convert to lowercase
        stemmed_words = [stemmer.stem(word) for word in words if word.isalnum()]  # Apply stemming, keep only alphanumeric words
        return ' '.join(stemmed_words)
    
    # Preprocess the pitch text
    pitch_text_stemmed = stem_text(pitch_text)
    
    # Create a list to store similarity scores
    relevant_codes = []
    
    # Iterate through each CPT description to compare with the pitch text
    for index, row in cpt_data.iterrows():
        # Preprocess the CPT description
        description_stemmed = stem_text(row['Description'])
        
        # Create the corpus with the stemmed text
        corpus = [description_stemmed, pitch_text_stemmed]
        
        # Convert the corpus into a TF-IDF matrix
        vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = vectorizer.fit_transform(corpus)
        
        # Calculate cosine similarity between the extracted text and the CPT description
        cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2]).flatten()[0]
        
        # Append the code, description, and similarity score
        relevant_codes.append({
            'Code': row['Code'],
            'Description': row['Description'],
            'Score': cosine_sim
        })
    
    # Sort the relevant codes by similarity score in descending order and return the top matches
    relevant_codes = sorted(relevant_codes, key=lambda x: x['Score'], reverse=True)[:top_n]
    return relevant_codes

In [None]:
relevant_codes = find_relevant_cpt_codes_tfidf(pitch_text, cpt_data)

# Display the relevant CPT codes
for code in relevant_codes:
    print(f"Code: {code['Code']}, Description: {code['Description']}, Score: {code['Score']}")

Code: 99375, Description: Report this service when the provider supervises the care provided by a home health agency, overseeing the plan of care, while the patient is at home or in a facility meant for long–term care. For 99375 the provider should spend 30 minutes or more on a patient’s indirect care., Score: 0.3519161022075752
Code: 99493, Description: A provider performs psychiatric collaborative care management (CoCM) for a patient receiving behavioral health treatment and regular psychiatric interspecialty consultation in collaboration and in conjunction with a patient’s treating (or billing) primary care provider.?Report?99493?for the first 60 minutes of CoCM in a subsequent month after the first month of care.?, Score: 0.32235474941523745
Code: 99374, Description: Report this service when the provider oversees the plan of care provided by a home health agency while the patient is at home or in a facility meant for long–term care. For 99374 the provider should spend a minimum 15–

In [39]:
cpt_data[cpt_data['Description'].str.contains(r'\?', na=False)]


Unnamed: 0,Code,Description
10326,99492,A provider performs psychiatric collaborative ...
10327,99493,A provider performs psychiatric collaborative ...
10526,0671T,The provider inserts one or more drainage devi...
10824,0671T,The provider inserts one or more drainage devi...


Unnamed: 0,Code,Description
0,222,The provider performs anesthesia services for ...
1,190,The provider performs anesthesia services for ...
2,192,The provider performs anesthesia services for ...
3,210,The provider performs anesthesia services for ...
4,211,The provider performs anesthesia services for ...
...,...,...
10983,0804T,The provider performs an in–person evaluation ...
10984,0805T,The provider uses a percutaneous femoral vein ...
10985,0806T,The provider uses an open femoral vein approac...
10986,0807T,The provider uses software that analyzes the p...
