# Installing required packages


In [None]:
# Notebook setup cell: the lines starting with "!" are shell commands run by
# the notebook, not Python statements.
# NER package that provides `ner_prediction`, imported in later cells.
!pip install Bio-Epidemiology-NER
# NOTE(review): presumably pinned below 2.0 because the captured cell outputs
# show `DataFrame.append` being called, which pandas 2.0 removed - confirm.
!pip install pandas==1.5.3
import nltk
# Fetch the 'punkt_tab' NLTK resource.
# NOTE(review): nothing in this notebook calls NLTK directly; presumably a
# dependency of the NER package needs it - confirm.
nltk.download('punkt_tab')
# Replace the preinstalled PyTorch stack with fixed versions built for CUDA 11.8.
!pip uninstall -y torch torchvision torchaudio
!pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 torchaudio==2.0.2+cu118 --index-url https://download.pytorch.org/whl/cu118


# Abbreviations dictionary

In [None]:
import pandas as pd

# Load the abbreviation table from Google Drive and preview its first rows.
# The columns used later are 'abbrevation' (sic) and 'full_form'.
a = pd.read_csv('/content/drive/MyDrive/NLP/medical_abbrevations.csv')
a.head()


Unnamed: 0,abbrevation,full_form
0,aa,aplastic anemia
1,aaa,abdominal aortic aneurysm
2,aaox3,"awake, alert, and oriented to person, place, a..."
3,ac,before meals
4,acl,anterior cruciate ligament


# Text PreProcessing

In [None]:
import re
import pandas as pd
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS


def basic_cleaning(text):
    """Return a lowercase, punctuation-free, single-spaced copy of ``text``.

    Every character that is not an ASCII letter, digit or whitespace is
    deleted (not replaced by a space), so hyphenated tokens are fused.
    """
    lowered = text.lower()
    # Delete everything except ASCII letters, digits and whitespace.
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', '', lowered)
    # Collapse whitespace runs into one space and trim both ends.
    return re.sub(r'\s+', ' ', alnum_only).strip()


def remove_stopwords(text, stopwords):
    """Drop every whitespace-separated token of ``text`` found in ``stopwords``.

    The surviving tokens are returned joined by single spaces; matching is
    exact and case-sensitive.
    """
    kept = []
    for token in text.split():
        if token in stopwords:
            continue
        kept.append(token)
    return ' '.join(kept)

def load_abbreviation_dict(csv_file):
    """Load the abbreviation -> full-form mapping from a CSV file.

    The CSV must contain the columns 'abbrevation' (sic) and 'full_form'.

    Args:
        csv_file: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        dict mapping each abbreviation string to its full-form string. Rows
        with an empty abbreviation or an empty full form are skipped.
    """
    # Read every cell as text and treat only truly empty cells as missing.
    # With pandas' default NA parsing, abbreviations such as "na" or "null"
    # would silently become NaN keys that can never match a word.
    df = pd.read_csv(csv_file, dtype=str, keep_default_na=False, na_values=[''])
    # A missing full form would otherwise be a float NaN and break the
    # ' '.join(...) performed when abbreviations are expanded.
    df = df.dropna(subset=['abbrevation', 'full_form'])
    return dict(zip(df['abbrevation'], df['full_form']))

def expand_abbreviations(text, abbrev_dict):
    """Replace each token of ``text`` that is a key of ``abbrev_dict``.

    Tokens are whitespace-separated; tokens without an entry are kept as-is.
    The result is re-joined with single spaces.
    """
    return ' '.join(abbrev_dict.get(token, token) for token in text.split())

def custom_preprocessing(text, abbrev_dict, stopwords, medical_terms=None):
    """Clean ``text``, expand abbreviations, then remove stopwords.

    Args:
        text: Raw input text.
        abbrev_dict: Mapping of abbreviation -> full form.
        stopwords: Collection of words to drop.
        medical_terms: Unused. Kept (now optional) so existing four-argument
            callers keep working while matching the three-argument signature
            used by the later versions of this function.

    Returns:
        The preprocessed text as a single-spaced string.
    """
    text = basic_cleaning(text)
    # Expansion runs on the cleaned (lowercased) text, so dictionary keys are
    # matched in lowercase form.
    text = expand_abbreviations(text, abbrev_dict)
    # Stopwords are removed last so words introduced by expansion are filtered too.
    text = remove_stopwords(text, stopwords)
    return text

# UMLS API Integration

In [None]:
import requests

# Set up your UMLS API key
# The key is a credential, so it is read from the UMLS_API_KEY environment
# variable rather than being committed with the notebook. Set it before running
# this cell, e.g. os.environ['UMLS_API_KEY'] = '<your key>'.
# NOTE(review): the key that used to be hard-coded here has been published with
# this file and should be revoked.
import os

API_KEY = os.environ.get('UMLS_API_KEY', '')

# Function to get Ticket Granting Ticket (TGT)
def get_umls_auth_token(api_key, timeout=30):
    """Request a Ticket Granting Ticket (TGT) from the UMLS login service.

    Args:
        api_key: UMLS API key.
        timeout: Seconds to wait for the server before giving up; without it
            a stalled connection would block the notebook indefinitely.

    Returns:
        The TGT, taken from the response's 'location' header.

    Raises:
        RuntimeError: If the service does not answer with HTTP 201.
    """
    auth_endpoint = 'https://utslogin.nlm.nih.gov/cas/v1/api-key'
    # Passing a mapping lets requests form-encode the body and set the
    # Content-Type header itself.
    response = requests.post(auth_endpoint, data={'apikey': api_key}, timeout=timeout)
    if response.status_code != 201:
        raise RuntimeError('Error retrieving UMLS authentication token: ' + response.text)
    # Extract TGT from the Location header
    return response.headers['location']

# Function to get a Service Ticket (ST) using TGT
def get_service_ticket(tgt, timeout=30):
    """Exchange a TGT for a Service Ticket (ST).

    Args:
        tgt: TGT as returned by ``get_umls_auth_token``; it is used as the
            URL the request is posted to.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The service ticket (the response body text).

    Raises:
        RuntimeError: If the service does not answer with HTTP 200.
    """
    service = 'http://umlsks.nlm.nih.gov'
    response = requests.post(tgt, data={'service': service}, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError('Error retrieving UMLS service ticket: ' + response.text)
    return response.text
# Smoke-test the authentication flow: obtain one TGT, show it, and exchange
# that same TGT for a service ticket (no second authentication round-trip).
_smoke_tgt = get_umls_auth_token(API_KEY)
print(_smoke_tgt)
get_service_ticket(_smoke_tgt)

# Function to search for the CUI of a medical term
def search_umls_cui(term, ticket, timeout=30):
    """Look up the CUI of ``term`` with an exact-match UMLS search.

    Args:
        term: Term to search for.
        ticket: Service ticket for this request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The 'ui' of the first search result, or None when nothing matched or
        the request failed (a message is printed in both cases).
    """
    search_endpoint = 'https://uts-ws.nlm.nih.gov/rest/search/current'
    params = {
        'string': term,
        'ticket': ticket,
        'searchType': 'exact'  # Using 'exact' to get the best match
    }

    response = requests.get(search_endpoint, params=params, timeout=timeout)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    matches = response.json()['result']['results']
    # NOTE(review): UTS search responses have been documented to signal
    # "no results" with a placeholder entry whose ui is "NONE"; treat that as
    # no match instead of returning it as a CUI - confirm against current API.
    if not matches or matches[0]['ui'] == 'NONE':
        print("No CUI found for the term.")
        return None
    # Extract the first CUI found
    return matches[0]['ui']

def get_umls_definitions(cui, ticket, timeout=30):
    """Fetch the definition texts recorded for a CUI.

    Args:
        cui: Concept identifier.
        ticket: Service ticket for this request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of definition strings, or None when there are none or the
        request failed (a message is printed in both cases).
    """
    definition_endpoint = f'https://uts-ws.nlm.nih.gov/rest/content/current/CUI/{cui}/definitions'
    params = {'ticket': ticket}

    response = requests.get(definition_endpoint, params=params, timeout=timeout)
    if response.status_code == 404:
        # The service answers 404 for a concept that has no definitions (see
        # the captured error for C0549186 later in this notebook); report it
        # as "nothing found" rather than as a generic HTTP error.
        print("No definitions found for the CUI.")
        return None
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    entries = response.json()['result']
    if not entries:
        print("No definitions found for the CUI.")
        return None
    return [item['value'] for item in entries]


In [None]:
import re

def get_medical_term_definitions(term, preferred_language='english'):
    """Print the UMLS definitions of ``term``.

    Authenticates with the module-level API_KEY, resolves the term to a CUI,
    fetches its definitions and prints those accepted by the language filter.
    Only 'english' is recognised by the filter; any other value of
    ``preferred_language`` yields no definitions. Nothing is returned, and
    any exception is caught and printed.
    """
    try:
        # Authenticate, then resolve the term to a concept identifier.
        tgt = get_umls_auth_token(API_KEY)
        cui = search_umls_cui(term, get_service_ticket(tgt))
        if not cui:
            print(f"Could not find CUI for term '{term}'.")
            return

        print(f"Found CUI: {cui}")

        # A fresh service ticket is requested for the second API call.
        definitions = get_umls_definitions(cui, get_service_ticket(tgt))
        if not definitions:
            print(f"No definitions available for term '{term}'.")
            return

        print(f"Definitions for '{term}':")

        # A definition counts as English when it consists solely of ASCII
        # letters, digits, whitespace and a small set of punctuation marks.
        english_pattern = re.compile(r'^[a-zA-Z0-9\s.,\-\'()]+$')
        wants_english = preferred_language == 'english'
        matching = [
            candidate for candidate in definitions
            if wants_english and english_pattern.match(candidate)
        ]

        if not matching:
            print(f"No definitions available for term '{term}' in {preferred_language}.")
            return

        for position, candidate in enumerate(matching, 1):
            print(f"{position}. {candidate}")

    except Exception as e:
        print(f"Error: {str(e)}")

# Example usage: look up one term and print its English definitions
# (performs live UMLS requests using the module-level API_KEY).
get_medical_term_definitions('osteoarthritis', preferred_language='english')


Found CUI: C0029408
Definitions for 'osteoarthritis':
1. A noninflammatory degenerative joint disease occurring chiefly in older persons, characterised by degeneration of the articular cartilage, hypertrophy of bone at the margins and changes in the synovial membrane. It is accompanied by pain and stiffness, particularly after prolonged activity.
2. noninflammatory degenerative joint disease occurring chiefly in older persons, characterized by degeneration of the articular cartilage, hypertrophy of bone at the margins, and changes in the synovial membrane, accompanied by pain and stiffness.
3. A progressive, degenerative joint disease, the most common form of arthritis, especially in older persons. The disease is thought to result not from the aging process but from biochemical changes and biomechanical stresses affecting articular cartilage. In the foreign literature it is often called osteoarthrosis deformans.


# NER and UMLS combined

In [None]:
import re
import pandas as pd
import requests
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from Bio_Epidemiology_NER.bio_recognizer import ner_prediction
from typing import List, Dict


# UMLS API Authentication and Query Functions
def get_umls_auth_token(api_key: str, timeout: float = 30) -> str:
    """
    Get a Ticket-Granting Ticket (TGT) using the UMLS API key.

    ``timeout`` is the number of seconds to wait for the server; without it a
    stalled connection would block forever. Raises ``requests.HTTPError`` on
    an error status. The TGT is read from the 'location' response header.
    """
    url = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
    # A mapping is form-encoded by requests, which also sets Content-Type.
    response = requests.post(url, data={'apikey': api_key}, timeout=timeout)
    response.raise_for_status()
    return response.headers['location']


def get_service_ticket(tgt: str, timeout: float = 30) -> str:
    """
    Get a Service Ticket (ST) using the TGT.

    The TGT is the URL the request is posted to; the ticket is the response
    body. ``timeout`` is the number of seconds to wait for the server.
    Raises ``requests.HTTPError`` on an error status.
    """
    service = "http://umlsks.nlm.nih.gov"
    response = requests.post(tgt, data={'service': service}, timeout=timeout)
    response.raise_for_status()
    return response.text


def search_umls_cui(term: str, ticket: str, timeout: float = 30) -> str:
    """
    Search for the CUI of a medical term.

    Runs an exact-match search and returns the 'ui' of the first result, or
    None when there is no match. ``timeout`` is the number of seconds to wait
    for the server. Raises ``requests.HTTPError`` on an error status.
    """
    search_endpoint = "https://uts-ws.nlm.nih.gov/rest/search/current"
    params = {
        'string': term,
        'ticket': ticket,
        'searchType': 'exact'
    }
    response = requests.get(search_endpoint, params=params, timeout=timeout)
    response.raise_for_status()
    matches = response.json()['result']['results']
    if not matches:
        return None
    top_ui = matches[0]['ui']
    # NOTE(review): UTS search responses have been documented to use a
    # placeholder entry with ui "NONE" for "no results"; do not pass that on
    # as if it were a CUI - confirm against the current API.
    return None if top_ui == 'NONE' else top_ui


def get_umls_definitions(cui: str, ticket: str, timeout: float = 30) -> List[str]:
    """
    Retrieve definitions for a CUI.

    Returns the 'value' of every definition entry, or an empty list when the
    concept has none. ``timeout`` is the number of seconds to wait for the
    server. Raises ``requests.HTTPError`` on error statuses other than 404.
    """
    definition_endpoint = f"https://uts-ws.nlm.nih.gov/rest/content/current/CUI/{cui}/definitions"
    params = {'ticket': ticket}
    response = requests.get(definition_endpoint, params=params, timeout=timeout)
    if response.status_code == 404:
        # The service answers 404 for concepts without definitions (see the
        # captured error for C0549186 later in this notebook): not a failure.
        return []
    response.raise_for_status()
    entries = response.json()['result']
    return [item['value'] for item in entries] if entries else []


# Preprocessing Functions
def basic_cleaning(text: str) -> str:
    """Lowercase ``text``, delete non-alphanumeric symbols, squeeze whitespace."""
    # Characters other than ASCII letters, digits and whitespace are deleted.
    without_symbols = re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
    # Whitespace runs become a single space; leading/trailing space is trimmed.
    return re.sub(r'\s+', ' ', without_symbols).strip()


def remove_stopwords(text: str, stopwords: set) -> str:
    """Return ``text`` without the tokens contained in ``stopwords``.

    Tokens are whitespace-separated and compared exactly (case-sensitive).
    """
    return ' '.join(token for token in text.split() if token not in stopwords)


def load_abbreviation_dict(csv_file: str) -> Dict[str, str]:
    """Load the abbreviation -> full-form mapping from a CSV file.

    The CSV must contain the columns 'abbrevation' (sic) and 'full_form'.
    Rows with an empty abbreviation or an empty full form are skipped.
    """
    # Read cells as text and treat only empty cells as missing: pandas'
    # default NA parsing would turn abbreviations like "na" or "null" into NaN.
    df = pd.read_csv(csv_file, dtype=str, keep_default_na=False, na_values=[''])
    # A NaN full form would break the ' '.join(...) in expand_abbreviations.
    df = df.dropna(subset=['abbrevation', 'full_form'])
    return dict(zip(df['abbrevation'], df['full_form']))


def expand_abbreviations(text: str, abbrev_dict: Dict[str, str]) -> str:
    """Swap every token that is a key of ``abbrev_dict`` for its full form.

    Unknown tokens pass through unchanged; output is single-space joined.
    """
    pieces = []
    for token in text.split():
        pieces.append(abbrev_dict.get(token, token))
    return ' '.join(pieces)


def custom_preprocessing(text: str, abbrev_dict: Dict[str, str], stopwords: set) -> str:
    """Run the preprocessing chain: clean, expand abbreviations, drop stopwords."""
    cleaned = basic_cleaning(text)
    expanded = expand_abbreviations(cleaned, abbrev_dict)
    return remove_stopwords(expanded, stopwords)


# Main Workflow
def process_medical_text(text: str, api_key: str, abbrev_dict: Dict[str, str], stopwords: set) -> None:
    """Extract medical terms from ``text`` and print their UMLS definitions.

    Steps: preprocess the text, run the Bio_Epidemiology_NER model, keep the
    Disease_disorder / Medication / Sign_symptom entities, then look up each
    unique entity value in UMLS. Results are printed; nothing is returned.
    A failure for one term is printed and does not stop the remaining terms.
    """
    # Step 1: Preprocess the text
    doc = custom_preprocessing(text, abbrev_dict, stopwords)

    # Step 2: Extract entities using the Bio_Epidemiology_NER model
    entities_df = ner_prediction(corpus=doc, compute='cpu')

    print("Detected Terms with Definitions:")

    # NOTE(review): when no entity is detected the model presumably returns an
    # empty frame without the columns indexed below - bail out instead of
    # raising KeyError; confirm against the library.
    if entities_df is None or entities_df.empty:
        return

    # Step 3: Filter entities based on required tags
    required_tags = ['Disease_disorder', 'Medication', 'Sign_symptom']
    filtered_df = entities_df[entities_df['entity_group'].isin(required_tags)].reset_index(drop=True)

    # Step 4: Extract unique terms for querying definitions
    unique_terms = filtered_df['value'].unique()
    if len(unique_terms) == 0:
        return

    # Authenticate once: the TGT is reused for every service ticket below, so
    # requesting a new one per term only added network round-trips.
    try:
        tgt = get_umls_auth_token(api_key)
    except Exception as e:
        print(f"Error authenticating with UMLS: {e}")
        return

    for term in unique_terms:
        try:
            # Step 5: Query UMLS for definitions (one service ticket per call)
            cui = search_umls_cui(term, get_service_ticket(tgt))
            if not cui:
                print(f"\nTerm: {term}")
                print("  No CUI found.")
                continue

            print(f"\nTerm: {term} (CUI: {cui})")
            definitions = get_umls_definitions(cui, get_service_ticket(tgt))
            if not definitions:
                print("  No definitions found.")
                continue

            for idx, definition in enumerate(definitions, 1):
                print(f"  {idx}. {definition}")
        except Exception as e:
            print(f"Error retrieving information for term '{term}': {e}")


# Example Usage
# Demo entry point: runs the full pipeline on a sample report. It relies on
# API_KEY, which is defined at module level in the UMLS API Integration cell.
if __name__ == "__main__":
    # Load abbreviation dictionary from CSV (Google Drive path used in Colab)
    abbrev_dict = load_abbreviation_dict('/content/drive/MyDrive/NLP/medical_abbrevations.csv')

    # Example text: a fictional report used as pipeline input
    text = """
    Patient Name: John Doe
Date of Birth: 1975-05-10
Date of Examination: 2024-11-25
Examined By: Dr. Jane Smith, MD

Chief Complaint:
Persistent cough, intermittent fever, and fatigue for two weeks.

Assessment:
Suspected pneumonia. Possible COPD exacerbation.

Plan:
Chest X-ray, blood tests, sputum culture.
Antibiotics (e.g., Azithromycin), bronchodilators (e.g., Albuterol).
Smoking cessation counseling.
Follow-up in 5-7 days.

Disclaimer: This is a sample medical report and should not be considered actual medical advice.
    """

    # Process the medical text; detected terms and definitions are printed
    process_medical_text(text, API_KEY, abbrev_dict, ENGLISH_STOP_WORDS)


  final_df = final_df.append(disease_df) # adding the disease_df to existing
  master_df = master_df.append(final_df)


Detected Terms with Definitions:

Term: cough (CUI: C0010200)
  1. Prudký a zpravidla hlasitý výdechový manévr. Vykašlávání (expektorace) je obranná reflexní akce, která směřuje k uvolnění dýchacích cest od hlenu, sputa (sliny, chrchle) a jakýchkoliv jiných cizorodých těles, které blokují či dráždí dýchací cesty. (cit. Wikipedie 2016 http://www.cs.wikipedia.org/)
  2. A sudden, audible expulsion of air from the lungs through a partially closed glottis, preceded by inhalation. [https://orcid.org/0000-0002-0736-9199, PMID:16428719, PMID:17540788]
  3. A sudden, often repetitive, spasmodic contraction of the thoracic cavity, resulting in violent release of air from the lungs, and usually accompanied by a distinctive sound.
  4. <p>Coughing is a reflex that keeps your throat and airways clear. Although it can be annoying, coughing helps your body heal or protect itself. Coughs can be either acute or chronic. Acute coughs begin suddenly and usually last no more than 2 to 3 weeks. Acute coug

In [None]:
import re
import pandas as pd
import requests
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from Bio_Epidemiology_NER.bio_recognizer import ner_prediction
from typing import List, Dict


# UMLS API Authentication and Query Functions
def get_umls_auth_token(api_key: str, timeout: float = 30) -> str:
    """
    Get a Ticket-Granting Ticket (TGT) using the UMLS API key.

    ``timeout`` bounds, in seconds, how long to wait for the server so that a
    stalled connection cannot block forever. Raises ``requests.HTTPError`` on
    an error status. The TGT is read from the 'location' response header.
    """
    url = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
    # Let requests form-encode the body (and set Content-Type) from a mapping.
    form = {'apikey': api_key}
    response = requests.post(url, data=form, timeout=timeout)
    response.raise_for_status()
    return response.headers['location']


def get_service_ticket(tgt: str, timeout: float = 30) -> str:
    """
    Get a Service Ticket (ST) using the TGT.

    The request is posted to the TGT URL and the ticket is the response body.
    ``timeout`` bounds, in seconds, how long to wait for the server. Raises
    ``requests.HTTPError`` on an error status.
    """
    service = "http://umlsks.nlm.nih.gov"
    response = requests.post(tgt, data={'service': service}, timeout=timeout)
    response.raise_for_status()
    return response.text


def search_umls_cui(term: str, ticket: str, timeout: float = 30) -> str:
    """
    Search for the CUI of a medical term.

    Performs an exact-match search; returns the first result's 'ui', or None
    when nothing matched. ``timeout`` bounds, in seconds, how long to wait
    for the server. Raises ``requests.HTTPError`` on an error status.
    """
    search_endpoint = "https://uts-ws.nlm.nih.gov/rest/search/current"
    params = {
        'string': term,
        'ticket': ticket,
        'searchType': 'exact'
    }
    response = requests.get(search_endpoint, params=params, timeout=timeout)
    response.raise_for_status()
    matches = response.json()['result']['results']
    if not matches:
        return None
    top_ui = matches[0]['ui']
    # NOTE(review): UTS search responses have been documented to use a
    # placeholder entry with ui "NONE" for "no results"; it must not be
    # treated as a real CUI - confirm against the current API.
    if top_ui == 'NONE':
        return None
    return top_ui


def get_umls_definitions(cui: str, ticket: str, timeout: float = 30) -> List[str]:
    """
    Retrieve definitions for a CUI and filter only English definitions.

    "English" is approximated by a character whitelist: a definition is kept
    only if it consists solely of ASCII letters, digits, whitespace and
    ``. , - ' ( )``. Returns an empty list when the concept has no
    definitions. ``timeout`` bounds, in seconds, how long to wait for the
    server. Raises ``requests.HTTPError`` on error statuses other than 404.
    """
    definition_endpoint = f"https://uts-ws.nlm.nih.gov/rest/content/current/CUI/{cui}/definitions"
    params = {'ticket': ticket}
    response = requests.get(definition_endpoint, params=params, timeout=timeout)
    if response.status_code == 404:
        # The service answers 404 for concepts without definitions (see the
        # captured error for C0549186 later in this notebook): not a failure.
        return []
    response.raise_for_status()
    entries = response.json()['result']
    all_definitions = [item['value'] for item in entries] if entries else []

    # Filter English definitions using regex
    english_pattern = re.compile(r'^[a-zA-Z0-9\s.,\-\'()]+$')
    return [definition for definition in all_definitions if english_pattern.match(definition)]


# Preprocessing Functions
def basic_cleaning(text: str) -> str:
    """Normalise ``text``: lowercase, strip symbols, collapse whitespace."""
    # Applied in order: first delete anything that is not an ASCII letter,
    # digit or whitespace, then squeeze whitespace runs to one space.
    substitutions = (
        (r'[^a-zA-Z0-9\s]', ''),
        (r'\s+', ' '),
    )
    result = text.lower()
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result.strip()


def remove_stopwords(text: str, stopwords: set) -> str:
    """Filter the whitespace-separated tokens of ``text`` against ``stopwords``.

    Tokens present in ``stopwords`` are dropped; the rest are joined with
    single spaces. Comparison is exact (case-sensitive).
    """
    survivors = filter(lambda token: token not in stopwords, text.split())
    return ' '.join(survivors)


def load_abbreviation_dict(csv_file: str) -> Dict[str, str]:
    """Build the abbreviation -> full-form dictionary from a CSV file.

    Expects the columns 'abbrevation' (sic) and 'full_form'. Rows where
    either of the two cells is empty are ignored.
    """
    # Only empty cells count as missing; the default NA parsing of pandas
    # would also convert abbreviations such as "na" or "null" to NaN.
    df = pd.read_csv(csv_file, dtype=str, keep_default_na=False, na_values=[''])
    # Dropping incomplete rows keeps float NaN out of the mapping, which
    # would otherwise break the ' '.join(...) done during expansion.
    complete = df.dropna(subset=['abbrevation', 'full_form'])
    return dict(zip(complete['abbrevation'], complete['full_form']))


def expand_abbreviations(text: str, abbrev_dict: Dict[str, str]) -> str:
    """Expand known abbreviations token by token.

    Each whitespace-separated token found in ``abbrev_dict`` is replaced by
    its mapped value; other tokens are left untouched.
    """
    return ' '.join(map(lambda token: abbrev_dict.get(token, token), text.split()))


def custom_preprocessing(text: str, abbrev_dict: Dict[str, str], stopwords: set) -> str:
    """Clean the text, expand abbreviations, then strip stopwords (in that order)."""
    return remove_stopwords(
        expand_abbreviations(basic_cleaning(text), abbrev_dict),
        stopwords,
    )

# Main Workflow
def process_medical_text(text: str, api_key: str, abbrev_dict: Dict[str, str], stopwords: set) -> None:
    """Extract disease/symptom terms from ``text`` and print UMLS definitions.

    Steps: preprocess the text, run the Bio_Epidemiology_NER model, keep the
    Disease_disorder / Sign_symptom entities, split multi-word entities into
    single words and look each word up in UMLS. Results are printed; nothing
    is returned. A failure for one term is printed and does not stop the rest.
    """
    # Step 1: Preprocess the text
    doc = custom_preprocessing(text, abbrev_dict, stopwords)

    # Step 2: Extract entities using the Bio_Epidemiology_NER model
    entities_df = ner_prediction(corpus=doc, compute='cpu')

    print("Detected Terms with Definitions:")

    # NOTE(review): when no entity is detected the model presumably returns an
    # empty frame without the columns indexed below - bail out instead of
    # raising KeyError; confirm against the library.
    if entities_df is None or entities_df.empty:
        return

    # Step 3: Filter entities based on required tags
    required_tags = ['Disease_disorder', 'Sign_symptom']
    filtered_df = entities_df[entities_df['entity_group'].isin(required_tags)].reset_index(drop=True)

    # Step 4: Extract unique terms and split multi-word terms into single words
    single_terms = set()
    for term in filtered_df['value'].unique():
        single_terms.update(term.split())
    if not single_terms:
        return

    # Authenticate once: the TGT is reused for every service ticket below, so
    # requesting a new one per term only added network round-trips.
    try:
        tgt = get_umls_auth_token(api_key)
    except Exception as e:
        print(f"Error authenticating with UMLS: {e}")
        return

    # Sorted so the output order is the same on every run (set iteration
    # order of strings changes between interpreter sessions).
    for term in sorted(single_terms):
        try:
            # Step 5: Query UMLS for definitions (one service ticket per call)
            cui = search_umls_cui(term, get_service_ticket(tgt))
            if not cui:
                print(f"\nTerm: {term}")
                print("  No CUI found.")
                continue

            print(f"\nTerm: {term} (CUI: {cui})")
            definitions = get_umls_definitions(cui, get_service_ticket(tgt))
            if not definitions:
                print("  No definitions found.")
                continue

            for idx, definition in enumerate(definitions, 1):
                print(f"  {idx}. {definition}")
        except Exception as e:
            print(f"Error retrieving information for term '{term}': {e}")


# Example Usage
# Demo entry point: runs the pipeline on a sample report written with more
# formal medical terminology. It relies on API_KEY, which is defined at module
# level in the UMLS API Integration cell.
if __name__ == "__main__":
    # Load abbreviation dictionary from CSV
    abbrev_dict = load_abbreviation_dict('/content/drive/MyDrive/NLP/medical_abbrevations.csv')

    # Example text. The "Key Changes" commentary that had been pasted after the
    # disclaimer was editing notes, not part of the report, and was being fed
    # to the NER model; it has been removed from the literal.
    text = """
Patient Name: John Doe
Date of Birth: 1975-05-10
Date of Examination: 2024-11-25
Examined By: Dr. Jane Smith, MD

Chief Complaint:
Chronic productive cough, intermittent pyrexia, and asthenia.

Assessment:
Suspected pneumonia. Possible chronic obstructive pulmonary disease exacerbation.

Plan:
Complete Blood Count (CBC), Comprehensive Metabolic Panel (CMP), chest radiograph, sputum Gram stain and culture. Initiate azithromycin. Instruct on albuterol inhaler use. Smoking cessation counseling.

Disclaimer: This is a sample medical report and should not be considered actual medical advice.
"""

    # Process the medical text
    process_medical_text(text, API_KEY, abbrev_dict, ENGLISH_STOP_WORDS)


  final_df = final_df.append(disease_df) # adding the disease_df to existing
  master_df = master_df.append(final_df)


Detected Terms with Definitions:

Term: cmp (CUI: C0596993)
  1. A hematopoietic stem cell found in the bone marrow that is committed to form erythrocytes, megakaryocytes, and all leukocytes except lymphocytes.

Term: plan (CUI: C0270724)
  1. A rare autosomal recessive neurodegenerative disorder caused by mutations in the PLA2G6 gene. It is characterized by the development of swellings called spehroids along the axons of the central nervous system. Signs and symptoms appear early in life and include movement difficulties, muscle hypotonia and spasticity, and dementia.

Term: sample (CUI: C2347026)
  1. Any material sample taken from a biological entity for testing, diagnostic, propagation, treatment or research purposes, including a sample obtained from a living organism or taken from the biological object after halting of all its life functions. Biospecimen can contain one or more components including but not limited to cellular molecules, cells, tissues, organs, body fluids, embryos

# Summarization


In [None]:
import re
import pandas as pd
import requests
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from Bio_Epidemiology_NER.bio_recognizer import ner_prediction
from transformers import pipeline
from typing import List, Dict

# UMLS API Authentication and Query Functions
def get_umls_auth_token(api_key: str, timeout: float = 30) -> str:
    """Get a Ticket-Granting Ticket (TGT) using the UMLS API key.

    Args:
        api_key: UMLS API key.
        timeout: Seconds to wait for the server; prevents a stalled
            connection from blocking forever.

    Returns:
        The TGT, read from the 'location' response header.

    Raises:
        requests.HTTPError: On an error status.
    """
    url = "https://utslogin.nlm.nih.gov/cas/v1/api-key"
    # A mapping is form-encoded by requests, which also sets Content-Type.
    response = requests.post(url, data={'apikey': api_key}, timeout=timeout)
    response.raise_for_status()
    return response.headers['location']


def get_service_ticket(tgt: str, timeout: float = 30) -> str:
    """Get a Service Ticket (ST) using the TGT.

    Args:
        tgt: TGT as returned by ``get_umls_auth_token``; used as the URL the
            request is posted to.
        timeout: Seconds to wait for the server.

    Returns:
        The service ticket (the response body text).

    Raises:
        requests.HTTPError: On an error status.
    """
    service = "http://umlsks.nlm.nih.gov"
    response = requests.post(tgt, data={'service': service}, timeout=timeout)
    response.raise_for_status()
    return response.text


def search_umls_cui(term: str, ticket: str, timeout: float = 30) -> str:
    """Search for the CUI of a medical term (exact match).

    Args:
        term: Term to search for.
        ticket: Service ticket for this request.
        timeout: Seconds to wait for the server.

    Returns:
        The 'ui' of the first search result, or None when nothing matched.

    Raises:
        requests.HTTPError: On an error status.
    """
    search_endpoint = "https://uts-ws.nlm.nih.gov/rest/search/current"
    params = {
        'string': term,
        'ticket': ticket,
        'searchType': 'exact'
    }
    response = requests.get(search_endpoint, params=params, timeout=timeout)
    response.raise_for_status()
    matches = response.json()['result']['results']
    # NOTE(review): UTS search responses have been documented to use a
    # placeholder entry with ui "NONE" for "no results"; treat it like an
    # empty result list - confirm against the current API.
    if not matches or matches[0]['ui'] == 'NONE':
        return None
    return matches[0]['ui']


def get_umls_definitions(cui: str, ticket: str, timeout: float = 30) -> List[str]:
    """Retrieve the definitions of a CUI, keeping only English-looking ones.

    "English" is approximated by a character whitelist: a definition is kept
    only if it consists solely of ASCII letters, digits, whitespace and
    ``. , - ' ( )``.

    Args:
        cui: Concept identifier.
        ticket: Service ticket for this request.
        timeout: Seconds to wait for the server.

    Returns:
        The accepted definition strings; empty when the concept has none.

    Raises:
        requests.HTTPError: On error statuses other than 404.
    """
    definition_endpoint = f"https://uts-ws.nlm.nih.gov/rest/content/current/CUI/{cui}/definitions"
    params = {'ticket': ticket}
    response = requests.get(definition_endpoint, params=params, timeout=timeout)
    if response.status_code == 404:
        # The service answers 404 for concepts without definitions (this
        # notebook's captured output shows it for C0549186): not a failure.
        return []
    response.raise_for_status()
    entries = response.json()['result']
    all_definitions = [item['value'] for item in entries] if entries else []

    # Filter English definitions
    english_pattern = re.compile(r'^[a-zA-Z0-9\s.,\-\'()]+$')
    return [definition for definition in all_definitions if english_pattern.match(definition)]


# Preprocessing Functions
def basic_cleaning(text: str) -> str:
    """Lowercase the text, remove non-alphanumeric characters, tidy spacing."""
    lowercase = text.lower()
    # Symbols are deleted outright, so "x-ray" becomes "xray".
    letters_digits_spaces = re.sub(r'[^a-zA-Z0-9\s]', '', lowercase)
    single_spaced = re.sub(r'\s+', ' ', letters_digits_spaces)
    return single_spaced.strip()


def remove_stopwords(text: str, stopwords: set) -> str:
    """Return ``text`` with every token listed in ``stopwords`` removed.

    Tokenisation is by whitespace and matching is exact (case-sensitive).
    """
    retained = []
    for candidate in text.split():
        if candidate not in stopwords:
            retained.append(candidate)
    return ' '.join(retained)


def load_abbreviation_dict(csv_file: str) -> Dict[str, str]:
    """Read the abbreviation CSV into an abbreviation -> full-form dict.

    Uses the columns 'abbrevation' (sic) and 'full_form'; rows where either
    cell is empty are left out.
    """
    # Treat only empty cells as missing so that abbreviations such as "na" or
    # "null" are not swallowed by pandas' default NA parsing.
    df = pd.read_csv(csv_file, dtype=str, keep_default_na=False, na_values=[''])
    # Incomplete rows would put float NaN into the mapping and break the
    # ' '.join(...) performed when abbreviations are expanded.
    usable = df.dropna(subset=['abbrevation', 'full_form'])
    return dict(zip(usable['abbrevation'], usable['full_form']))


def expand_abbreviations(text: str, abbrev_dict: Dict[str, str]) -> str:
    """Substitute abbreviations in ``text`` with their full forms.

    Works on whitespace-separated tokens; a token without an entry in
    ``abbrev_dict`` is emitted unchanged.
    """
    tokens = text.split()
    return ' '.join(abbrev_dict.get(token, token) for token in tokens)


def custom_preprocessing(text: str, abbrev_dict: Dict[str, str], stopwords: set) -> str:
    """Clean, expand abbreviations and drop stopwords; reject empty results.

    Raises:
        ValueError: If nothing but whitespace is left after preprocessing.
    """
    processed = remove_stopwords(
        expand_abbreviations(basic_cleaning(text), abbrev_dict),
        stopwords,
    )
    # Ensure there's text after preprocessing
    if processed.strip() == "":
        raise ValueError("The processed text is empty.")
    return processed


# Replace Terms with Definitions
def replace_terms_with_definitions(report: str, term_definitions: Dict[str, List[str]]) -> str:
    """Annotate each known term in ``report`` with its first definition.

    Every whole-word, case-insensitive occurrence of a term is rewritten as
    ``"<term as written> (<definition>)"``.

    Args:
        report: Text to annotate.
        term_definitions: Mapping of term -> list of definitions. Terms with
            an empty list (and empty terms) are ignored.

    Returns:
        The annotated report. The original casing of each occurrence is kept,
        and inserted definitions are never themselves annotated.
    """
    # Use the first available definition; key in lowercase because matching
    # is case-insensitive.
    first_definitions = {
        term.lower(): definitions[0]
        for term, definitions in term_definitions.items()
        if term and definitions
    }
    if not first_definitions:
        return report

    # One combined pattern means the report is scanned exactly once, so text
    # inserted for one term cannot be matched again by another term. Longer
    # terms come first so they win over terms that are their prefix.
    alternatives = '|'.join(
        re.escape(term) for term in sorted(first_definitions, key=len, reverse=True)
    )
    pattern = re.compile(fr'\b(?:{alternatives})\b', flags=re.IGNORECASE)

    def annotate(match):
        # A callable replacement inserts the definition literally; a string
        # replacement would interpret backslashes in it as regex escapes.
        matched = match.group(0)
        definition = first_definitions.get(matched.lower())
        if definition is None:
            # Case-folding corner case: leave the text untouched.
            return matched
        return f"{matched} ({definition})"

    return pattern.sub(annotate, report)

# Define a summarization pipeline
# Built once at module level (when this cell runs) and reused by
# process_medical_text below for every report.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Main Workflow
def process_medical_text(text: str, api_key: str, abbrev_dict: Dict[str, str], stopwords: set) -> None:
    """Annotate a report with UMLS definitions and print a summary of it.

    Steps: preprocess the text, run the Bio_Epidemiology_NER model, keep the
    Disease_disorder / Sign_symptom entities, split them into single words,
    look each word up in UMLS, insert the first definition after each term in
    the ORIGINAL text, and summarize the result with the module-level
    ``summarizer``. Both texts are printed; nothing is returned. A lookup
    failure for one term is printed and does not stop the remaining terms.
    """
    # Step 1: Preprocess the text
    doc = custom_preprocessing(text, abbrev_dict, stopwords)

    # Step 2: Extract entities using the Bio_Epidemiology_NER model
    entities_df = ner_prediction(corpus=doc, compute='cpu')

    # Steps 3-4: keep the required tags and split multi-word terms into words.
    # NOTE(review): when no entity is detected the model presumably returns an
    # empty frame without the columns indexed below - skip the lookup instead
    # of raising KeyError; confirm against the library.
    single_terms = set()
    if entities_df is not None and not entities_df.empty:
        required_tags = ['Disease_disorder', 'Sign_symptom']
        filtered_df = entities_df[entities_df['entity_group'].isin(required_tags)].reset_index(drop=True)
        for term in filtered_df['value'].unique():
            single_terms.update(term.split())

    term_definitions = {}
    tgt = None
    if single_terms:
        # Authenticate once: the TGT is reused for every service ticket below,
        # so requesting a new one per term only added network round-trips.
        try:
            tgt = get_umls_auth_token(api_key)
        except Exception as e:
            print(f"Error authenticating with UMLS: {e}")

    if tgt:
        # Sorted so the lookup order - and with it the order in which
        # definitions are applied - is the same on every run.
        for term in sorted(single_terms):
            try:
                # Query UMLS for definitions (one service ticket per call)
                cui = search_umls_cui(term, get_service_ticket(tgt))
                if cui:
                    term_definitions[term] = get_umls_definitions(cui, get_service_ticket(tgt))
            except Exception as e:
                print(f"Error retrieving information for term '{term}': {e}")

    # Step 5: Replace terms in the text with definitions
    simplified_report = replace_terms_with_definitions(text, term_definitions)

    # Step 6: Summarize the simplified report. The annotated report can grow
    # well beyond the original, so let the pipeline truncate input that
    # exceeds the model's maximum length instead of failing on it.
    summary = summarizer(
        simplified_report,
        max_length=500,
        min_length=50,
        do_sample=False,
        truncation=True,
    )

    # Print both simplified report and summary
    print("Simplified Report:\n", simplified_report)
    print("\nSummary:\n", summary[0]['summary_text'])

# Example Usage
# Demo entry point. Everything is kept under the guard: previously only the
# dictionary load was inside it, so importing this code would have run the
# pipeline with `abbrev_dict` undefined.
if __name__ == "__main__":
    # Load abbreviation dictionary from CSV
    abbrev_dict = load_abbreviation_dict('/content/drive/MyDrive/NLP/medical_abbrevations.csv')

    # Example text
    text = """
Patient Name: John Doe
Date of Birth: 1975-05-10
Date of Examination: 2024-11-25
Examined By: Dr. Jane Smith, MD
Chief Complaint:
Productive cough with yellow-green sputum for two weeks, intermittent fevers, and fatigue.
History of Present Illness:
Patient reports a two-week history of a productive cough producing yellow-green sputum. He describes intermittent episodes of fever, characterized by chills and night sweats. He also complains of significant fatigue, impacting his daily activities.
Physical Examination:
Lungs:
Decreased breath sounds in the right lower lobe.
Rales noted in the right base.
Assessment:
Suspected Diagnosis:
Pneumonia
Possible Chronic Obstructive Pulmonary Disease (COPD) exacerbation
"""

    # Process the medical text. The key comes from the module-level API_KEY
    # (as in the earlier cells) instead of being repeated here as a literal.
    process_medical_text(text, API_KEY, abbrev_dict, ENGLISH_STOP_WORDS)


  final_df = final_df.append(disease_df) # adding the disease_df to existing
  master_df = master_df.append(final_df)


Error retrieving information for term 'obstructive': 404 Client Error: Not Found for url: https://uts-ws.nlm.nih.gov/rest/content/current/CUI/C0549186/definitions?ticket=ST-39402-7e8311iuvm4fvj5xb-cas
Simplified Report:
 
Patient Name: John Doe
Date of Birth: 1975-05-10
Date of Examination: 2024-11-25
Examined By: Dr. Jane Smith, MD
Chief complaint (A symptom or problem for which a patient seeks medical attention.):
Productive cough (A sudden, often repetitive, spasmodic contraction of the thoracic cavity, resulting in violent release of air from the lungs, and usually accompanied by a distinctive sound.) with yellow-green sputum for two weeks, intermittent fevers (An abnormal elevation of body temperature, usually as a result of a pathologic process.), and fatigue (Fatigue refers to a lack of energy, and it may be either acute or chronic (Condition lasting for more than 3-6 months, or persisting beyond the course of an acute disease (A definite pathologic process with a characteristic