### 1. MedCAT Installation

In [1]:
# Install medcat
! pip install medcat==1.8.0
try:
    from medcat.cat import CAT
except:
    print("WARNING: Runtime will restart automatically and please run other cells thereafter.")
    exit()



  from tqdm.autonotebook import tqdm, trange


### 1.1 Import the Medcat Library

In [2]:
# Import the required libraries
import pandas as pd
import numpy as np
import pickle

from medcat.cat import CAT

import xml.etree.ElementTree as ET

import json

from spacy import displacy

import pandas as pd
import xml.etree.ElementTree as ET

### Import the Modelpack

In this tutorial, we used the SNOMED International modelpack (Full SNOMED modelpack trained on MIMIC-III). To download this model, you need to sign in to your NIH profile/UMLS License using the provided link: <link>https://uts.nlm.nih.gov/uts/login?service=https:%2F%2Fmedcat.rosalind.kcl.ac.uk%2Fauth-callback</link>

or use the other model package without needing a license

Download from: <link>https://medcat.rosalind.kcl.ac.uk/media/medmen_wstatus_2021_oct.zip</link>

In [3]:
# Download the models and required data

!wget -N https://medcat.rosalind.kcl.ac.uk/media/medmen_wstatus_2021_oct.zip

--2023-08-27 16:00:28--  https://medcat.rosalind.kcl.ac.uk/media/medmen_wstatus_2021_oct.zip
Resolving medcat.rosalind.kcl.ac.uk (medcat.rosalind.kcl.ac.uk)... 193.61.202.139
Connecting to medcat.rosalind.kcl.ac.uk (medcat.rosalind.kcl.ac.uk)|193.61.202.139|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2023-08-27 16:00:30 ERROR 404: Not Found.



In [4]:
# or if it's not working
!gdown https://drive.google.com/uc?id=1UL6-JlBcWjZZrO_zye_2ufb2Nrod2bpu


Downloading...
From: https://drive.google.com/uc?id=1UL6-JlBcWjZZrO_zye_2ufb2Nrod2bpu
To: /content/medmen_wstatus_2021_oct.zip
100% 562M/562M [00:03<00:00, 141MB/s]


In [5]:
#from google.colab import drive
#drive.mount('/content/drive')


DIR = '/content/'
model_pack_path = DIR + "medmen_wstatus_2021_oct.zip"

### 1.4 Loading the MedCAT modelpack

In [6]:
# Load model pack and Create CAT - the main class from medcat used for concept annotation
cat = CAT.load_model_pack(model_pack_path)

INFO:medcat.cat:{
  "Model ID": null,
  "Last Modified On": null,
  "History (from least to most recent)": [],
  "Description": "No description",
  "Source Ontology": null,
  "Location": null,
  "MetaCAT models": {},
  "Basic CDB Stats": {},
  "Performance": {
    "ner": {},
    "meta": {}
  },
  "Important Parameters (Partial view, all available in cat.config)": {
    "config.ner.min_name_len": {
      "value": 3,
      "description": "Minimum detection length (found terms/mentions shorter than this will not be detected)."
    },
    "config.ner.upper_case_limit_len": {
      "value": 3,
      "description": "All detected terms shorter than this value have to be uppercase, otherwise they will be ignored."
    },
    "config.linking.similarity_threshold": {
      "value": 0.2,
      "description": "If the confidence of the model is lower than this a detection will be ignore."
    },
    "config.linking.filters.cuis": {
      "value": 0,
      "description": "Length of the CUIs filter t

In [7]:
text = 'Patient presents with a chief complaint of chest pain that radiates to the left arm. The pain began suddenly and is described as squeezing and pressure-like. Patient also reports shortness of breath, nausea, and lightheadedness. Physical examination reveals elevated blood pressure and an irregular heartbeat. Electrocardiogram shows ST-segment elevation indicative of an acute myocardial infarction. The patient is immediately admitted to the cardiac care unit for further evaluation and treatment.'

In [8]:
# Extract annotated entities in JSON format
annotated_entities_json = cat.get_entities(text)

# Prettify the annotations for better readability
prettified_json = json.dumps(annotated_entities_json, indent=4)

# Display the annotated entities in a readable JSON format
print()
print("Annotated Medical Entities (JSON format):")
print(prettified_json)


Annotated Medical Entities (JSON format):
{
    "entities": {
        "0": {
            "pretty_name": "Patients",
            "cui": "C0030705",
            "type_ids": [
                "T101"
            ],
            "types": [
                "Patient or Disabled Group"
            ],
            "source_value": "Patient",
            "detected_name": "patient",
            "acc": 0.38499695551547325,
            "context_similarity": 0.38499695551547325,
            "start": 0,
            "end": 7,
            "icd10": [],
            "ontologies": [],
            "snomed": [],
            "id": 0,
            "meta_anns": {
                "Status": {
                    "value": "Affirmed",
                    "confidence": 0.9993436932563782,
                    "name": "Status"
                }
            }
        },
        "3": {
            "pretty_name": "Chief complaint (finding)",
            "cui": "C0277786",
            "type_ids": [
                "T033"
   

In [9]:
# Loop through the annotations in 'annotated_text' and print each CUI and its entity name.

for annotation in list(annotated_entities_json['entities'].values()):
    print(annotation['source_value'],'--', annotation['cui'], annotation['pretty_name'], annotation['types'])
    print()

Patient -- C0030705 Patients ['Patient or Disabled Group']

chief complaint -- C0277786 Chief complaint (finding) ['Finding']

chest pain -- C0008031 Chest Pain ['Sign or Symptom']

left arm -- C0230347 Left upper arm structure ['Body Part, Organ, or Organ Component']

pain -- C0030193 Pain ['Sign or Symptom']

pressure -- C0033095 Pressure- physical agent ['Phenomenon or Process']

Patient -- C0030705 Patients ['Patient or Disabled Group']

reports -- C4265456 Statement of witness:Finding:Point in time:^Witness:Narrative:Reported ['Clinical Attribute']

shortness of breath -- C0013404 Dyspnea ['Sign or Symptom']

nausea -- C0027497 Nausea ['Sign or Symptom']

Physical examination -- C0031809 Physical Examination ['Health Care Activity']

elevated blood pressure -- C0497247 Increase in blood pressure ['Finding']

irregular -- C0205271 irregular ['Qualitative Concept']

heartbeat -- C0425583 HB - Heart beat ['Organ or Tissue Function']

Electrocardiogram -- C1623258 Electrocardiography 

### Retrieving all names and semantic types for a Given CUI.

We can retrieve all concept names from KB associated with the CUI by utilizing <code>cdb.cdb.cui2names</code> and  <code>cdb.cui2type_ids</code>function in <code>cat</code>.

In [10]:
# Retrieve all names associated with the CUI from the concept database.
cui = "C0008031"
all_names = cat.cdb.cui2names[cui]

# Print the list of names associated with the given CUI.
print(all_names)

{'pain', 'chest~pain'}


In [11]:
# Retrieve the type_ids associated with the CUI from the concept database.
type_ids = cat.cdb.cui2type_ids[cui]

# Print the list of type_ids associated with the given CUI.
print(type_ids)

{'T184'}


# 2. Using MedCAT to extract information in clinical trials task

### Download and load the data


In [12]:

# Download the data

!gdown https://drive.google.com/uc?id=1hcr1ApEze9DYrmAzD1NQhpV7C50_rUbu

Downloading...
From: https://drive.google.com/uc?id=1hcr1ApEze9DYrmAzD1NQhpV7C50_rUbu
To: /content/ct-topics2022.xml
  0% 0.00/32.4k [00:00<?, ?B/s]100% 32.4k/32.4k [00:00<00:00, 64.9MB/s]


In [13]:

data_path = DIR + 'ct-topics2022.xml'

In [14]:

# Read and Parse XML Data
tree = ET.parse(data_path)
root = tree.getroot()

#  Extract Data and Convert to DataFrame
data = []
for topic in root.findall('topic'):
    number = topic.get('number')
    description = topic.text.strip()

    # Append the extracted data as a dictionary to the 'data' list
    data.append({'Number': number, 'Query': description})

# Create a DataFrame from the extracted data
clinical_df = pd.DataFrame(data)
display(clinical_df)

Unnamed: 0,Number,Query
0,1,A 19-year-old male came to clinic with some se...
1,2,A 32-year-old woman comes to the hospital with...
2,3,A 51-year-old man comes to the office complain...
3,4,A 66-year-old woman comes to the office due to...
4,5,A 23-year-old man comes to the emergency depar...
5,6,A 61-year-old man comes to the clinic due to n...
6,7,A 3-year-old girl is brought to the clinic by ...
7,8,A 7-month-old boy is brought to emergency by h...
8,9,A 67-year-old woman comes to the clinic due to...
9,10,A 19-year-old girl comes to the clinic due to ...


In [15]:
def annotate_with_medcat(ct_text):
    annotated_ct_text = cat.get_entities(ct_text)
    #print(annotated_ct_text)
    med_concepts = set()
    filter_types = ['Disease or Syndrome', 'Finding', 'Body Part', 'Organ, or Organ Component, Diagnostic Procedure', 'Laboratory or Test Result', 'Sign or Symptom']
    text_input = ''

    for annotation in list(annotated_ct_text['entities'].values()):
        # filter with Disease, Body Part, Organ, or Organ Component, Diagnostic Procedure, Laboratory or Test Result  and Sign or Symptom
        if annotation['types'][0].strip() in filter_types:
          med_concepts.add(annotation['pretty_name'])

    # Join entities into a single text input
    text_input = ', '.join(list(med_concepts))

    return text_input

### Print CUI and Corresponding Entity Names from Annotated Text.

In [16]:
# Apply the processing function to the 'description' column
clinical_df['medcat_query'] = clinical_df['Query'].apply(annotate_with_medcat)
display(clinical_df)

Unnamed: 0,Number,Query,medcat_query
0,1,A 19-year-old male came to clinic with some se...,Finding
1,2,A 32-year-old woman comes to the hospital with...,"Febrile urinary tract infection, Finding, Abno..."
2,3,A 51-year-old man comes to the office complain...,"problems sexual, Hypertensive disease, Fatigue..."
3,4,A 66-year-old woman comes to the office due to...,"Bone lesion, Hypertensive disease, Stiffness, ..."
4,5,A 23-year-old man comes to the emergency depar...,"Harsh, Other medical condition, Normal systoli..."
5,6,A 61-year-old man comes to the clinic due to n...,"Hypertensive disease, Pleural effusion disorde..."
6,7,A 3-year-old girl is brought to the clinic by ...,"Increased head circumference, First Visit to T..."
7,8,A 7-month-old boy is brought to emergency by h...,"Symptoms, Actual Discomfort, Sore to touch, Fi..."
8,9,A 67-year-old woman comes to the clinic due to...,"Hypertensive disease, Degenerative polyarthrit..."
9,10,A 19-year-old girl comes to the clinic due to ...,"Swelling, Mass of body structure"
