In [None]:
!pip install scikit-fuzzy

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-fuzzy
  Downloading scikit_fuzzy-0.5.0-py2.py3-none-any.whl.metadata (2.6 kB)
Downloading scikit_fuzzy-0.5.0-py2.py3-none-any.whl (920 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m920.8/920.8 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: scikit-fuzzy
Successfully installed scikit-fuzzy-0.5.0


In [5]:
from skfuzzy import control as ctrl
import numpy as np
import skfuzzy as fuzz

condition_severity = ctrl.Antecedent(np.arange(0, 101, 1), 'condition_severity')
symptom_relevance = ctrl.Antecedent(np.arange(0, 101, 1), 'symptom_relevance')
recommendation = ctrl.Consequent(np.arange(0, 101, 1), 'recommendation')

condition_severity['low'] = fuzz.trapmf(condition_severity.universe, [0, 0, 30, 50])
condition_severity['medium'] = fuzz.trimf(condition_severity.universe, [30, 50, 70])
condition_severity['high'] = fuzz.trapmf(condition_severity.universe, [50, 70, 100, 100])

symptom_relevance['low'] = fuzz.trapmf(symptom_relevance.universe, [0, 0, 30, 50])
symptom_relevance['medium'] = fuzz.trimf(symptom_relevance.universe, [30, 50, 70])
symptom_relevance['high'] = fuzz.trapmf(symptom_relevance.universe, [50, 70, 100, 100])

recommendation['routine'] = fuzz.trapmf(recommendation.universe, [0, 0, 30, 50])
recommendation['priority'] = fuzz.trimf(recommendation.universe, [30, 50, 70])
recommendation['urgent'] = fuzz.trapmf(recommendation.universe, [50, 70, 100, 100])

rule1 = ctrl.Rule(condition_severity['high'] & symptom_relevance['high'], recommendation['urgent'])
rule2 = ctrl.Rule(condition_severity['medium'] & symptom_relevance['medium'], recommendation['priority'])
rule3 = ctrl.Rule(condition_severity['low'] & symptom_relevance['low'], recommendation['routine'])

recommendation_ctrl = ctrl.ControlSystem([rule1, rule2, rule3])




In [9]:
recommendation_sim = ctrl.ControlSystemSimulation(recommendation_ctrl)
recommendation_sim.input['condition_severity'] = 80
recommendation_sim.input['symptom_relevance'] = 70
recommendation_sim.compute()

print(recommendation_sim.output['recommendation'])

79.58333333333333


In [13]:
import os

# Define the path where .hea files are stored
hea_files_directory = '../pred_res/input_data/train'
attribute_counts = {"Dx": 0, "Rx": 0, "Hx": 0, "Sx": 0}

# Loop through all .hea files in the directory
for filename in os.listdir(hea_files_directory):
    if filename.endswith('.hea'):
        # Open and read the file
        with open(os.path.join(hea_files_directory, filename), 'r') as file:
            content = file.read()
            
            # Check for the presence of each attribute
            if "#Dx: Unknown" not in content:
                attribute_counts["Dx"] += 1
            if "#Rx: Unknown" not in content:
                attribute_counts["Rx"] += 1
            if "#Hx: Unknown" not in content:
                attribute_counts["Hx"] += 1
            if "#Sx: Unknown" not in content:
                attribute_counts["Sx"] += 1

# Print the count of files with each attribute
print("Presence of attributes across .hea files:")
for attribute, count in attribute_counts.items():
    print(f"{attribute}: {count} files")

# Optional: Print total files processed for context
total_files = len([f for f in os.listdir(hea_files_directory) if f.endswith('.hea')])
print(f"Total .hea files processed: {total_files}")

Presence of attributes across .hea files:
Dx: 38788 files
Rx: 0 files
Hx: 0 files
Sx: 0 files
Total .hea files processed: 38788


In [14]:
!nvidia-smi

Sun Oct 27 01:46:43 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A10                     On  | 00000000:81:00.0 Off |                    0 |
|  0%   36C    P0              62W / 150W |  17624MiB / 23028MiB |      8%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [21]:
import pandas as pd
# Get top record from predictions.csv
top_record = pd.read_csv('predictions.csv', header=0, nrows=20)
print("Top record from predictions.csv:")
print(top_record)

Top record from predictions.csv:
       filename  10370003_label  111975006_label  164889003_label  \
0    E03065.mat               1                0                1   
1    E03604.mat               1                0                1   
2   HR15478.mat               1                0                1   
3     A3309.mat               1                0                1   
4     A2844.mat               1                0                1   
5    E08660.mat               1                0                1   
6   HR13646.mat               1                0                1   
7   HR13474.mat               1                0                1   
8     Q0903.mat               1                0                1   
9     Q2567.mat               1                0                1   
10    A2606.mat               1                0                1   
11    A3498.mat               1                0                1   
12   E07597.mat               1                0                1   
1

In [63]:
from collections import defaultdict
record_dict = defaultdict(dict)
record_dict1 = defaultdict(lambda:defaultdict(dict))

In [64]:
for _, row in top_record.iterrows():
    filename = row['filename']
    for key, value in row.items():
        if key != 'filename':
            record_key = key.split('_')
            record_dict1[filename][record_key[0]][record_key[1]] = value
    # record_dict1[filename] = row

record_dict1

defaultdict(<function __main__.<lambda>()>,
            {'E03065.mat': defaultdict(dict,
                         {'10370003': {'label': 1, 'score': 1.0},
                          '111975006': {'label': 0, 'score': 0.0},
                          '164889003': {'label': 1, 'score': 1.0},
                          '164890007': {'label': 1, 'score': 1.0},
                          '164909002': {'label': 0, 'score': 0.0},
                          '164917005': {'label': 0, 'score': 0.0},
                          '164934002': {'label': 1, 'score': 1.0},
                          '164947007': {'label': 0, 'score': 0.0},
                          '251146004': {'label': 1, 'score': 1.0},
                          '270492004': {'label': 0, 'score': 2.5726672e-36},
                          '284470004': {'label': 1, 'score': 1.0},
                          '39732003': {'label': 0, 'score': 1.9047508e-24},
                          '426177001': {'label': 1, 'score': 1.0},
                      

In [None]:
for key, value in top_record.iloc[0].items():
    if key != 'filename':
        record_key = key.split('_')
        record_dict[record_key[0]][record_key[1]] = value
    else:
        record_dict[key] = value

record_dict

defaultdict(dict,
            {'filename': 'E03065.mat',
             '10370003': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '111975006': {'label': 0, 'score': 0.0, 'strength': 1.0},
             '164889003': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '164890007': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '164909002': {'label': 0, 'score': 0.0, 'strength': 1.0},
             '164917005': {'label': 0, 'score': 0.0, 'strength': 1.0},
             '164934002': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '164947007': {'label': 0, 'score': 0.0, 'strength': 1.0},
             '251146004': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '270492004': {'label': 0,
              'score': 2.5726672e-36,
              'strength': 1.0},
             '284470004': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '39732003': {'label': 0, 'score': 1.9047508e-24, 'strength': 1.0},
             '426177001': {'label': 1,
      

Good enough metric

In [50]:
from math import log, tanh
for key, value in record_dict.items():
    if key != 'filename':
        try:
            label = value.get('label')
            score = value.get('score')
            score = max(min(score, 1 - 1e-15), 1e-15)
            log_odds = log(score / (1 - score))
            record_dict[key]["strength"] = tanh(log_odds) if label == 1 else tanh(-log_odds)
        except KeyError:
            print(f"{key}: {value}")

In [51]:
record_dict

defaultdict(dict,
            {'filename': 'E03065.mat',
             '10370003': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '111975006': {'label': 0, 'score': 0.0, 'strength': 1.0},
             '164889003': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '164890007': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '164909002': {'label': 0, 'score': 0.0, 'strength': 1.0},
             '164917005': {'label': 0, 'score': 0.0, 'strength': 1.0},
             '164934002': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '164947007': {'label': 0, 'score': 0.0, 'strength': 1.0},
             '251146004': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '270492004': {'label': 0,
              'score': 2.5726672e-36,
              'strength': 1.0},
             '284470004': {'label': 1, 'score': 1.0, 'strength': 1.0},
             '39732003': {'label': 0, 'score': 1.9047508e-24, 'strength': 1.0},
             '426177001': {'label': 1, 'score

In [36]:
from owlready2 import *
from owlready2.pymedtermino2 import *
from owlready2.pymedtermino2.umls import *

default_world.set_backend(filename="pym.sqlite3")

In [49]:
import_umls("umls-2024AA-metathesaurus-full.zip", terminologies=["SNOMEDCT_US"])
default_world.save()

Importing UMLS from umls-2024AA-metathesaurus-full.zip with Python version 3.11.7 and Owlready version 2-0.47...
  Parsing 2024AA/META/MRRANK.RRF as MRRANK
  Parsing 2024AA/META/MRCONSO.RRF as MRCONSO
  Parsing 2024AA/META/MRDEF.RRF as MRDEF
  Parsing 2024AA/META/MRREL.RRF as MRREL
  Parsing 2024AA/META/MRSAT.RRF as MRSAT
Breaking ORIG cycles...
    SNOMEDCT_US : 0 cycles found: 
    SRC : 0 cycles found: 
Finalizing only properties and restrictions...
Finalizing CUI - ORIG mapping...
FTS Indexing...


In [50]:
PYM = get_ontology("http://PYM/").load()
SNOMEDCT = PYM["SNOMEDCT_US"]

In [51]:
concept = SNOMEDCT[164917005]

In [52]:
concept

SNOMEDCT_US["164917005"] # EKG: Q wave abnormal

In [53]:
concept.name

'164917005'

In [None]:
concept.label.first().split(":")[1].strip()

'Q wave abnormal'

In [55]:
concept.synonyms

[locstr('ECG: Q wave abnormal', 'en'),
 locstr('Electrocardiographic Q wave abnormal', 'en'),
 locstr('Electrocardiographic Q wave abnormal (finding)', 'en')]

In [67]:
concept.terminology

PYM["SNOMEDCT_US"] # US Edition of SNOMED CT

In [68]:
concept.ancestor_concepts()

[SNOMEDCT_US["164917005"] # EKG: Q wave abnormal,
 SNOMEDCT_US["102594003"] # Electrocardiogram abnormal,
 SNOMEDCT_US["301120008"] # Electrocardiogram finding,
 SNOMEDCT_US["441742003"] # Evaluation finding,
 SNOMEDCT_US["127325009"] # Procedure related finding,
 SNOMEDCT_US["404684003"] # Clinical finding,
 SNOMEDCT_US["138875005"] # SNOMED CT Concept,
 SNOMEDCT_US["442618008"] # Abnormal finding on evaluation procedure,
 SNOMEDCT_US["365412003"] # Q wave - finding,
 SNOMEDCT_US["365408009"] # ECG waveform - finding]

In [69]:
concept.descendant_concepts()

[SNOMEDCT_US["164917005"] # EKG: Q wave abnormal
]

In [71]:
dir(concept)

['INDIRECT_get_properties',
 '__children',
 '__class__',
 '__classcell__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__label',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__parents',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__synonyms',
 '__terminology',
 '__weakref__',
 '_equivalent_to',
 '_get_instance_possible_relations',
 '_get_is_instance_of',
 '_instance_equivalent_to_changed',
 '_instance_is_a_changed',
 '_name',
 '_set_is_instance_of',
 'differents',
 'entity_class',
 'generate_default_name',
 'get_equivalent_to',
 'get_inverse_properties',
 'get_iri',
 'get_name',
 'get_properties',
 'iri',
 'is_a',
 'is_instance_of',
 'name',
 'namespace',
 'reload',
 'set_equivalent_to',
 'set_iri',
 'set_name',
 'storid']

In [73]:
concept.__terminology()

SRC.snomedct_us1

In [77]:
concept.namespace

get_ontology("http://PYM/").get_namespace("http://PYM/SNOMEDCT_US/")

In [83]:
concept.get_class_properties()

{PYM.has_interpretation,
 PYM.ctv3id,
 PYM.groups,
 PYM.synonyms,
 PYM.definition_status_id,
 PYM.subset_member,
 PYM.interprets,
 PYM.term_type,
 PYM.terminology,
 PYM.type_id,
 rdf-schema.label,
 PYM.case_significance_id,
 PYM.active,
 PYM.effective_time}

In [118]:
list(concept.interprets)[0].label.first().split(":")[0].strip()

'Electrocardiographic procedure'

In [102]:
concept.term_type

['PT', 'SY', 'FN']

In [104]:
concept.definition_status_id

['900000000000074008']

In [106]:
concept.groups

[<Group 129964_1> # interprets=Electrocardiographic procedure ; has_interpretation=Abnormal,
 <Group 129964_2> # interprets=Q wave feature]

In [107]:
concept.label

[locstr('EKG: Q wave abnormal', 'en')]

In [119]:
fsn = concept.fsn[0].name

AttributeError: 'fsn' property is not defined.

In [124]:
concept.get_properties(concept)

{PYM.ctv3id,
 rdf-schema.label,
 PYM.synonyms,
 PYM.definition_status_id,
 PYM.subset_member,
 PYM.term_type,
 PYM.terminology,
 PYM.type_id,
 PYM.case_significance_id,
 PYM.active,
 PYM.effective_time}

In [140]:
for rel in concept.get_properties(concept):
    print(rel)
    for value in rel[concept]:
        print(f"{rel}: {value}")

PYM.effective_time
PYM.effective_time: 20020131
PYM.active
PYM.active: 1
PYM.case_significance_id
PYM.case_significance_id: 900000000000017005
PYM.case_significance_id: 900000000000020002
PYM.type_id
PYM.type_id: 900000000000013009
PYM.type_id: 900000000000003001
PYM.terminology
PYM.terminology: PYM["SNOMEDCT_US"] # US Edition of SNOMED CT

PYM.term_type
PYM.term_type: PT
PYM.term_type: SY
PYM.term_type: FN
PYM.subset_member
PYM.subset_member: 900000000000508004~ACCEPTABILITYID~900000000000548007
PYM.subset_member: 900000000000509007~ACCEPTABILITYID~900000000000549004
PYM.subset_member: 900000000000509007~ACCEPTABILITYID~900000000000548007
PYM.subset_member: 900000000000508004~ACCEPTABILITYID~900000000000549004
PYM.subset_member: 900000000000497000~MAPTARGET~32B2.
PYM.subset_member: 6011000124106~MAPGROUP~1
PYM.subset_member: 6011000124106~MAPCATEGORYID~447637006
PYM.subset_member: 6011000124106~MAPPRIORITY~1
PYM.subset_member: 6011000124106~MAPADVICE~ALWAYS R94.31
PYM.subset_member: 6

In [128]:
!pip install wikipedia

Defaulting to user installation because normal site-packages is not writeable
Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25ldone
[?25h  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11678 sha256=ba21422284e30e45a352ad125294ad86195dab4de5500fac6fbcd63c3515d957
  Stored in directory: /home/kmallick/.cache/pip/wheels/8f/ab/cb/45ccc40522d3a1c41e1d2ad53b8f33a62f394011ec38cd71c6
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0


In [129]:
import wikipedia

In [141]:
PYM.effective_time[concept]

['20020131']

In [144]:
summary = wikipedia.summary(concept.label.first().split(":")[1].strip())

In [134]:
summary

'The QRS complex is the combination of three of the graphical deflections seen on a typical electrocardiogram (ECG or EKG). It is usually the central and most visually obvious part of the tracing. It corresponds to the depolarization of the right and left ventricles of the heart and contraction of the large ventricular muscles.\nIn adults, the QRS complex normally lasts 80 to 100 ms; in children it may be shorter. The Q, R, and S waves occur in rapid succession, do not all appear in all leads, and reflect a single event and thus are usually considered together. A Q wave is any downward deflection immediately following the P wave. An R wave follows as an upward deflection, and the S wave is any downward deflection after the R wave. The T wave follows the S wave, and in some cases, an additional U wave follows the T wave.\nTo measure the QRS interval start at the end of the PR interval (or beginning of the Q wave) to the end of the S wave. Normally this interval is 0.08 to 0.10 seconds. 