# Convert some NEO relations to a dict for easy access

## 1. Read the prepared tsv file

In [1]:
import re
import pandas as pd

# Read the TSV file into a DataFrame
df = pd.read_csv('https://www.dropbox.com/s/dk5r022dxsg7k24/NEO%20prepared%20%20%282%29.tsv?dl=1', sep='\t')
df

Unnamed: 0,Preferred Label,Synonyms,Parents
0,swallowing_reflex_absent,,cranial_nerve_reflex_finding
1,obesity_morbid,,obesity
2,headache,,pain
3,homocidal_ideation,,threatening_behavior
4,aphonia,,impaired_speech
...,...,...,...
1603,neck_pain_radiating_to_arm,,pain_radiating
1604,numbness_leg,,numbness_of_lower_limb
1605,impaired_hand_dexterity,,poor_coordination
1606,microcephaly,Abnormally small cranium; Abnormally small hea...,small_head_circumference


## 2. Make a function to handle synonyms as lists

Synonyms are a string of words separated by semicolon, colon or comma. It may contain a period, which is removed. The words of a synonym may be separated by spaces or underscore; normalize to underscore. Return all synonyms in a list.

In [2]:
def bind_words (s):
  return '_'.join(s.split())

def split_syn (syns):
  if syns:
    return [bind_words(syn.replace('.','')) for syn in (re.split('[;:,] *', syns))]
  else:
    return []

Test on one synonyms string.

In [3]:
split_syn('Abnormally small cranium; memory_loss, masked_facies:lack of facial expression.')

['Abnormally_small_cranium',
 'memory_loss',
 'masked_facies',
 'lack_of_facial_expression']

Test on all rows that have synonyms.

In [4]:
for index, row in df.iterrows():
  synonyms = row['Synonyms']
  if not pd.isna(synonyms):
    print(split_syn(synonyms))

['dyssynergia', 'dystaxia']
['constricted_pupil', 'miosis', 'abnormal_non-physiological_constriction_of_the_pupil']
['dysfluency']
['monoparesis_of_arm']
['myoedema']
['decreased_touch_on_sole_of_foot']
['tardive_dystonia']
['optic_papillitis', 'optic_disc_edema', 'optic_disc_swelling', 'choked_disc']
['decreased_radial_reflex']
['Neck_stiffness', 'Stiff_neck', 'Neck_rigidity', 'cervical_rigidity', 'Neck_rigid']
['excessive_tearing', 'watery_eyes', 'epiphora']
['hysterical_blindness']
['memory_loss']
['oculomotor_apraxia']
['lack_of_lacrimation', 'lack_of_tears', 'absent_lacrimation']
['brisk_radial_reflex', 'brisk_brachioradialis_reflex']
['Marcus_Gunn_pupil', 'positive_swinging_flashlight_test']
['monoparesis_of_leg']
['decortication']
['tetraplegia']
['pill_rolling_tremor']
['difficulty_getting_up']
['twelfth_cranial_nerve_finding', 'hypoglossal_nerve_finding']
['Any_finding_involving_the_head_such_as_macrocephaly_or_microcephaly']
['skin_observation']
['decreased_touch_on_hand']
['

Make function that lists normalized synonyms from a Synonyms cell value.

In [5]:
def list_synonyms (synval):
  if pd.isna(synval):
    return []
  else:
    return split_syn(synonyms)

Test on whole dataframe.

In [8]:
for index, row in df.head(20).iterrows():
  synonyms = row['Synonyms']
  print(list_synonyms(synonyms))

[]
[]
[]
[]
[]
[]
[]
[]
[]
['dyssynergia', 'dystaxia']
[]
[]
[]
[]
[]
[]
[]
[]
[]
['constricted_pupil', 'miosis', 'abnormal_non-physiological_constriction_of_the_pupil']


## 3. Make the basic dict based on preferred labels and their properties.

Group the dataframe by parent and aggregate the preferred labels.

In [7]:
# Group by parent and aggregate the preferred labels
df_grouped = df.groupby('Parents')['Preferred Label'].apply(list).reset_index(name='Preferred Labels')

# Rename the parent column to Common Parents
df_grouped = df_grouped.rename(columns={'parent': 'Common Parents'})

# Save the result as a TSV file
df_grouped.to_csv('neo_grouped.tsv', sep='\t', index=False)

# Print the resulting DataFrame
print(df_grouped)


                     Parents  \
0            3rd_nerve_palsy   
1              Babinski_sign   
2            Balint_syndrome   
3               ENT_symptoms   
4                GI_symptoms   
..                       ...   
507  wheelchair_usually_used   
508                withdrawn   
509               wrist_pain   
510         wrist_pain_right   
511            writers_cramp   

                                      Preferred Labels  
0    [third_nerve_palsy_right_eye, 3rd_nerve_palsy_...  
1    [extensor_plantar_responses_bilateral, extenso...  
2      [ocular_apraxia, simultanagnosia, ataxia_optic]  
3    [drooling, tinnitus, ear_pressure, hyperacusis...  
4    [hiccoughs, feeding_difficulties, gastroparesi...  
..                                                 ...  
507                                 [wheelchair_bound]  
508                                       [withdrawal]  
509          [pain_of_right_wrist, pain_of_left_wrist]  
510                           [wrist_joint_pa

Make a new dictionary that associates synonyms, parents and brothers with every preferred label.

In [11]:
# Create an empty dictionary to store the results
preferred_labels_dict = {}

# Iterate through the DataFrame rows
for index, row in df.iterrows():
    parent = row['Parents']
    preferred_label = row['Preferred Label']
    synonyms = row['Synonyms']

    # Check if the preferred label is already in the dictionary
    if preferred_label not in preferred_labels_dict:
        preferred_labels_dict[preferred_label] = {
            'Synonyms': list_synonyms(synonyms),
            'Parents': [parent],
            'Brothers': []
        }
    else:
        # If the preferred label is already in the dictionary, update the 'Parents' list
        preferred_labels_dict[preferred_label]['Parents'].append(parent)

    # Find brothers for the current label
    brothers = df[(df['Parents'] == parent) & (df['Preferred Label'] != preferred_label)]['Preferred Label'].tolist()
    preferred_labels_dict[preferred_label]['Brothers'] = brothers

# Print the created dictionary
for preferred_label, data in list(preferred_labels_dict.items())[:20]:
    print(f"{preferred_label} -> Synonyms: {data['Synonyms']}, Parents: {data['Parents']}, Brothers: {data['Brothers']}")
    #if data['Synonyms']:
      #print(data['Synonyms'])
len(preferred_labels_dict)

swallowing_reflex_absent -> Synonyms: [], Parents: ['cranial_nerve_reflex_finding'], Brothers: ['afferent_pupil_defect', 'absent_direct_light_reflex', 'mesencephalic_light_near_dissociation', 'absent_consensual_reflex', 'decreased_corneal_reflex', 'vestibulo-ocular_reflex_abnormal', 'impaired_gag_reflex']
obesity_morbid -> Synonyms: [], Parents: ['obesity'], Brothers: ['obesity_truncal', 'obesity_CTCAE']
headache -> Synonyms: [], Parents: ['pain'], Brothers: ['pain_radiating', 'pain_jaw', 'pain_foot', 'pain_scalp', 'pain_NOS', 'pain_abdominal', 'pain_eye', 'pain_neuropathic', 'tender_temporal_artery', 'pain_chest', 'pain_arm', 'pain_throat', 'chronic_pain_syndrome', 'pain_neck', 'ache', 'pain_burning', 'pain_back', 'pain_diffuse', 'pain_buttock', 'pain_hip', 'radiculopathy', 'arthralgia', 'myalgias', 'pain_leg', 'pain_pelvic', 'pain_ear', 'pain_shoulder', 'pain_sudden_onset', 'pain_stabbing', 'pain_face']
homocidal_ideation -> Synonyms: [], Parents: ['threatening_behavior'], Brothers: 

1607

In [38]:
print(preferred_labels_dict.get('ataxia'))
print(preferred_labels_dict.get('dystaxia'))
print(preferred_labels_dict.get('dyssynergia'))

{'Synonyms': ['dyssynergia', 'dystaxia'], 'Parents': ['cerebellar_signs'], 'Brothers': []}
{'Synonyms': [], 'Parents': ['ataxia'], 'Brothers': ['ataxia_truncal', 'ataxia_cerebellar', 'rebound_dysmetria', 'ataxia_sensory', 'ataxia_mild', 'ataxia_appendicular', 'ataxic']}
None


## 4. Make a dict in which synonyms point back to their preferred labels and co-synonyms.

Make a new dict `synonymdict`. For every synonym occurring in a list of synonyms in `preferred_labels_dict`, check if it also occurs as a key there. If it does not occur as a key, make a new entry in `synonymdict` with that synonym as key, and a list with the preferred label plus the other synonyms as value.

In [34]:
synonymdict = {}

# Iterate through preferred_labels_dict
for preferred_label, data in preferred_labels_dict.items():
    synonyms = data['Synonyms']

    # Iterate through the synonyms for the current preferred label
    for synonym in synonyms:
        if synonym not in synonymdict and synonym not in preferred_labels_dict:
            # Create a new entry with the synonym as the key and a list with the preferred label and other synonyms as the value
            synonymdict[synonym] = {'Synonyms': [preferred_label] + [syn for syn in synonyms if syn != synonym]}

# Print the created synonym dictionary
for synonym, data in list(synonymdict.items())[:20]:
    print(f"{synonym} -> Synonyms: {data['Synonyms']}")
len(synonymdict)

dyssynergia -> Synonyms: ['ataxia', 'dystaxia']
constricted_pupil -> Synonyms: ['miosis', 'miosis', 'abnormal_non-physiological_constriction_of_the_pupil']
abnormal_non-physiological_constriction_of_the_pupil -> Synonyms: ['miosis', 'constricted_pupil', 'miosis']
monoparesis_of_arm -> Synonyms: ['weakness_arm']
myoedema -> Synonyms: ['myedema']
decreased_touch_on_sole_of_foot -> Synonyms: ['decreased_tactile_sensation_of_sole_of_foot']
tardive_dystonia -> Synonyms: ['tardive_dyskinesia']
optic_papillitis -> Synonyms: ['papilledema', 'optic_disc_edema', 'optic_disc_swelling', 'choked_disc']
optic_disc_edema -> Synonyms: ['papilledema', 'optic_papillitis', 'optic_disc_swelling', 'choked_disc']
optic_disc_swelling -> Synonyms: ['papilledema', 'optic_papillitis', 'optic_disc_edema', 'choked_disc']
choked_disc -> Synonyms: ['papilledema', 'optic_papillitis', 'optic_disc_edema', 'optic_disc_swelling']
decreased_radial_reflex -> Synonyms: ['impaired_supinator_reflex']
Neck_stiffness -> Synony

257

In [39]:
print(synonymdict.get('dyssynergia'))

{'Synonyms': ['ataxia', 'dystaxia']}


## 5. Combine both dicts into one, store and test.

In [36]:
neocombined = preferred_labels_dict.copy()
neocombined.update(synonymdict)
print(len(neocombined), len(preferred_labels_dict), len(synonymdict))

1864 1607 257


In [43]:
print(neocombined.get('ataxia').get('Synonyms'))
print(neocombined.get('dystaxia').get('Synonyms'))
print(neocombined.get('dyssynergia').get('Synonyms'))

['dyssynergia', 'dystaxia']
[]
['ataxia', 'dystaxia']


In [15]:
import json
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [44]:
data_path = 'drive/MyDrive/Colab Notebooks/Samuel/Data/'
with open(data_path + 'neo.json','w') as outfile:
  outfile.write(json.dumps(neocombined))

In [45]:
with open(data_path + 'neo.json') as infile:
  neodict = json.load(infile)

In [50]:
for key, val in list(neodict.items())[:1600:100]:
    print(f"{key} -> {val}")

swallowing_reflex_absent -> {'Synonyms': [], 'Parents': ['cranial_nerve_reflex_finding'], 'Brothers': ['afferent_pupil_defect', 'absent_direct_light_reflex', 'mesencephalic_light_near_dissociation', 'absent_consensual_reflex', 'decreased_corneal_reflex', 'vestibulo-ocular_reflex_abnormal', 'impaired_gag_reflex']}
tonic_clonic_seizure -> {'Synonyms': [], 'Parents': ['seizure'], 'Brothers': ['automatisms', 'seizure_generalized', 'myoclonic_seizures', 'staring_episodes_during_seizures', 'absence_seizure', 'seizure_partial', 'infantile_spasms']}
transient_aphasia -> {'Synonyms': [], 'Parents': ['transient_neurological_symptoms'], 'Brothers': ['transient_limb_weakness', 'transient_amnesia', 'transient_disturbance_of_consciousness', 'temporary_visual_disturbance']}
inability_to_walk -> {'Synonyms': [], 'Parents': ['difficulty_walking'], 'Brothers': ['wheelchair_usually_used', 'impaired_climbing_stairs', 'uses_cane_walker_or_crutch', 'stumbling', 'general_unsteadiness', 'difficulty_descending

In [51]:
print(neodict.get('ataxia').get('Synonyms'))
print(neodict.get('dystaxia').get('Synonyms'))
print(neodict.get('dyssynergia').get('Synonyms'))

['dyssynergia', 'dystaxia']
[]
['ataxia', 'dystaxia']


In [58]:
def try_synonyms(word, ontodict=neodict):
  val = ontodict.get(word)
  if val:
    return val.get('Synonyms')

In [59]:
print(try_synonyms('ataxia'))
print(try_synonyms('dystaxia'))
print(try_synonyms('nonexistent'))

['dyssynergia', 'dystaxia']
[]
None


In [60]:
def try_parents(word, ontodict=neodict):
  val = ontodict.get(word)
  if val:
    return val.get('Parents')

In [61]:
print(try_parents('ataxia'))
print(try_parents('dystaxia'))
print(try_parents('nonexistent'))

['cerebellar_signs']
['ataxia']
None
