### Notebook for extracting MeSH names and publications corresponding to COVID-interacting proteins found via CaseOLAP 

Dylan Steinecke

In [2]:
import pandas as pd
import json

In [3]:
ppm = json.load(open('protein_pmid_mesh.json'))
ppm

{'O00469': [{'30905411': ['Heart Defects, Congenital']}],
 'O14874': [{'31738932': ['Myocardial Reperfusion Injury']}],
 'O60885': [{'28515341': ['Heart Failure', 'Cardiomegaly']},
  {'31215391': ['Coronary Disease']},
  {'31254363': ['Cardiomegaly']}],
 'O75347': [{'21077370': ['Myocarditis']},
  {'2019160': ['Heart Failure']},
  {'26497759': ['Heart Failure']},
  {'28474508': ['Myocarditis']},
  {'23677481': ['Heart Failure']},
  {'1328680': ['Myocarditis']},
  {'20187710': ['Myocarditis']},
  {'19762681': ['Heart Failure', 'Cardiomyopathy, Dilated', 'Myocarditis']},
  {'1602130': ['Myocarditis']},
  {'25701967': ['Heart Failure']},
  {'18715752': ['Myocarditis']},
  {'26887590': ['Heart Failure']},
  {'22760172': ['Heart Failure']},
  {'22161142': ['Myocarditis']},
  {'26621778': ['Myocarditis']},
  {'8617982': ['Myocarditis']},
  {'29887127': ['Heart Failure']},
  {'18250481': ['Myocarditis']},
  {'22774982': ['Myocarditis']},
  {'17339441': ['Myocarditis']},
  {'1932515': ['Myocar

In [22]:
pmids = set()
for protein, pmid_meshs in ppm.items():
    for pmid_mesh in pmid_meshs:
        pmid = list(pmid_mesh.keys())[0]
        pmids.add(pmid)

pmid_to_mesh = dict()
for pmid in pmids:
    pmid_to_mesh[pmid] = set()
    
for protein, pmid_meshs in ppm.items():
    for pmid_mesh in pmid_meshs:
        pmid = list(pmid_mesh.keys())[0]
        meshs = list(pmid_mesh.values())[0]
        for mesh in meshs:
            print(mesh)
            pmid_to_mesh[pmid].add(mesh)

            pmid_to_mesh

Myocarditis


{'3105913': set(),
 '7667549': set(),
 '12119188': set(),
 '8533811': set(),
 '31023080': set(),
 '26311748': set(),
 '7737022': set(),
 '11057810': set(),
 '28097840': set(),
 '9667236': set(),
 '25901639': set(),
 '8854334': set(),
 '25915762': set(),
 '26507407': set(),
 '24293035': set(),
 '16908093': set(),
 '30121201': set(),
 '16397141': set(),
 '22068070': set(),
 '1289218': set(),
 '10681765': set(),
 '23510300': set(),
 '9430386': set(),
 '19927632': set(),
 '29419666': set(),
 '28433454': set(),
 '20828647': set(),
 '17027140': set(),
 '15874972': set(),
 '10660031': set(),
 '19155117': set(),
 '9415694': set(),
 '19111959': set(),
 '25541431': set(),
 '19879983': set(),
 '24346777': set(),
 '2472615': set(),
 '17895565': set(),
 '9025133': set(),
 '1702458': set(),
 '17054467': set(),
 '25447036': set(),
 '27412778': set(),
 '29710761': set(),
 '29017651': set(),
 '15240598': set(),
 '29652463': set(),
 '7297084': set(),
 '30146261': set(),
 '24094529': set(),
 '30738245': 

In [7]:
pmn = [] # [{'UniProt': '', 'MeSH Terms': [],'Number of PubMed Publications of Protein in 8 CVDs': 0},...]

for protein, pms in ppm.items():
    pmids = set()
    meshterms = set()
    for pm in pms:
        pmids.add(list(pm.keys())[0])
        
        for mesh_list in pm.values():
            for mesh in mesh_list:
                meshterms.add(mesh)
    #pmids = len(pmids)
    pmn.append({'UniProt': protein,\
                'MeSH Terms': list(meshterms),\
                'PMIDs' : list(pmids)})
                #'Number of PubMed Publications of Protein in 8 CVDs': pmids})
pmn

[{'MeSH Terms': ['Tachycardia, Ventricular',
   'Myocarditis',
   'Coronary Thrombosis',
   'Heart Defects, Congenital',
   'Myocardial Infarction',
   'Cardiomyopathy, Restrictive',
   'Coronary Restenosis',
   'Mitral Valve Stenosis',
   'Arrhythmias, Cardiac',
   'Heart Neoplasms',
   'Kearns-Sayre Syndrome',
   'Coronary Disease',
   'Angina, Stable',
   'Cardiomyopathy, Hypertrophic',
   'Ventricular Fibrillation',
   'Cardiomyopathies',
   'Aortic Valve Stenosis',
   'Coronary Stenosis',
   'Ventricular Dysfunction, Left',
   'Diabetic Cardiomyopathies',
   'Tachycardia',
   'Atrial Fibrillation',
   'Arrhythmia, Sinus',
   'Tachycardia, Ectopic Atrial',
   'Angina, Unstable',
   'Hypertrophy, Right Ventricular',
   'Out-of-Hospital Cardiac Arrest',
   'Hypertrophy, Left Ventricular',
   'Ventricular Dysfunction, Right',
   'Takotsubo Cardiomyopathy',
   'Acute Coronary Syndrome',
   'Coronary Artery Disease',
   'Angina Pectoris',
   'Cardiomyopathy, Dilated',
   'Heart Failure'

In [8]:
# For PMIDs
with open('uniprot_mesh_pub.json', 'w') as j:
    json.dump(pmn, j)

In [43]:
# For number of PMIDs
#with open('uniprot_mesh_numofpub.json', 'w') as j:
#    json.dump(pmn, j)