In [1]:
# #第一次需要
# !pip install spacy
# !pip install scispacy
# !pip install spacy-transformers

# # 安裝models(https://allenai.github.io/scispacy/)
# !pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_scibert-0.4.0.tar.gz

In [2]:
### 前處裡
### 加載scispaCy模型 ('en_core_sci_scibert':完整的SpaCy管道，用於約785k詞彙量的生物醫學數據。) 
import scispacy
import spacy
from spacy_transformers import Transformer
import torch

# models
import en_core_sci_scibert

# Tools for extracting & displaying data
from spacy import displacy
import pandas as pd

### 資料處裡
import pickle
import time
import os

from Bio.KEGG import REST
from Bio.Entrez import efetch

from Bio import Entrez
Entrez.email = "larre0128@gmail.com"


import xml.etree.ElementTree as ET
import re
import pandas as pd


import random
import requests
from bs4 import BeautifulSoup

from urllib.error import HTTPError


import networkx as nx
import pylab 
import numpy as np
import matplotlib.pyplot as plt


import itertools
from itertools import combinations   

In [5]:
#是否存在交集
def isInter(a,b):
    result = list(set(a)&set(b))
    if result:
        return True
    else:
        return False
    
# 儲存變數函數：
def save_variable(v,filename):
    f=open(filename,'wb')
    pickle.dump(v,f)
    f.close()
    return filename

# 讀取變數函數：
def load_variavle(filename):
   f=open(filename,'rb')
   r=pickle.load(f)
   f.close()
   return r

# 句子裡的基因ncbi id
def gene_in_sentence(sentence): 
    sentence_gene = []
    string = sentence
    while string.find('Gene_')!=-1: #句子還有Gene，就繼續尋找
        sentence = string
        x = sentence.find('Gene_') + 4

        #ID至少1位數，往後找到完整ID 
        l=1
        string = sentence[x+1:]
        while string[:l].isdigit()==True: l+=1    
        ncbi_id_i = string[:l-1]
        sentence_gene.append(ncbi_id_i)
    return list(set(sentence_gene))

# nlp preprocessing
def Display_interaction_Sent(doc,Gene1,Gene2,INTERACTION):
    # Display DNA Entities 
    sent = doc.text[:]

    # POS、Tag
    tt = ""
    pp = ""
    for token in doc:
        if 'Gene' in token.text:
            tt = tt +' '+ token.text
            pp = pp +' '+ token.text
        else:
            tt = tt +' '+ token.tag_
            pp = pp +' '+ token.pos_
            
    print('\033[1m'+'Label :','\033[0m',INTERACTION) #Label
    print('\033[1m'+'Gene pair :','\033[0m',Gene1,Gene2) #Gene pair
    print('\033[1m'+'Sent :','\033[0m',sent) #Sentence
    print('\033[1m'+'Tag Sent:'+'\033[0m',tt)
    print('\033[1m'+'POS Sent:'+'\033[0m',pp,'\n')
      
# Define_Graph
def Define_Graph(doc):
    ### Define 'nodes' & 'edges' 
    #(1).define nodes(id)
    nodes = [j for j in range(0,len(doc))]
    print('\033[1m'+'nodes:','\033[0m'+'\n',nodes)

    #(2).define edges
    edges = []
    for token in doc:
        for child in token.children:
            edges.append((token.i,
                          child.i))
    print('\033[1m'+'edges:','\033[0m'+'\n',edges)
    
    ### Define 'nodes label' & 'edges label'
    #(3).define nodes_label
    nodes_tag = []
    for token in doc:
        n = token.tag_
        nodes_tag.append(('{0}'.format(n)))
    nodes_label = dict(zip(nodes, nodes_tag))
    print('\033[1m'+'nodes_label:','\033[0m')
    print(nodes_label)

    #(4).define edges_label
    #attribute(dep relation, dep direction)******
    dep_direction = []
    Syntax_relation = []
    for p in range(0,len(edges)):
        F = int(edges[p][0])
        C = int(edges[p][1])
        # Direction
        if C > F:
            d = 'Backward' #Backward
            dep_direction.append(d)
        else:
            d = 'Forward' #Forward
            dep_direction.append(d)

        # Syntax
        s = doc[C].dep_
        Syntax_relation.append(s)
    edges_label = dict(zip(edges,list(map(lambda x: np.array(x),zip(dep_direction,Syntax_relation)))))
    print('\033[1m'+'edges_label:','\033[0m')
    print("\n".join(str(i)+str(edges_label[i])for i in edges_label))

    #(5).define edges_weight
    #short dep path = 0.9 , 0.1
    weg = []
    for p in range(0,len(edges)):
        F = int(edges[p][0])
        C = int(edges[p][1])

        if C in path and F in path:
            weg.append(0.9)
        else:
            weg.append(0.1)
    edge_weight = dict(zip(edges,weg))
    print('\033[1m'+'edge_weight:','\033[0m')
    print(edge_weight)
    
    
    globals()['nodes'] = nodes
    globals()['edges'] = edges
    globals()['nodes_label'] = nodes_label
    globals()['edges_label'] = edges_label
    globals()['edge_weight'] = edge_weight
    
# load spacy model
def load_en_core_sci_scibert_model():
    spacy.prefer_gpu() #use GPU
    nlp = en_core_sci_scibert.load()
    return nlp

# load kegg gene pair
def kegg_gene_pair_fun():
    os.chdir('C:/Users/larry/Desktop/jupyter/Interaction corpus/kegg_all_gene_pair')
    kegg_gene_pair = read_csv('kegg_gene_pair.csv')
    return kegg_gene_pair

# load kegg gene list
def kegg_gene_list_fun():
    os.chdir('C:/Users/larry/Desktop/jupyter/Interaction corpus/kegg_all_gene_pair')
    kegg_gene_list = load('kegg_gene_list.npy').tolist()
    return kegg_gene_list

Reference page URL:
1. KEGG API : https://www.kegg.jp/kegg/rest/keggapi.html
2. .xml Guide : https://officeguide.cc/python-read-write-xml-format-file-tutorial-examples/
3. KEGG pathway categorical : https://www.kegg.jp/kegg/pathway.html
3. Biopython in KEGG : http://www.360doc.com/content/21/0109/14/68068867_956010436.shtml
4. Beautiful Soup Documentation : https://www.crummy.com/software/BeautifulSoup/bs4/doc/
5. Biopython document : https://biopython-tutorial.readthedocs.io/en/latest/notebooks/09%20-%20Accessing%20NCBIs%20Entrez%20databases.html
6. KGML format : https://www.genome.jp/kegg/xml/docs/
7. grakel.graph_from_networkx : https://ysig.github.io/GraKeL/0.1a8/generated/grakel.graph_from_networkx.html

# KEGG API
https://www.kegg.jp/kegg/rest/keggapi.html

In [2]:
# !pip install biopython

## Get data (example)

### Ref PMID

In [3]:
pathway_list = ["hsa05171","hsa04930"] #pathway list

Ref_PMID_DF = pd.DataFrame(columns=['kegg_pmid','pathway'])


for pathway in pathway_list:
    
    #####-----Reference PubMed Abstrat-----#####
    kegg_pmid = []
    # biopython-REST-kegg_get
    pathway_file = REST.kegg_get(pathway).read()
    # Traverse all row -find section 
    current_section = None
    for line in pathway_file.rstrip().split("\n"):
        section = line[:12].strip()
        if not section == "":
            current_section = section
        if current_section == "REFERENCE":
            pmid_index = line.index("PMID:")
            pmid = line[pmid_index+5:pmid_index+13] #K13946
            kegg_pmid.append(pmid)
            
    print('\033[1m'+pathway+' Refer PMID:\n'+'\033[0m',kegg_pmid)
        
    
    Ref_PMID_sub = pd.DataFrame(columns=['kegg_pmid'])
    Ref_PMID_sub['kegg_pmid'] = kegg_pmid
    Ref_PMID_sub['pathway'] = pathway
    Ref_PMID_DF = pd.concat([Ref_PMID_DF,Ref_PMID_sub])

[1mhsa05171 Refer PMID:
[0m ['33132005', '33077917', '32820801', '32353634', '33014208', '32544563', '32635353', '32376392', '32722596', '32922297', '33055229', '32973803', '32346093', '32979938', '32661197', '32978971', '32467561', '32687918', '32376901', '32246101', '32433641', '32423094', '33184193', '32327719', '32461141', '32559343', '32439870', '32643798', '32608159', '32558620', '32586214', '32995777', '32693241', '32405269', '32337664']
[1mhsa04930 Refer PMID:
[0m ['15823385', '15733744', '11078440', '11272200', '15759102', '12480546', '11390407', '9011569', '7491105', '15258147', '9974390', '11325516', '15585596']


In [4]:
Ref_PMID_DF

Unnamed: 0,kegg_pmid,pathway
0,33132005,hsa05171
1,33077917,hsa05171
2,32820801,hsa05171
3,32353634,hsa05171
4,33014208,hsa05171
5,32544563,hsa05171
6,32635353,hsa05171
7,32376392,hsa05171
8,32722596,hsa05171
9,32922297,hsa05171


### XML : relation(entites pair)
https://www.genome.jp/kegg/xml/docs/


In [5]:
pathway_list = ["hsa05171","hsa04930"] #pathway list

for pathway in pathway_list:
    #####-----XML : relation(entites pair)-----#####
    path_xml = REST.kegg_get(pathway, "kgml").read()
    root = ET.fromstring(path_xml)
    ### xml relation list
    xml_relation_list = pd.DataFrame(columns=['gene1_id','gene2_id',
                                              'gene1','gene2',
                                              'gene1_hsa','gene2_hsa',
                                              'gene1_type','gene2_type',
                                              'relation_type',
                                              'relation_type2'])
   

    index = 0
    for relation  in root.iter('relation'):
        print('\033[1m relation {} in {} \033[0m'.format(index,pathway))
        # relation
        relation_type_list=[]
        for r in relation.findall('subtype'):
            relation_type_list.append(r.attrib['name'])
        relation_type = ", ".join(relation_type_list)
        # relation 2
        relation_type2 = relation.attrib['type']
        # gene symbol
        for entry in root.iter('entry'): #recoganize vertex
            
            if entry.attrib['id'] == relation.attrib['entry1']:
                if entry.attrib['name'] != 'undefined':
                    gene1 = entry.find('graphics').attrib['name']
                else:
                    gene1 = "undefined"
                    
                gene1_hsa = entry.attrib['name']
                gene1_type = entry.attrib['type']
                
                
            if entry.attrib['id'] == relation.attrib['entry2']:
                if entry.attrib['name'] != 'undefined':
                    gene2 = entry.find('graphics').attrib['name']
                else:
                    gene2 = "undefined"
                    
                gene2_hsa = entry.attrib['name']
                gene2_type = entry.attrib['type']

        print(' gene1:', gene1,'\n','gene2:', gene2)
        
        xml_relation_list.loc[index] = [relation.attrib['entry1'],
                                        relation.attrib['entry2'],
                                        gene1, gene2,
                                        gene1_hsa, gene2_hsa,
                                        gene1_type, gene2_type,
                                        relation_type,
                                        relation_type2]
        index += 1
    xml_relation_list['pathway'] = pathway
    
    ## only fetch gene-gene interaction
    gene_relation_list = xml_relation_list[(xml_relation_list['gene1_type']=='gene')&(xml_relation_list['gene2_type']=='gene')]

    
    
    ### gene_symbol_list
    a = gene_relation_list.iloc[:,[0,2,4,6,8]][:]
    b = gene_relation_list.iloc[:,[1,3,5,7,8]][:]
    a.rename(columns={'gene1_id':'gene_id', 'gene1':'gene', 'gene1_hsa':'gene_hsa', 'gene1_type':'gene_type'}, inplace=True)
    b.rename(columns={'gene2_id':'gene_id', 'gene2':'gene', 'gene2_hsa':'gene_hsa', 'gene2_type':'gene_type'}, inplace=True)
    
    gene_symbol_list = pd.concat([a,b]).drop_duplicates()
    gene_symbol_list['pathway'] = pathway

    
    
    globals()['{}_xml_relation_list'.format(pathway)] = gene_relation_list
    globals()['{}_gene_symbol_list'.format(pathway)] = gene_symbol_list

[1m relation 0 in hsa05171 [0m
 gene1: C00873 
 gene2: C02135
[1m relation 1 in hsa05171 [0m
 gene1: C02135 
 gene2: AGTR1, AG2S, AGTR1B, AT1, AT1AR, AT1B, AT1BR, AT1R, AT2R1, HAT1R
[1m relation 2 in hsa05171 [0m
 gene1: CYBB, AMCBX2, CGD, GP91-1, GP91-PHOX, GP91PHOX, IMD34, NOX2, p91-PHOX 
 gene2: C00704...
[1m relation 3 in hsa05171 [0m
 gene1: C00704... 
 gene2: NFKB1, CVID12, EBP-1, KBF1, NF-kB, NF-kB1, NF-kappa-B1, NF-kappaB, NF-kappabeta, NFKB-p105, NFKB-p50, NFkappaB...
[1m relation 4 in hsa05171 [0m
 gene1: ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE 
 gene2: TNF, DIF, TNF-alpha, TNFA, TNFSF2, TNLG1F
[1m relation 5 in hsa05171 [0m
 gene1: ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE 
 gene2: HBEGF, DTR, DTS, DTSF, HEGFL
[1m relation 6 in hsa05171 [0m
 gene1: ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE 
 gene2: IL6R, CD126, HIES5, IL-6R-1, IL-6RA, IL6Q, IL6QTL, IL6RA, IL6RQ, gp80
[1m relation 7 in hsa05171 [0m
 gene1: TNF, DIF, TNF-alpha, TNFA, T

[1m relation 0 in hsa04930 [0m
 gene1: INS, IDDM, IDDM1, IDDM2, ILPR, IRDN, MODY10 
 gene2: INSR, CD220, HHF5
[1m relation 1 in hsa04930 [0m
 gene1: INSR, CD220, HHF5 
 gene2: IRS1, HIRS-1
[1m relation 2 in hsa04930 [0m
 gene1: IRS1, HIRS-1 
 gene2: PIK3CA, CLAPO, CLOVE, CWS5, MCAP, MCM, MCMTC, PI3K, PI3K-alpha, p110-alpha...
[1m relation 3 in hsa04930 [0m
 gene1: INSR, CD220, HHF5 
 gene2: IRS1, HIRS-1...
[1m relation 4 in hsa04930 [0m
 gene1: IRS1, HIRS-1... 
 gene2: PIK3CA, CLAPO, CLOVE, CWS5, MCAP, MCM, MCMTC, PI3K, PI3K-alpha, p110-alpha...
[1m relation 5 in hsa04930 [0m
 gene1: ADIPOQ, ACDC, ACRP30, ADIPQTL1, ADPN, APM-1, APM1, GBP28 
 gene2: SLC2A4, GLUT4
[1m relation 6 in hsa04930 [0m
 gene1: INSR, CD220, HHF5 
 gene2: MAPK1, ERK, ERK-2, ERK2, ERT1, MAPK2, P42MAPK, PRKM1, PRKM2, p38, p40, p41, p41mapk, p42-MAPK...
[1m relation 7 in hsa04930 [0m
 gene1: TNF, DIF, TNF-alpha, TNFA, TNFSF2, TNLG1F 
 gene2: IKBKB, IKK-beta, IKK2, IKKB, IMD15, IMD15A, IMD15B, NFKBIKB


In [6]:
hsa05171_xml_relation_list

Unnamed: 0,gene1_id,gene2_id,gene1,gene2,gene1_hsa,gene2_hsa,gene1_type,gene2_type,relation_type,relation_type2,pathway
4,201,202,"ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE","TNF, DIF, TNF-alpha, TNFA, TNFSF2, TNLG1F",hsa:6868,hsa:7124,gene,gene,activation,PPrel,hsa05171
5,201,412,"ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE","HBEGF, DTR, DTS, DTSF, HEGFL",hsa:6868,hsa:1839,gene,gene,activation,PPrel,hsa05171
6,201,203,"ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE","IL6R, CD126, HIES5, IL-6R-1, IL-6RA, IL6Q, IL6...",hsa:6868,hsa:3570,gene,gene,activation,PPrel,hsa05171
7,416,425,"TNF, DIF, TNF-alpha, TNFA, TNFSF2, TNLG1F","TNFRSF1A, CD120a, FPF, TBP1, TNF-R, TNF-R-I, T...",hsa:7124,hsa:7132,gene,gene,activation,PPrel,hsa05171
8,418,426,"HBEGF, DTR, DTS, DTSF, HEGFL","EGFR, ERBB, ERBB1, HER1, NISBD2, PIG61, mENA",hsa:1839,hsa:1956,gene,gene,activation,PPrel,hsa05171
...,...,...,...,...,...,...,...,...,...,...,...
84,303,244,"CHUK, IKBKA, IKK-alpha, IKK1, IKKA, NFKBIKA, T...","NFKBIA, EDAID2, IKBA, MAD-3, NFKBI...",hsa:1147 hsa:3551 hsa:8517,hsa:4792 hsa:4793,gene,gene,"activation, phosphorylation",PPrel,hsa05171
86,257,258,"JAK1, AIIDE, JAK1A, JAK1B, JTK3...","STAT1, CANDF7, IMD31A, IMD31B, IMD31C, ISGF-3,...",hsa:3716 hsa:7297,hsa:6772,gene,gene,"activation, phosphorylation",PPrel,hsa05171
87,257,350,"JAK1, AIIDE, JAK1A, JAK1B, JTK3...","STAT2, IMD44, ISGF-3, P113, PTORCH3, STAT113",hsa:3716 hsa:7297,hsa:6773,gene,gene,"activation, phosphorylation",PPrel,hsa05171
89,444,681,"NFKB1, CVID12, EBP-1, KBF1, NF-kB, NF-kB1, NF-...","IL6, BSF-2, BSF2, CDF, HGF, HSF, IFN-beta-2, I...",hsa:4790 hsa:5970,hsa:3569,gene,gene,expression,GErel,hsa05171


In [7]:
hsa04930_gene_symbol_list

Unnamed: 0,gene_id,gene,gene_hsa,gene_type,relation_type,pathway
0,17,"INS, IDDM, IDDM1, IDDM2, ILPR, IRDN, MODY10",hsa:3630,gene,activation,hsa04930
1,16,"INSR, CD220, HHF5",hsa:3643,gene,activation,hsa04930
2,15,"IRS1, HIRS-1",hsa:3667,gene,activation,hsa04930
4,31,"IRS1, HIRS-1...",hsa:3667 hsa:8471 hsa:8660,gene,activation,hsa04930
5,27,"ADIPOQ, ACDC, ACRP30, ADIPQTL1, ADPN, APM-1, A...",hsa:9370,gene,indirect effect,hsa04930
6,16,"INSR, CD220, HHF5",hsa:3643,gene,indirect effect,hsa04930
7,34,"TNF, DIF, TNF-alpha, TNFA, TNFSF2, TNLG1F",hsa:7124,gene,indirect effect,hsa04930
10,13,"PIK3CA, CLAPO, CLOVE, CWS5, MCAP, MCM, MCMTC, ...",hsa:5290 hsa:5291 hsa:5293 hsa:5295 hsa:5296 h...,gene,indirect effect,hsa04930
13,22,"SOCS4, SOCS7...",hsa:122809 hsa:8651 hsa:8835 hsa:9021,gene,inhibition,hsa04930
15,28,"IKBKB, IKK-beta, IKK2, IKKB, IMD15, IMD15A, IM...",hsa:3551,gene,"inhibition, phosphorylation",hsa04930


## Get data (Apply)
only 'Human Diseases' from KEGG Pathway Maps

### Get 'Human Diseases' pathway list


In [6]:
#kegg pathway categorical
url = 'https://www.kegg.jp/kegg/pathway.html'

#check connect
resp = requests.get(url)
if resp.status_code != 200:
    print('Webpage error:', url)

#parsing page
soup = BeautifulSoup(resp.text, 'html.parser')

#find parhway
result = soup.find_all("h4")
result = soup.find_all(["h4","dt"])

# only get human diseases 
for r in result:
    if 'Human Diseases' in str(r):
        start = result.index(r)+1
result2 = result[start:]  
for r in result2:
    if '<h4' in str(r):
        end = result.index(r)
result3 = result[start:end]   


pathway_list = []
#get pathway list
for r in result3:
    path = 'hsa'+r.contents[0].replace(' ','')    
    pathway_list.append(path)
    
            
pathway_list

['hsa05200',
 'hsa05202',
 'hsa05206',
 'hsa05205',
 'hsa05204',
 'hsa05203',
 'hsa05230',
 'hsa05231',
 'hsa05235',
 'hsa05210',
 'hsa05212',
 'hsa05225',
 'hsa05226',
 'hsa05214',
 'hsa05216',
 'hsa05221',
 'hsa05220',
 'hsa05217',
 'hsa05218',
 'hsa05211',
 'hsa05219',
 'hsa05215',
 'hsa05213',
 'hsa05224',
 'hsa05222',
 'hsa05223',
 'hsa05166',
 'hsa05170',
 'hsa05161',
 'hsa05160',
 'hsa05171',
 'hsa05164',
 'hsa05162',
 'hsa05168',
 'hsa05163',
 'hsa05167',
 'hsa05169',
 'hsa05165',
 'hsa05110',
 'hsa05120',
 'hsa05130',
 'hsa05132',
 'hsa05131',
 'hsa05135',
 'hsa05133',
 'hsa05134',
 'hsa05150',
 'hsa05152',
 'hsa05100',
 'hsa05146',
 'hsa05144',
 'hsa05145',
 'hsa05140',
 'hsa05142',
 'hsa05143',
 'hsa05310',
 'hsa05322',
 'hsa05323',
 'hsa05320',
 'hsa05321',
 'hsa05330',
 'hsa05332',
 'hsa05340',
 'hsa05010',
 'hsa05012',
 'hsa05014',
 'hsa05016',
 'hsa05017',
 'hsa05020',
 'hsa05022',
 'hsa05030',
 'hsa05031',
 'hsa05032',
 'hsa05033',
 'hsa05034',
 'hsa05417',
 'hsa05418',

In [7]:
len(pathway_list)

96

### Traverse all pathway



In [8]:
print ("Start : %s" % time.ctime())

Ref_PMID_DF = pd.DataFrame(columns=['kegg_pmid','pathway'])

for pathway in pathway_list:
    print('\033[1m------------------------------------------{}.{}------------------------------------------\033[0m'.format(pathway_list.index(pathway),pathway))
    #####-----Reference PubMed Abstrat-----#####
    kegg_pmid = []
    
    # biopython-REST-kegg_get
    try:
        pathway_file = REST.kegg_get(pathway).read()
    except HTTPError as err:
        if err.code == 404:
            print("Received error from server :  '%s'" % err)
            print("page is not found, skip",pathway)
            continue
    
    # Traverse all row -find section 
    current_section = None
    for line in pathway_file.rstrip().split("\n"):
        section = line[:12].strip()
        if not section == "":
            current_section = section
        if current_section == "REFERENCE" and 'PMID' in line:
            pmid_index = line.index("PMID:")
            pmid = line[pmid_index+5:pmid_index+13] #K13946
            kegg_pmid.append(pmid)
            
    print('\033[1m'+pathway+' Refer PMID:\n'+'\033[0m',kegg_pmid)
        
    
    Ref_PMID_sub = pd.DataFrame(columns=['kegg_pmid'])
    Ref_PMID_sub['kegg_pmid'] = kegg_pmid
    Ref_PMID_sub['pathway'] = pathway
    Ref_PMID_DF = pd.concat([Ref_PMID_DF,Ref_PMID_sub])
    
    
    
     #####-----XML : relation(entites pair)-----#####
    path_xml = REST.kegg_get(pathway, "kgml").read()
    root = ET.fromstring(path_xml)
    ### xml relation list
    xml_relation_list = pd.DataFrame(columns=['gene1_id','gene2_id',
                                              'gene1','gene2',
                                              'gene1_hsa','gene2_hsa',
                                              'gene1_type','gene2_type',
                                              'relation_type',
                                              'relation_type2'])
   

    index = 0
    for relation  in root.iter('relation'):
        print('\033[1m relation {} in {} \033[0m'.format(index,pathway))
        # relation
        relation_type_list=[]
        for r in relation.findall('subtype'):
            relation_type_list.append(r.attrib['name'])
        relation_type = ", ".join(relation_type_list)
        # relation 2
        relation_type2 = relation.attrib['type']
        # gene symbol
        for entry in root.iter('entry'): #recoganize vertex
            
            if entry.attrib['id'] == relation.attrib['entry1']:
                if entry.attrib['name'] != 'undefined':
                    gene1 = entry.find('graphics').attrib['name']
                else:
                    gene1 = "undefined"
                    
                gene1_hsa = entry.attrib['name']
                gene1_type = entry.attrib['type']
                
                
            if entry.attrib['id'] == relation.attrib['entry2']:
                if entry.attrib['name'] != 'undefined':
                    gene2 = entry.find('graphics').attrib['name']
                else:
                    gene2 = "undefined"
                    
                gene2_hsa = entry.attrib['name']
                gene2_type = entry.attrib['type']

        
        xml_relation_list.loc[index] = [relation.attrib['entry1'],
                                        relation.attrib['entry2'],
                                        gene1, gene2,
                                        gene1_hsa, gene2_hsa,
                                        gene1_type, gene2_type,
                                        relation_type,
                                        relation_type2]
        index += 1
    xml_relation_list['pathway'] = pathway
    
    ## only fetch gene-gene interaction
    gene_relation_list = xml_relation_list[(xml_relation_list['gene1_type']=='gene')&(xml_relation_list['gene2_type']=='gene')]

    
    
    ### gene_symbol_list
    a = gene_relation_list.iloc[:,[0,2,4,6,8]][:]
    b = gene_relation_list.iloc[:,[1,3,5,7,8]][:]
    a.rename(columns={'gene1_id':'gene_id', 'gene1':'gene', 'gene1_hsa':'gene_hsa', 'gene1_type':'gene_type'}, inplace=True)
    b.rename(columns={'gene2_id':'gene_id', 'gene2':'gene', 'gene2_hsa':'gene_hsa', 'gene2_type':'gene_type'}, inplace=True)
    
    gene_symbol_list = pd.concat([a,b]).drop_duplicates()
    gene_symbol_list['pathway'] = pathway

    
    globals()['{}_xml_relation_list'.format(pathway)] = gene_relation_list
    globals()['{}_gene_symbol_list'.format(pathway)] = gene_symbol_list

    
Ref_PMID_DF = Ref_PMID_DF[Ref_PMID_DF.astype(str).ne('None').all(1)] 

print ("End : %s" % time.ctime())



Start : Thu May 20 12:36:24 2021
[1m------------------------------------------0.hsa05200------------------------------------------[0m
[1mhsa05200 Refer PMID:
[0m ['10647931', '15000146', '16555243', '11078609', '16699851', '12621137', '12951588', '10505543', '16042571', '15349822', '9196022', '12737309', '11459867', '15573119', '15310786', '11223406', '15711891', '11170304', '16326109', '15479695', '11477132', '12459728', '11407945', '12946833', '16702400', '10740269', '12648469', '14967450', '14586404', '12545153', '14580692', '12079267', '15774796', '15823750', '11707511', '15639402', '12154354', '11253051', '16700615', '12762887', '16095998', '11057895', '9643506', '12084351', '16557281', '16551846', '16946003', '17062879', '16242838', '16189702', '11165748', '12094241', '16236521', '12563308', '16146838', '12951584', '16642045', '15625120', '11721960', '12468433', '17554387', '16352814', '15719031', '11071626', '14982876', '10403855', '15156182', '16484590', '9834202', '1280183

[1m relation 231 in hsa05200 [0m
[1m relation 232 in hsa05200 [0m
[1m relation 233 in hsa05200 [0m
[1m relation 234 in hsa05200 [0m
[1m relation 235 in hsa05200 [0m
[1m relation 236 in hsa05200 [0m
[1m relation 237 in hsa05200 [0m
[1m relation 238 in hsa05200 [0m
[1m relation 239 in hsa05200 [0m
[1m relation 240 in hsa05200 [0m
[1m relation 241 in hsa05200 [0m
[1m relation 242 in hsa05200 [0m
[1m relation 243 in hsa05200 [0m
[1m relation 244 in hsa05200 [0m
[1m relation 245 in hsa05200 [0m
[1m relation 246 in hsa05200 [0m
[1m relation 247 in hsa05200 [0m
[1m relation 248 in hsa05200 [0m
[1m relation 249 in hsa05200 [0m
[1m relation 250 in hsa05200 [0m
[1m relation 251 in hsa05200 [0m
[1m relation 252 in hsa05200 [0m
[1m relation 253 in hsa05200 [0m
[1m relation 254 in hsa05200 [0m
[1m relation 255 in hsa05200 [0m
[1m relation 256 in hsa05200 [0m
[1m relation 257 in hsa05200 [0m
[1m relation 258 in hsa05200 [0m
[1m relation 259 in

[1m relation 149 in hsa05206 [0m
[1m relation 150 in hsa05206 [0m
[1m relation 151 in hsa05206 [0m
[1m relation 152 in hsa05206 [0m
[1m relation 153 in hsa05206 [0m
[1m relation 154 in hsa05206 [0m
[1m relation 155 in hsa05206 [0m
[1m relation 156 in hsa05206 [0m
[1m relation 157 in hsa05206 [0m
[1m relation 158 in hsa05206 [0m
[1m relation 159 in hsa05206 [0m
[1m relation 160 in hsa05206 [0m
[1m relation 161 in hsa05206 [0m
[1m relation 162 in hsa05206 [0m
[1m relation 163 in hsa05206 [0m
[1m relation 164 in hsa05206 [0m
[1m relation 165 in hsa05206 [0m
[1m relation 166 in hsa05206 [0m
[1m relation 167 in hsa05206 [0m
[1m relation 168 in hsa05206 [0m
[1m relation 169 in hsa05206 [0m
[1m relation 170 in hsa05206 [0m
[1m relation 171 in hsa05206 [0m
[1m relation 172 in hsa05206 [0m
[1m relation 173 in hsa05206 [0m
[1m relation 174 in hsa05206 [0m
[1m relation 175 in hsa05206 [0m
[1m relation 176 in hsa05206 [0m
[1m relation 177 in

[1mhsa05204 Refer PMID:
[0m ['15660110', '14570033', '18066431', '15489140', '21234336', '12928348', '11978484', '11275476', '20232918', '9202760', '20570348', '8702479', '21456541', '20669524', '17445838', '15298956', '19531241', '11376689', '11467078', '15532073', '21217854', '10913381', '11092599', '22705365', '12435844', '9275165', '8759017', '21473878', '21154658', '20159989', '17459420', '22178655', '8951235', '21163908', '10753215', '15582261', '19657920', '16421178', '16959879', '11222868', '9356303', '9152590', '21255676', '16084004', '17292678']
[1m relation 0 in hsa05204 [0m
[1m relation 1 in hsa05204 [0m
[1m relation 2 in hsa05204 [0m
[1m relation 3 in hsa05204 [0m
[1m relation 4 in hsa05204 [0m
[1m relation 5 in hsa05204 [0m
[1m relation 6 in hsa05204 [0m
[1m relation 7 in hsa05204 [0m
[1m relation 8 in hsa05204 [0m
[1m relation 9 in hsa05204 [0m
[1m relation 10 in hsa05204 [0m
[1m relation 11 in hsa05204 [0m
[1m relation 12 in hsa05204 [0m
[1m

[1m relation 75 in hsa05235 [0m
[1m relation 76 in hsa05235 [0m
[1m relation 77 in hsa05235 [0m
[1m relation 78 in hsa05235 [0m
[1m relation 79 in hsa05235 [0m
[1m------------------------------------------9.hsa05210------------------------------------------[0m
[1mhsa05210 Refer PMID:
[0m ['15000146', '16555243', '11078609', '16699851', '12621137', '12951588', '10505543', '16042571', '15349822', '9196022', '12737309', '11459867', '15573119', '15310786', '11223406', '15711891', '11170304', '16326109', '15479695', '11477132', '19167459', '18568040', '24213116', '20554751', '26149458']
[1m relation 0 in hsa05210 [0m
[1m relation 1 in hsa05210 [0m
[1m relation 2 in hsa05210 [0m
[1m relation 3 in hsa05210 [0m
[1m relation 4 in hsa05210 [0m
[1m relation 5 in hsa05210 [0m
[1m relation 6 in hsa05210 [0m
[1m relation 7 in hsa05210 [0m
[1m relation 8 in hsa05210 [0m
[1m relation 9 in hsa05210 [0m
[1m relation 10 in hsa05210 [0m
[1m relation 11 in hsa05210 [0m

[1mhsa05214 Refer PMID:
[0m ['15639402', '12154354', '11253051', '16700615', '12762887', '16095998', '11057895', '9643506', '12084351']
[1m relation 0 in hsa05214 [0m
[1m relation 1 in hsa05214 [0m
[1m relation 2 in hsa05214 [0m
[1m relation 3 in hsa05214 [0m
[1m relation 4 in hsa05214 [0m
[1m relation 5 in hsa05214 [0m
[1m relation 6 in hsa05214 [0m
[1m relation 7 in hsa05214 [0m
[1m relation 8 in hsa05214 [0m
[1m relation 9 in hsa05214 [0m
[1m relation 10 in hsa05214 [0m
[1m relation 11 in hsa05214 [0m
[1m relation 12 in hsa05214 [0m
[1m relation 13 in hsa05214 [0m
[1m relation 14 in hsa05214 [0m
[1m relation 15 in hsa05214 [0m
[1m relation 16 in hsa05214 [0m
[1m relation 17 in hsa05214 [0m
[1m relation 18 in hsa05214 [0m
[1m relation 19 in hsa05214 [0m
[1m relation 20 in hsa05214 [0m
[1m relation 21 in hsa05214 [0m
[1m relation 22 in hsa05214 [0m
[1m relation 23 in hsa05214 [0m
[1m relation 24 in hsa05214 [0m
[1m relation 25 in hs

[1m relation 17 in hsa05217 [0m
[1m relation 18 in hsa05217 [0m
[1m relation 19 in hsa05217 [0m
[1m------------------------------------------18.hsa05218------------------------------------------[0m
[1mhsa05218 Refer PMID:
[0m ['16822996', '12894244', '16750612', '15841168', '16001050', '16001072', '16899407', '15009714', '11224709', '15721476', '14695152', '15557758', '10843728']
[1m relation 0 in hsa05218 [0m
[1m relation 1 in hsa05218 [0m
[1m relation 2 in hsa05218 [0m
[1m relation 3 in hsa05218 [0m
[1m relation 4 in hsa05218 [0m
[1m relation 5 in hsa05218 [0m
[1m relation 6 in hsa05218 [0m
[1m relation 7 in hsa05218 [0m
[1m relation 8 in hsa05218 [0m
[1m relation 9 in hsa05218 [0m
[1m relation 10 in hsa05218 [0m
[1m relation 11 in hsa05218 [0m
[1m relation 12 in hsa05218 [0m
[1m relation 13 in hsa05218 [0m
[1m relation 14 in hsa05218 [0m
[1m relation 15 in hsa05218 [0m
[1m relation 16 in hsa05218 [0m
[1m relation 17 in hsa05218 [0m
[1m 

[1m relation 80 in hsa05224 [0m
[1m relation 81 in hsa05224 [0m
[1m relation 82 in hsa05224 [0m
[1m relation 83 in hsa05224 [0m
[1m relation 84 in hsa05224 [0m
[1m relation 85 in hsa05224 [0m
[1m relation 86 in hsa05224 [0m
[1m relation 87 in hsa05224 [0m
[1m relation 88 in hsa05224 [0m
[1m relation 89 in hsa05224 [0m
[1m relation 90 in hsa05224 [0m
[1m relation 91 in hsa05224 [0m
[1m relation 92 in hsa05224 [0m
[1m relation 93 in hsa05224 [0m
[1m relation 94 in hsa05224 [0m
[1m relation 95 in hsa05224 [0m
[1m relation 96 in hsa05224 [0m
[1m relation 97 in hsa05224 [0m
[1m relation 98 in hsa05224 [0m
[1m relation 99 in hsa05224 [0m
[1m relation 100 in hsa05224 [0m
[1m relation 101 in hsa05224 [0m
[1m relation 102 in hsa05224 [0m
[1m relation 103 in hsa05224 [0m
[1m------------------------------------------24.hsa05222------------------------------------------[0m
[1mhsa05222 Refer PMID:
[0m ['12379883', '12741677', '12711111', '12407699

[1mhsa05170 Refer PMID:
[0m ['22749173', '23806647', '20041213', '22085380', '21321123', '15817946', '24822052', '22090097', '26056579', '20672047', '26628325', '25988887', '28358311', '18178131', '24453421', '23986795', '21368875', '12766761', '21994747', '11237054', '22767237', '21762823', '20594957', '27161574', '26950141', '24906315', '15638722', '17140287', '23430691', '19038776', '21835787']
[1m relation 0 in hsa05170 [0m
[1m relation 1 in hsa05170 [0m
[1m relation 2 in hsa05170 [0m
[1m relation 3 in hsa05170 [0m
[1m relation 4 in hsa05170 [0m
[1m relation 5 in hsa05170 [0m
[1m relation 6 in hsa05170 [0m
[1m relation 7 in hsa05170 [0m
[1m relation 8 in hsa05170 [0m
[1m relation 9 in hsa05170 [0m
[1m relation 10 in hsa05170 [0m
[1m relation 11 in hsa05170 [0m
[1m relation 12 in hsa05170 [0m
[1m relation 13 in hsa05170 [0m
[1m relation 14 in hsa05170 [0m
[1m relation 15 in hsa05170 [0m
[1m relation 16 in hsa05170 [0m
[1m relation 17 in hsa05170 

[1m relation 71 in hsa05160 [0m
[1m relation 72 in hsa05160 [0m
[1m relation 73 in hsa05160 [0m
[1m relation 74 in hsa05160 [0m
[1m relation 75 in hsa05160 [0m
[1m relation 76 in hsa05160 [0m
[1m relation 77 in hsa05160 [0m
[1m relation 78 in hsa05160 [0m
[1m relation 79 in hsa05160 [0m
[1m------------------------------------------30.hsa05171------------------------------------------[0m
[1mhsa05171 Refer PMID:
[0m ['33132005', '33077917', '32820801', '32353634', '33014208', '32544563', '32635353', '32376392', '32722596', '32922297', '33055229', '32973803', '32346093', '32979938', '32661197', '32978971', '32467561', '32687918', '32376901', '32246101', '32433641', '32423094', '33184193', '32327719', '32461141', '32559343', '32439870', '32643798', '32608159', '32558620', '32586214', '32995777', '32693241', '32405269', '32337664']
[1m relation 0 in hsa05171 [0m
[1m relation 1 in hsa05171 [0m
[1m relation 2 in hsa05171 [0m
[1m relation 3 in hsa05171 [0m
[1m re

[1mhsa05168 Refer PMID:
[0m ['12787566', '19495760', '20221426', '23150579', '25375272', '15056211', '12855695', '24262390', '26952111', '21994567', '15276609', '29298887', '19344302', '23115300', '17229694', '12826401', '27367734', '15163721', '23864622', '21994592', '21146386', '12960333', '25798530', '28468873', '18508584', '15078920', '27154074']
[1m relation 0 in hsa05168 [0m
[1m relation 1 in hsa05168 [0m
[1m relation 2 in hsa05168 [0m
[1m relation 3 in hsa05168 [0m
[1m relation 4 in hsa05168 [0m
[1m relation 5 in hsa05168 [0m
[1m relation 6 in hsa05168 [0m
[1m relation 7 in hsa05168 [0m
[1m relation 8 in hsa05168 [0m
[1m relation 9 in hsa05168 [0m
[1m relation 10 in hsa05168 [0m
[1m relation 11 in hsa05168 [0m
[1m relation 12 in hsa05168 [0m
[1m relation 13 in hsa05168 [0m
[1m relation 14 in hsa05168 [0m
[1m relation 15 in hsa05168 [0m
[1m relation 16 in hsa05168 [0m
[1m relation 17 in hsa05168 [0m
[1m relation 18 in hsa05168 [0m
[1m rela

[1mhsa05167 Refer PMID:
[0m ['22403573', '18948197', '21040832', '16760382', '19230674', '16905191', '11500830', '10666184', '26431609', '10377196', '18434408', '15692053', '15967719', '16828973', '12032073', '10510092', '10523854', '12890756', '20865011', '10799607', '17166914', '18230726', '11413168', '24710493', '10354594', '9594662', '12770701', '11960692', '15220441', '10987301', '27518127', '15573137', '17329974', '23028325', '10318948', '26192396', '25187543', '17050609', '16585191', '22521915', '25594835', '14991150', '26870016']
[1m relation 0 in hsa05167 [0m
[1m relation 1 in hsa05167 [0m
[1m relation 2 in hsa05167 [0m
[1m relation 3 in hsa05167 [0m
[1m relation 4 in hsa05167 [0m
[1m relation 5 in hsa05167 [0m
[1m relation 6 in hsa05167 [0m
[1m relation 7 in hsa05167 [0m
[1m relation 8 in hsa05167 [0m
[1m relation 9 in hsa05167 [0m
[1m relation 10 in hsa05167 [0m
[1m relation 11 in hsa05167 [0m
[1m relation 12 in hsa05167 [0m
[1m relation 13 in hs

[1mhsa05165 Refer PMID:
[0m ['20592731', '25752815', '26008697', '19684440', '23164805', '19156753', '17645777', '11486705', '10793105', '16259056', '15956361', '15342967', '24316445', '26295406', '26289783', '26797638', '26147797', '21954943', '23175122', '19029942', '19901438', '16862386', '26022660', '22078316', '11753669', '22262402', '23731971', '23403708', '26643553', '19861444', '11325599', '10702232', '22499770', '9990017', '15175323', '18505829', '23536685']
[1m relation 0 in hsa05165 [0m
[1m relation 1 in hsa05165 [0m
[1m relation 2 in hsa05165 [0m
[1m relation 3 in hsa05165 [0m
[1m relation 4 in hsa05165 [0m
[1m relation 5 in hsa05165 [0m
[1m relation 6 in hsa05165 [0m
[1m relation 7 in hsa05165 [0m
[1m relation 8 in hsa05165 [0m
[1m relation 9 in hsa05165 [0m
[1m relation 10 in hsa05165 [0m
[1m relation 11 in hsa05165 [0m
[1m relation 12 in hsa05165 [0m
[1m relation 13 in hsa05165 [0m
[1m relation 14 in hsa05165 [0m
[1m relation 15 in hsa051

[1m relation 69 in hsa05130 [0m
[1m relation 70 in hsa05130 [0m
[1m relation 71 in hsa05130 [0m
[1m relation 72 in hsa05130 [0m
[1m relation 73 in hsa05130 [0m
[1m relation 74 in hsa05130 [0m
[1m relation 75 in hsa05130 [0m
[1m relation 76 in hsa05130 [0m
[1m relation 77 in hsa05130 [0m
[1m relation 78 in hsa05130 [0m
[1m relation 79 in hsa05130 [0m
[1m relation 80 in hsa05130 [0m
[1m relation 81 in hsa05130 [0m
[1m relation 82 in hsa05130 [0m
[1m relation 83 in hsa05130 [0m
[1m relation 84 in hsa05130 [0m
[1m relation 85 in hsa05130 [0m
[1m relation 86 in hsa05130 [0m
[1m relation 87 in hsa05130 [0m
[1m relation 88 in hsa05130 [0m
[1m relation 89 in hsa05130 [0m
[1m relation 90 in hsa05130 [0m
[1m relation 91 in hsa05130 [0m
[1m relation 92 in hsa05130 [0m
[1m------------------------------------------41.hsa05132------------------------------------------[0m
[1mhsa05132 Refer PMID:
[0m ['28875943', '28848711', '31402916', '27974148', '

[1m------------------------------------------43.hsa05135------------------------------------------[0m
[1mhsa05135 Refer PMID:
[0m ['26981193', '29477730', '19339545', '17257058', '10785635', '15044442', '12077360', '11734206', '15516995', '27569559', '15060067', '10489373', '16301742', '16339560', '16845370', '26651944', '24034616', '14623872']
[1m relation 0 in hsa05135 [0m
[1m relation 1 in hsa05135 [0m
[1m relation 2 in hsa05135 [0m
[1m relation 3 in hsa05135 [0m
[1m relation 4 in hsa05135 [0m
[1m relation 5 in hsa05135 [0m
[1m relation 6 in hsa05135 [0m
[1m relation 7 in hsa05135 [0m
[1m relation 8 in hsa05135 [0m
[1m relation 9 in hsa05135 [0m
[1m relation 10 in hsa05135 [0m
[1m relation 11 in hsa05135 [0m
[1m relation 12 in hsa05135 [0m
[1m relation 13 in hsa05135 [0m
[1m relation 14 in hsa05135 [0m
[1m relation 15 in hsa05135 [0m
[1m relation 16 in hsa05135 [0m
[1m relation 17 in hsa05135 [0m
[1m relation 18 in hsa05135 [0m
[1m relation

[1mhsa05152 Refer PMID:
[0m ['21274449', '20415982', '11932234', '18288961', '17359235', '21274433', '15083155', '20199890', '15228522', '19563525', '19369951', '18541216', '19634703', '16200082', '17785780', '18407066', '11378475']
[1m relation 0 in hsa05152 [0m
[1m relation 1 in hsa05152 [0m
[1m relation 2 in hsa05152 [0m
[1m relation 3 in hsa05152 [0m
[1m relation 4 in hsa05152 [0m
[1m relation 5 in hsa05152 [0m
[1m relation 6 in hsa05152 [0m
[1m relation 7 in hsa05152 [0m
[1m relation 8 in hsa05152 [0m
[1m relation 9 in hsa05152 [0m
[1m relation 10 in hsa05152 [0m
[1m relation 11 in hsa05152 [0m
[1m relation 12 in hsa05152 [0m
[1m relation 13 in hsa05152 [0m
[1m relation 14 in hsa05152 [0m
[1m relation 15 in hsa05152 [0m
[1m relation 16 in hsa05152 [0m
[1m relation 17 in hsa05152 [0m
[1m relation 18 in hsa05152 [0m
[1m relation 19 in hsa05152 [0m
[1m relation 20 in hsa05152 [0m
[1m relation 21 in hsa05152 [0m
[1m relation 22 in hsa0515

[1mhsa05140 Refer PMID:
[0m ['11890722', '10770277', '7591091', '10417174', '7916951', '15721837', '17543969', '15983048', '15322192', '18070909', '15039466', '10586030', '15831826', '11298294', '12438359', '9504340', '18793215', '10462516', '15639739', '16848789', '11728310', '10608501', '14762510']
[1m relation 0 in hsa05140 [0m
[1m relation 1 in hsa05140 [0m
[1m relation 2 in hsa05140 [0m
[1m relation 3 in hsa05140 [0m
[1m relation 4 in hsa05140 [0m
[1m relation 5 in hsa05140 [0m
[1m relation 6 in hsa05140 [0m
[1m relation 7 in hsa05140 [0m
[1m relation 8 in hsa05140 [0m
[1m relation 9 in hsa05140 [0m
[1m relation 10 in hsa05140 [0m
[1m relation 11 in hsa05140 [0m
[1m relation 12 in hsa05140 [0m
[1m relation 13 in hsa05140 [0m
[1m relation 14 in hsa05140 [0m
[1m relation 15 in hsa05140 [0m
[1m relation 16 in hsa05140 [0m
[1m relation 17 in hsa05140 [0m
[1m relation 18 in hsa05140 [0m
[1m relation 19 in hsa05140 [0m
[1m relation 20 in hsa051

[1mhsa05330 Refer PMID:
[0m ['10746853', '12023610', '17445565', '14710779', '8835634', '10571983', '10331992', '10667806', '12216939', '12563298']
[1m relation 0 in hsa05330 [0m
[1m relation 1 in hsa05330 [0m
[1m relation 2 in hsa05330 [0m
[1m relation 3 in hsa05330 [0m
[1m relation 4 in hsa05330 [0m
[1m relation 5 in hsa05330 [0m
[1m relation 6 in hsa05330 [0m
[1m------------------------------------------61.hsa05332------------------------------------------[0m
[1mhsa05332 Refer PMID:
[0m ['16412784', '14735553', '17900507', '15882431', '12359826', '14556773', '16147539', '15372473', '17784964']
[1m relation 0 in hsa05332 [0m
[1m relation 1 in hsa05332 [0m
[1m relation 2 in hsa05332 [0m
[1m relation 3 in hsa05332 [0m
[1m------------------------------------------62.hsa05340------------------------------------------[0m
[1mhsa05340 Refer PMID:
[0m ['16261175', '11941303', '17960151', '17952897', '18093537', '14699405', '11058677', '15661024', '14647478']


[1mhsa05014 Refer PMID:
[0m ['28700839', '28512398', '30879475', '30870681', '23415570', '16924260', '16723044', '28669745', '32344665', '16372325', '16603792', '17051205', '16909017', '14569202', '15310460', '12595144', '17434459', '27649160', '19230774', '16026864', '11239414', '11870681', '16713195', '30837838', '29626651', '26264610', '28148298', '28878620', '17409386', '17566607', '26388731', '25991442', '32119873', '28057298', '24619348', '27815720', '29476642', '21722302', '16472115', '18519638', '21834058', '31319884', '29605155', '25071440', '29299811', '30242016', '23361386']
[1m relation 0 in hsa05014 [0m
[1m relation 1 in hsa05014 [0m
[1m relation 2 in hsa05014 [0m
[1m relation 3 in hsa05014 [0m
[1m relation 4 in hsa05014 [0m
[1m relation 5 in hsa05014 [0m
[1m relation 6 in hsa05014 [0m
[1m relation 7 in hsa05014 [0m
[1m relation 8 in hsa05014 [0m
[1m relation 9 in hsa05014 [0m
[1m relation 10 in hsa05014 [0m
[1m relation 11 in hsa05014 [0m
[1m re

[1m relation 0 in hsa05020 [0m
[1m relation 1 in hsa05020 [0m
[1m relation 2 in hsa05020 [0m
[1m relation 3 in hsa05020 [0m
[1m relation 4 in hsa05020 [0m
[1m relation 5 in hsa05020 [0m
[1m relation 6 in hsa05020 [0m
[1m relation 7 in hsa05020 [0m
[1m relation 8 in hsa05020 [0m
[1m relation 9 in hsa05020 [0m
[1m relation 10 in hsa05020 [0m
[1m relation 11 in hsa05020 [0m
[1m relation 12 in hsa05020 [0m
[1m relation 13 in hsa05020 [0m
[1m relation 14 in hsa05020 [0m
[1m relation 15 in hsa05020 [0m
[1m relation 16 in hsa05020 [0m
[1m relation 17 in hsa05020 [0m
[1m relation 18 in hsa05020 [0m
[1m relation 19 in hsa05020 [0m
[1m relation 20 in hsa05020 [0m
[1m relation 21 in hsa05020 [0m
[1m relation 22 in hsa05020 [0m
[1m relation 23 in hsa05020 [0m
[1m relation 24 in hsa05020 [0m
[1m relation 25 in hsa05020 [0m
[1m relation 26 in hsa05020 [0m
[1m relation 27 in hsa05020 [0m
[1m relation 28 in hsa05020 [0m
[1m relation 29 in hsa0

[1m relation 0 in hsa05030 [0m
[1m relation 1 in hsa05030 [0m
[1m relation 2 in hsa05030 [0m
[1m relation 3 in hsa05030 [0m
[1m relation 4 in hsa05030 [0m
[1m relation 5 in hsa05030 [0m
[1m relation 6 in hsa05030 [0m
[1m relation 7 in hsa05030 [0m
[1m relation 8 in hsa05030 [0m
[1m relation 9 in hsa05030 [0m
[1m relation 10 in hsa05030 [0m
[1m relation 11 in hsa05030 [0m
[1m relation 12 in hsa05030 [0m
[1m relation 13 in hsa05030 [0m
[1m relation 14 in hsa05030 [0m
[1m relation 15 in hsa05030 [0m
[1m relation 16 in hsa05030 [0m
[1m relation 17 in hsa05030 [0m
[1m relation 18 in hsa05030 [0m
[1m relation 19 in hsa05030 [0m
[1m relation 20 in hsa05030 [0m
[1m relation 21 in hsa05030 [0m
[1m relation 22 in hsa05030 [0m
[1m relation 23 in hsa05030 [0m
[1m relation 24 in hsa05030 [0m
[1m relation 25 in hsa05030 [0m
[1m relation 26 in hsa05030 [0m
[1m relation 27 in hsa05030 [0m
[1m relation 28 in hsa05030 [0m
[1m relation 29 in hsa0

[1m relation 0 in hsa05417 [0m
[1m relation 1 in hsa05417 [0m
[1m relation 2 in hsa05417 [0m
[1m relation 3 in hsa05417 [0m
[1m relation 4 in hsa05417 [0m
[1m relation 5 in hsa05417 [0m
[1m relation 6 in hsa05417 [0m
[1m relation 7 in hsa05417 [0m
[1m relation 8 in hsa05417 [0m
[1m relation 9 in hsa05417 [0m
[1m relation 10 in hsa05417 [0m
[1m relation 11 in hsa05417 [0m
[1m relation 12 in hsa05417 [0m
[1m relation 13 in hsa05417 [0m
[1m relation 14 in hsa05417 [0m
[1m relation 15 in hsa05417 [0m
[1m relation 16 in hsa05417 [0m
[1m relation 17 in hsa05417 [0m
[1m relation 18 in hsa05417 [0m
[1m relation 19 in hsa05417 [0m
[1m relation 20 in hsa05417 [0m
[1m relation 21 in hsa05417 [0m
[1m relation 22 in hsa05417 [0m
[1m relation 23 in hsa05417 [0m
[1m relation 24 in hsa05417 [0m
[1m relation 25 in hsa05417 [0m
[1m relation 26 in hsa05417 [0m
[1m relation 27 in hsa05417 [0m
[1m relation 28 in hsa05417 [0m
[1m relation 29 in hsa0

[1m relation 79 in hsa05418 [0m
[1m relation 80 in hsa05418 [0m
[1m relation 81 in hsa05418 [0m
[1m relation 82 in hsa05418 [0m
[1m relation 83 in hsa05418 [0m
[1m relation 84 in hsa05418 [0m
[1m relation 85 in hsa05418 [0m
[1m relation 86 in hsa05418 [0m
[1m relation 87 in hsa05418 [0m
[1m relation 88 in hsa05418 [0m
[1m relation 89 in hsa05418 [0m
[1m relation 90 in hsa05418 [0m
[1m relation 91 in hsa05418 [0m
[1m relation 92 in hsa05418 [0m
[1m relation 93 in hsa05418 [0m
[1m relation 94 in hsa05418 [0m
[1m relation 95 in hsa05418 [0m
[1m relation 96 in hsa05418 [0m
[1m------------------------------------------77.hsa05410------------------------------------------[0m
[1mhsa05410 Refer PMID:
[0m ['12270949', '12844200', '14711353', '10615904', '11371514', '11716909', '18056765', '19136482', '11805847', '11225594', '18572189']
[1m relation 0 in hsa05410 [0m
[1m relation 1 in hsa05410 [0m
[1m relation 2 in hsa05410 [0m
[1m relation 3 in hsa

[1mhsa04940 Refer PMID:
[0m ['9719467', '11507694', '8072542', '8786033', '12796471', '15889095', '15889096', '14617043', '12752668']
[1m relation 0 in hsa04940 [0m
[1m relation 1 in hsa04940 [0m
[1m------------------------------------------84.hsa04950------------------------------------------[0m
[1mhsa04950 Refer PMID:
[0m ['15298336', '15604203', '15028942', '14614204']
[1m relation 0 in hsa04950 [0m
[1m relation 1 in hsa04950 [0m
[1m relation 2 in hsa04950 [0m
[1m relation 3 in hsa04950 [0m
[1m relation 4 in hsa04950 [0m
[1m relation 5 in hsa04950 [0m
[1m relation 6 in hsa04950 [0m
[1m relation 7 in hsa04950 [0m
[1m relation 8 in hsa04950 [0m
[1m relation 9 in hsa04950 [0m
[1m relation 10 in hsa04950 [0m
[1m relation 11 in hsa04950 [0m
[1m relation 12 in hsa04950 [0m
[1m relation 13 in hsa04950 [0m
[1m relation 14 in hsa04950 [0m
[1m relation 15 in hsa04950 [0m
[1m relation 16 in hsa04950 [0m
[1m relation 17 in hsa04950 [0m
[1m relation

[1mhsa04933 Refer PMID:
[0m ['21261520', '21111800', '23961320', '22558488', '19404313', '11180401', '21680901', '11684565', '16894049', '21590515', '21440603', '23776698', '17005604', '20300563', '20470857', '18922799', '11375353', '12709399', '18323529', '15569303']
[1m relation 0 in hsa04933 [0m
[1m relation 1 in hsa04933 [0m
[1m relation 2 in hsa04933 [0m
[1m relation 3 in hsa04933 [0m
[1m relation 4 in hsa04933 [0m
[1m relation 5 in hsa04933 [0m
[1m relation 6 in hsa04933 [0m
[1m relation 7 in hsa04933 [0m
[1m relation 8 in hsa04933 [0m
[1m relation 9 in hsa04933 [0m
[1m relation 10 in hsa04933 [0m
[1m relation 11 in hsa04933 [0m
[1m relation 12 in hsa04933 [0m
[1m relation 13 in hsa04933 [0m
[1m relation 14 in hsa04933 [0m
[1m relation 15 in hsa04933 [0m
[1m relation 16 in hsa04933 [0m
[1m relation 17 in hsa04933 [0m
[1m relation 18 in hsa04933 [0m
[1m relation 19 in hsa04933 [0m
[1m relation 20 in hsa04933 [0m
[1m relation 21 in hsa04

[1m relation 83 in hsa01521 [0m
[1m relation 84 in hsa01521 [0m
[1m relation 85 in hsa01521 [0m
[1m relation 86 in hsa01521 [0m
[1m relation 87 in hsa01521 [0m
[1m relation 88 in hsa01521 [0m
[1m relation 89 in hsa01521 [0m
[1m relation 90 in hsa01521 [0m
[1m relation 91 in hsa01521 [0m
[1m relation 92 in hsa01521 [0m
[1m relation 93 in hsa01521 [0m
[1m relation 94 in hsa01521 [0m
[1m relation 95 in hsa01521 [0m
[1m relation 96 in hsa01521 [0m
[1m relation 97 in hsa01521 [0m
[1m relation 98 in hsa01521 [0m
[1m relation 99 in hsa01521 [0m
[1m relation 100 in hsa01521 [0m
[1m relation 101 in hsa01521 [0m
[1m relation 102 in hsa01521 [0m
[1m relation 103 in hsa01521 [0m
[1m relation 104 in hsa01521 [0m
[1m relation 105 in hsa01521 [0m
[1m relation 106 in hsa01521 [0m
[1m relation 107 in hsa01521 [0m
[1m relation 108 in hsa01521 [0m
[1m relation 109 in hsa01521 [0m
[1m relation 110 in hsa01521 [0m
[1m relation 111 in hsa01521 [0m
[

In [9]:
fin_pathway_list = list(set(Ref_PMID_DF.pathway))
print('final pathway used:',len(fin_pathway_list))
fin_pathway_list

final pathway used: 93


['hsa05321',
 'hsa05030',
 'hsa05033',
 'hsa05224',
 'hsa05133',
 'hsa05130',
 'hsa05016',
 'hsa05416',
 'hsa05162',
 'hsa01522',
 'hsa05169',
 'hsa05230',
 'hsa05332',
 'hsa05418',
 'hsa05017',
 'hsa05215',
 'hsa05135',
 'hsa05200',
 'hsa01524',
 'hsa05226',
 'hsa05143',
 'hsa05222',
 'hsa05014',
 'hsa05412',
 'hsa05031',
 'hsa05231',
 'hsa05020',
 'hsa05142',
 'hsa05204',
 'hsa05417',
 'hsa05163',
 'hsa05160',
 'hsa05203',
 'hsa05210',
 'hsa05223',
 'hsa05144',
 'hsa05166',
 'hsa05414',
 'hsa04932',
 'hsa05219',
 'hsa04934',
 'hsa05211',
 'hsa05340',
 'hsa05167',
 'hsa04930',
 'hsa05330',
 'hsa05323',
 'hsa04931',
 'hsa01521',
 'hsa05110',
 'hsa05202',
 'hsa05120',
 'hsa05100',
 'hsa05220',
 'hsa05146',
 'hsa04933',
 'hsa01523',
 'hsa05322',
 'hsa05150',
 'hsa05145',
 'hsa05022',
 'hsa05410',
 'hsa05214',
 'hsa05320',
 'hsa05235',
 'hsa05225',
 'hsa05168',
 'hsa04950',
 'hsa05205',
 'hsa05034',
 'hsa05134',
 'hsa05216',
 'hsa05170',
 'hsa05415',
 'hsa05131',
 'hsa05212',
 'hsa05213',

In [733]:
Ref_PMID_DF

Unnamed: 0,kegg_pmid,pathway
0,10647931,hsa05200
1,15000146,hsa05200
2,16555243,hsa05200
3,11078609,hsa05200
4,16699851,hsa05200
...,...,...
14,24415977,hsa01522
15,15613444,hsa01522
16,24565562,hsa01522
17,20960099,hsa01522


In [734]:
hsa05171_xml_relation_list

Unnamed: 0,gene1_id,gene2_id,gene1,gene2,gene1_hsa,gene2_hsa,gene1_type,gene2_type,relation_type,relation_type2,pathway
4,201,202,"ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE","TNF, DIF, TNF-alpha, TNFA, TNFSF2, TNLG1F",hsa:6868,hsa:7124,gene,gene,activation,PPrel,hsa05171
5,201,412,"ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE","HBEGF, DTR, DTS, DTSF, HEGFL",hsa:6868,hsa:1839,gene,gene,activation,PPrel,hsa05171
6,201,203,"ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE","IL6R, CD126, HIES5, IL-6R-1, IL-6RA, IL6Q, IL6...",hsa:6868,hsa:3570,gene,gene,activation,PPrel,hsa05171
7,416,425,"TNF, DIF, TNF-alpha, TNFA, TNFSF2, TNLG1F","TNFRSF1A, CD120a, FPF, TBP1, TNF-R, TNF-R-I, T...",hsa:7124,hsa:7132,gene,gene,activation,PPrel,hsa05171
8,418,426,"HBEGF, DTR, DTS, DTSF, HEGFL","EGFR, ERBB, ERBB1, HER1, NISBD2, PIG61, mENA",hsa:1839,hsa:1956,gene,gene,activation,PPrel,hsa05171
...,...,...,...,...,...,...,...,...,...,...,...
84,303,244,"CHUK, IKBKA, IKK-alpha, IKK1, IKKA, NFKBIKA, T...","NFKBIA, EDAID2, IKBA, MAD-3, NFKBI...",hsa:1147 hsa:3551 hsa:8517,hsa:4792 hsa:4793,gene,gene,"activation, phosphorylation",PPrel,hsa05171
86,257,258,"JAK1, AIIDE, JAK1A, JAK1B, JTK3...","STAT1, CANDF7, IMD31A, IMD31B, IMD31C, ISGF-3,...",hsa:3716 hsa:7297,hsa:6772,gene,gene,"activation, phosphorylation",PPrel,hsa05171
87,257,350,"JAK1, AIIDE, JAK1A, JAK1B, JTK3...","STAT2, IMD44, ISGF-3, P113, PTORCH3, STAT113",hsa:3716 hsa:7297,hsa:6773,gene,gene,"activation, phosphorylation",PPrel,hsa05171
89,444,681,"NFKB1, CVID12, EBP-1, KBF1, NF-kB, NF-kB1, NF-...","IL6, BSF-2, BSF2, CDF, HGF, HSF, IFN-beta-2, I...",hsa:4790 hsa:5970,hsa:3569,gene,gene,expression,GErel,hsa05171


In [735]:
hsa05171_gene_symbol_list

Unnamed: 0,gene_id,gene,gene_hsa,gene_type,relation_type,pathway
4,201,"ADAM17, ADAM18, CD156B, CSVP, NISBD, NISBD1, TACE",hsa:6868,gene,activation,hsa05171
7,416,"TNF, DIF, TNF-alpha, TNFA, TNFSF2, TNLG1F",hsa:7124,gene,activation,hsa05171
8,418,"HBEGF, DTR, DTS, DTSF, HEGFL",hsa:1839,gene,activation,hsa05171
9,428,"IL6, BSF-2, BSF2, CDF, HGF, HSF, IFN-beta-2, I...",hsa:3569,gene,activation,hsa05171
10,429,"IL6R, CD126, HIES5, IL-6R-1, IL-6RA, IL6Q, IL6...",hsa:3570,gene,activation,hsa05171
...,...,...,...,...,...,...
84,244,"NFKBIA, EDAID2, IKBA, MAD-3, NFKBI...",hsa:4792 hsa:4793,gene,"activation, phosphorylation",hsa05171
86,258,"STAT1, CANDF7, IMD31A, IMD31B, IMD31C, ISGF-3,...",hsa:6772,gene,"activation, phosphorylation",hsa05171
87,350,"STAT2, IMD44, ISGF-3, P113, PTORCH3, STAT113",hsa:6773,gene,"activation, phosphorylation",hsa05171
89,681,"IL6, BSF-2, BSF2, CDF, HGF, HSF, IFN-beta-2, I...",hsa:3569,gene,expression,hsa05171


#### save data result


In [14]:
%cd C:/Users/larry/Desktop/jupyter/Interaction_corpus/kegg_pathway_relation_gene
!ls

C:\Users\larry\Desktop\jupyter\Interaction corpus\kegg_pathway_relation_gene
Ref_PMID_DF.txt
hsa01521_gene_symbol_list.txt
hsa01521_xml_relation_list.txt
hsa01522_gene_symbol_list.txt
hsa01522_xml_relation_list.txt
hsa01523_gene_symbol_list.txt
hsa01523_xml_relation_list.txt
hsa01524_gene_symbol_list.txt
hsa01524_xml_relation_list.txt
hsa04930_gene_symbol_list.txt
hsa04930_xml_relation_list.txt
hsa04931_gene_symbol_list.txt
hsa04931_xml_relation_list.txt
hsa04932_gene_symbol_list.txt
hsa04932_xml_relation_list.txt
hsa04933_gene_symbol_list.txt
hsa04933_xml_relation_list.txt
hsa04934_gene_symbol_list.txt
hsa04934_xml_relation_list.txt
hsa04940_gene_symbol_list.txt
hsa04940_xml_relation_list.txt
hsa04950_gene_symbol_list.txt
hsa04950_xml_relation_list.txt
hsa05010_gene_symbol_list.txt
hsa05010_xml_relation_list.txt
hsa05012_gene_symbol_list.txt
hsa05012_xml_relation_list.txt
hsa05014_gene_symbol_list.txt
hsa05014_xml_relation_list.txt
hsa05016_gene_symbol_list.txt
hsa05016_xml_relation_l

In [738]:
# 儲存變數
idd = 0
for pathway in fin_pathway_list: 

    rel_name = '{}_xml_relation_list'.format(pathway)
    save_variable(globals()[rel_name], rel_name+'.txt')
    gene_name = '{}_gene_symbol_list'.format(pathway)
    save_variable(globals()[gene_name], gene_name+'.txt')
    
    idd+=1
    print(idd,pathway,'Done')

save_variable(Ref_PMID_DF, 'Ref_PMID_DF.txt')

1 hsa04931 Done
2 hsa05032 Done
3 hsa05226 Done
4 hsa05034 Done
5 hsa05150 Done
6 hsa04933 Done
7 hsa05164 Done
8 hsa05418 Done
9 hsa05145 Done
10 hsa05140 Done
11 hsa05231 Done
12 hsa05200 Done
13 hsa05168 Done
14 hsa04930 Done
15 hsa05016 Done
16 hsa05320 Done
17 hsa05010 Done
18 hsa05214 Done
19 hsa05212 Done
20 hsa05202 Done
21 hsa05416 Done
22 hsa05030 Done
23 hsa05215 Done
24 hsa01524 Done
25 hsa05235 Done
26 hsa05204 Done
27 hsa05216 Done
28 hsa05417 Done
29 hsa05210 Done
30 hsa05206 Done
31 hsa05330 Done
32 hsa05415 Done
33 hsa05203 Done
34 hsa05132 Done
35 hsa05321 Done
36 hsa01521 Done
37 hsa05152 Done
38 hsa05323 Done
39 hsa05332 Done
40 hsa05340 Done
41 hsa05169 Done
42 hsa05130 Done
43 hsa05223 Done
44 hsa05143 Done
45 hsa05322 Done
46 hsa05170 Done
47 hsa05120 Done
48 hsa05211 Done
49 hsa05110 Done
50 hsa05171 Done
51 hsa05230 Done
52 hsa05017 Done
53 hsa05205 Done
54 hsa05142 Done
55 hsa05414 Done
56 hsa04950 Done
57 hsa05100 Done
58 hsa05219 Done
59 hsa05218 Done
60 hsa

'Ref_PMID_DF.txt'

In [18]:
# 讀取變數
idd = 0
fin_pathway_list = []
for pathway in pathway_list: 
    try:
        rel_name = '{}_xml_relation_list'.format(pathway)
        globals()['{}_xml_relation_list'.format(pathway)] = load_variavle(rel_name+'.txt')
        gene_name = '{}_gene_symbol_list'.format(pathway)
        globals()['{}_gene_symbol_list'.format(pathway)] = load_variavle(gene_name+'.txt')
        
        idd+=1
        fin_pathway_list.append(pathway)
        print(idd,pathway,'Done')
    except:
        continue
    
Ref_PMID_DF = load_variavle('Ref_PMID_DF.txt')

1 hsa05200 Done
2 hsa05202 Done
3 hsa05206 Done
4 hsa05205 Done
5 hsa05204 Done
6 hsa05203 Done
7 hsa05230 Done
8 hsa05231 Done
9 hsa05235 Done
10 hsa05210 Done
11 hsa05212 Done
12 hsa05225 Done
13 hsa05226 Done
14 hsa05214 Done
15 hsa05216 Done
16 hsa05221 Done
17 hsa05220 Done
18 hsa05217 Done
19 hsa05218 Done
20 hsa05211 Done
21 hsa05219 Done
22 hsa05215 Done
23 hsa05213 Done
24 hsa05224 Done
25 hsa05222 Done
26 hsa05223 Done
27 hsa05166 Done
28 hsa05170 Done
29 hsa05161 Done
30 hsa05160 Done
31 hsa05171 Done
32 hsa05164 Done
33 hsa05162 Done
34 hsa05168 Done
35 hsa05163 Done
36 hsa05167 Done
37 hsa05169 Done
38 hsa05165 Done
39 hsa05110 Done
40 hsa05120 Done
41 hsa05130 Done
42 hsa05132 Done
43 hsa05131 Done
44 hsa05135 Done
45 hsa05133 Done
46 hsa05134 Done
47 hsa05150 Done
48 hsa05152 Done
49 hsa05100 Done
50 hsa05146 Done
51 hsa05144 Done
52 hsa05145 Done
53 hsa05140 Done
54 hsa05142 Done
55 hsa05143 Done
56 hsa05310 Done
57 hsa05322 Done
58 hsa05323 Done
59 hsa05320 Done
60 hsa

### Get PubTator Annotation
https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/PubTator/tutorial/index.html

In [19]:
PMID = []
for i in range(0,len(fin_pathway_list)): #迴圈，某一條pathway
    print("\033[1m↓↓↓{:=^100s}↓↓↓\033[0m\n".format(pathway))
    
    pathway = fin_pathway_list[i] #迴圈，某一條pathway
    sub_ref_df = Ref_PMID_DF[Ref_PMID_DF.pathway==pathway] #迴圈list，參考摘要
    
    for j in sub_ref_df.index: #迴圈，某一篇參考摘要
        Abs_pmid = sub_ref_df.kegg_pmid[j]#迴圈，某一篇參考摘要
        print(Abs_pmid)
        PMID.append(Abs_pmid)


10647931
15000146
16555243
11078609
16699851
12621137
12951588
10505543
16042571
15349822
9196022
12737309
11459867
15573119
15310786
11223406
15711891
11170304
16326109
15479695
11477132
12459728
11407945
12946833
16702400
10740269
12648469
14967450
14586404
12545153
14580692
12079267
15774796
15823750
11707511
15639402
12154354
11253051
16700615
12762887
16095998
11057895
9643506
12084351
16557281
16551846
16946003
17062879
16242838
16189702
11165748
12094241
16236521
12563308
16146838
12951584
16642045
15625120
11721960
12468433
17554387
16352814
15719031
11071626
14982876
10403855
15156182
16484590
9834202
12801837
11160144
12244301
15948971
12588368
15978322
15468170
16881963
16306523
11276000
15270885
14737121
15656799
14556242
11736900
16054233
11130178
16822996
12894244
16750612
15841168
16001050
16001072
16899407
15009714
11224709
15721476
14695152
15557758
10843728
16339096
15122209
14634372
14685170
15611513
17211469
17287871
12351585
17158541
16474624
16110317
11103787
128

In [20]:
len(PMID)

2178

In [623]:
# Get PubTator Annotation with API (5 hours) *跳至讀取結果
print ("Start : %s" % time.ctime())

i=0
for p in range(0,len(PMID)): 
    
    url = 'https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids={}&concepts=gene'.format(PMID[p])
    ## Step1.取得網頁連線
    try:
        # 休息才能走更遠的路
        slp = random.randint(5,10)
        time.sleep(slp)
        # keep going
        r = requests.get(url) #未經驗證請求網址(!!!)
        print(p,url,'{}s'.format(slp))

    except TimeoutError:
        # 休息不夠，更多的休息
        print('need long time sleep')
        time.sleep(600)
        # keep going
        r = requests.get(url)
        print(p,url,'Sleep 600s.......')
        
        
    # 連線是否正常
    if r.status_code!=200:
        print(p,PMID[p],"不正常")
        continue


    # Step2.使用bs4回傳網頁(html)內容
    resp = BeautifulSoup(r.text,"lxml")


    # Step3.取得標題所在的標籤內容
    title = resp.find('p')
    if title==None:#跳過沒有內容的摘要
        print(PMID[p],'no information,skip')#105
        continue
    if len(str(title))<500:#跳過沒有摘要
        print(PMID[p],'no abstract,skip') #117
        continue
    Abs = title.text[:]
    
    i+=1
    # step4.儲存所有PubTator結果
    globals()["pub_ent_{}".format(PMID[p])] = Abs # pub_ent_21316027


print ("End : %s" % time.ctime())
# result

Start : Fri Apr 23 02:08:26 2021
0 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22360800&concepts=gene 6s
1 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12765939&concepts=gene 5s
2 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20526383&concepts=gene 5s
3 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=7666792&concepts=gene 8s
4 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15166380&concepts=gene 9s
5 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22675355&concepts=gene 9s
6 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17130651&concepts=gene 10s
7 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16186396&concepts=gene 10s
8 https://www.ncbi.nlm.nih.gov/research/pubtator-api/p

71 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17270154&concepts=gene 9s
72 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18310464&concepts=gene 6s
73 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18616668&concepts=gene 7s
74 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19077056&concepts=gene 5s
75 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20626732&concepts=gene 7s
76 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20178484&concepts=gene 7s
77 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16322743&concepts=gene 9s
78 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20062962&concepts=gene 7s
79 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtat

142 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=30254192&concepts=gene 8s
143 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=30545063&concepts=gene 7s
144 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21253874&concepts=gene 8s
145 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26008706&concepts=gene 8s
146 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=30079062&concepts=gene 6s
147 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=23236273&concepts=gene 9s
148 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21050140&concepts=gene 5s
149 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26597822&concepts=gene 6s
26597822 no abstract,skip
150 https://www.ncbi.nlm.nih.gov/research/pubt

212 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16848789&concepts=gene 7s
213 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11728310&concepts=gene 9s
214 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10608501&concepts=gene 9s
215 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=14762510&concepts=gene 9s
216 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22089420&concepts=gene 6s
217 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19935796&concepts=gene 10s
218 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22429397&concepts=gene 6s
219 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19264150&concepts=gene 10s
220 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/ex

282 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12094241&concepts=gene 9s
12094241 no abstract,skip
283 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16236521&concepts=gene 10s
284 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12563308&concepts=gene 5s
285 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16146838&concepts=gene 6s
286 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12951584&concepts=gene 5s
287 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16642045&concepts=gene 6s
288 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15625120&concepts=gene 7s
289 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11721960&concepts=gene 7s
290 https://www.ncbi.nlm.nih.gov/research/pub

350 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15082523&concepts=gene 10s
351 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=9422516&concepts=gene 10s
352 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10835690&concepts=gene 6s
353 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10453277&concepts=gene 6s
354 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11750244&concepts=gene 10s
355 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=8069858&concepts=gene 10s
356 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11527574&concepts=gene 7s
357 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15947972&concepts=gene 8s
358 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/ex

421 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15759102&concepts=gene 5s
422 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12480546&concepts=gene 8s
423 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11390407&concepts=gene 6s
424 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=9011569&concepts=gene 9s
425 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=7491105&concepts=gene 10s
426 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15258147&concepts=gene 7s
15258147 no abstract,skip
427 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=9974390&concepts=gene 8s
428 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11325516&concepts=gene 10s
429 https://www.ncbi.nlm.nih.gov/research/pubta

491 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=31191253&concepts=gene 9s
492 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=27807401&concepts=gene 10s
493 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19903023&concepts=gene 9s
494 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19464758&concepts=gene 10s
495 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12805290&concepts=gene 10s
496 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15645263&concepts=gene 6s
497 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26074767&concepts=gene 7s
498 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=24529521&concepts=gene 5s
499 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/e

561 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18172298&concepts=gene 9s
562 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21849159&concepts=gene 6s
563 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20153925&concepts=gene 5s
564 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15644444&concepts=gene 10s
565 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20190806&concepts=gene 9s
566 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20145147&concepts=gene 10s
567 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17327229&concepts=gene 6s
568 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21123453&concepts=gene 8s
569 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/ex

631 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21747944&concepts=gene 8s
632 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20478527&concepts=gene 7s
633 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19399032&concepts=gene 8s
634 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=25915839&concepts=gene 10s
635 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=25485532&concepts=gene 9s
636 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18172298&concepts=gene 5s
637 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=28968951&concepts=gene 10s
638 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=14576837&concepts=gene 7s
639 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/ex

702 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21154658&concepts=gene 8s
703 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20159989&concepts=gene 5s
704 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17459420&concepts=gene 7s
705 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22178655&concepts=gene 7s
706 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=8951235&concepts=gene 5s
707 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21163908&concepts=gene 5s
708 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10753215&concepts=gene 5s
709 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15582261&concepts=gene 9s
710 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/expor

772 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15349822&concepts=gene 9s
773 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=9196022&concepts=gene 5s
774 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12737309&concepts=gene 7s
775 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11459867&concepts=gene 7s
776 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15573119&concepts=gene 7s
777 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15310786&concepts=gene 8s
778 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11223406&concepts=gene 5s
779 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15711891&concepts=gene 5s
780 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/expor

843 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=31269778&concepts=gene 7s
844 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=24307101&concepts=gene 10s
845 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=29867137&concepts=gene 10s
846 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16487830&concepts=gene 10s
847 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=32024054&concepts=gene 9s
848 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18201576&concepts=gene 8s
849 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21102637&concepts=gene 8s
850 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19400863&concepts=gene 7s
851 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/e

914 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17645777&concepts=gene 6s
915 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15342967&concepts=gene 7s
916 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20592731&concepts=gene 10s
917 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15956361&concepts=gene 6s
918 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18385245&concepts=gene 6s
919 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=9374493&concepts=gene 8s
920 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16381817&concepts=gene 6s
921 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12970441&concepts=gene 8s
922 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/expo

985 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21681604&concepts=gene 6s
986 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26579470&concepts=gene 5s
987 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=23358468&concepts=gene 6s
988 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=25232485&concepts=gene 9s
989 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19632948&concepts=gene 8s
990 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18483355&concepts=gene 7s
991 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=23401451&concepts=gene 5s
992 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=24307395&concepts=gene 8s
993 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/expo

1055 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11058677&concepts=gene 8s
11058677 no abstract,skip
1056 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15661024&concepts=gene 6s
1057 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=14647478&concepts=gene 8s
14647478 no abstract,skip
1058 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15510157&concepts=gene 8s
1059 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21372216&concepts=gene 5s
1060 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21429244&concepts=gene 10s
1061 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=28408983&concepts=gene 7s
1062 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=24586164&concepts=gene 9s
1063 https:

1125 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=30941048&concepts=gene 8s
1126 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=29128428&concepts=gene 10s
1127 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=32326537&concepts=gene 6s
1128 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19168735&concepts=gene 6s
19168735 no abstract,skip
1129 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12088284&concepts=gene 5s
1130 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16600243&concepts=gene 6s
1131 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17582444&concepts=gene 7s
1132 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11850009&concepts=gene 9s
1133 https://www.ncbi.nlm.nih.gov/res

1194 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=25048860&concepts=gene 9s
1195 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11313942&concepts=gene 9s
1196 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19422821&concepts=gene 9s
1197 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10942706&concepts=gene 9s
1198 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11032799&concepts=gene 9s
1199 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11292584&concepts=gene 5s
1200 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12102686&concepts=gene 7s
1201 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12403808&concepts=gene 7s
1202 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publicat

1262 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22307140&concepts=gene 10s
1263 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19270154&concepts=gene 9s
1264 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21340684&concepts=gene 5s
1265 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22750268&concepts=gene 10s
1266 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20520704&concepts=gene 7s
20520704 no abstract,skip
1267 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=24113830&concepts=gene 9s
1268 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=24738035&concepts=gene 10s
1269 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22101401&concepts=gene 6s
1270 https://www.ncbi.nlm.nih.gov/r

1331 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19255147&concepts=gene 10s
1332 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20655987&concepts=gene 5s
1333 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11931647&concepts=gene 9s
1334 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10687947&concepts=gene 9s
1335 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10633082&concepts=gene 7s
1336 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=12509413&concepts=gene 7s
1337 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=14711376&concepts=gene 10s
1338 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17971838&concepts=gene 9s
1339 https://www.ncbi.nlm.nih.gov/research/pubtator-api/public

1401 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16899407&concepts=gene 7s
1402 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15009714&concepts=gene 5s
1403 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11224709&concepts=gene 10s
1404 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15721476&concepts=gene 10s
1405 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=14695152&concepts=gene 10s
1406 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15557758&concepts=gene 6s
1407 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10843728&concepts=gene 6s
1408 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16495942&concepts=gene 7s
1409 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publi

1471 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=31618844&concepts=gene 10s
1472 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=31795242&concepts=gene 8s
1473 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=30710214&concepts=gene 9s
1474 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=23439666&concepts=gene 6s
1475 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=28643372&concepts=gene 5s
1476 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=29194372&concepts=gene 8s
1477 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=24619348&concepts=gene 7s
1478 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=29476642&concepts=gene 5s
1479 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publica

1541 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26870016&concepts=gene 8s
1542 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20592731&concepts=gene 5s
1543 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=25752815&concepts=gene 6s
1544 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26008697&concepts=gene 7s
1545 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19684440&concepts=gene 7s
1546 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=23164805&concepts=gene 10s
1547 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19156753&concepts=gene 6s
1548 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17645777&concepts=gene 10s
1549 https://www.ncbi.nlm.nih.gov/research/pubtator-api/public

1611 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=21383764&concepts=gene 10s
1612 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=27060663&concepts=gene 5s
1613 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=28035083&concepts=gene 7s
1614 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=23145028&concepts=gene 10s
1615 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15990176&concepts=gene 7s
1616 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=25549333&concepts=gene 10s
1617 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=24445563&concepts=gene 10s
1618 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20398729&concepts=gene 7s
1619 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publ

1680 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17364144&concepts=gene 10s
1681 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10811837&concepts=gene 9s
10811837 no abstract,skip
1682 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19203113&concepts=gene 8s
1683 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19203114&concepts=gene 7s
1684 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19198566&concepts=gene 6s
1685 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16690907&concepts=gene 9s
1686 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22049305&concepts=gene 8s
1687 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=27854326&concepts=gene 8s
1688 https://www.ncbi.nlm.nih.gov/res

1750 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18382419&concepts=gene 6s
1751 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16101641&concepts=gene 7s
1752 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16124859&concepts=gene 10s
1753 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=30059722&concepts=gene 8s
1754 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22869468&concepts=gene 8s
1755 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26239691&concepts=gene 5s
1756 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26776362&concepts=gene 7s
26776362 no abstract,skip
1757 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19201080&concepts=gene 10s
1758 https://www.ncbi.nlm.nih.gov/re

1819 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19075677&concepts=gene 9s
1820 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20457215&concepts=gene 5s
1821 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11378035&concepts=gene 10s
1822 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=10756002&concepts=gene 5s
1823 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=9832261&concepts=gene 7s
1824 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=14700586&concepts=gene 8s
14700586 no abstract,skip
1825 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15813717&concepts=gene 5s
1826 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19207103&concepts=gene 10s
1827 https://www.ncbi.nlm.nih.gov/res

1888 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26968398&concepts=gene 7s
1889 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26040571&concepts=gene 6s
1890 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=23196196&concepts=gene 6s
1891 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=23881035&concepts=gene 7s
1892 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=25013431&concepts=gene 5s
1893 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11879567&concepts=gene 9s
1894 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15343273&concepts=gene 9s
1895 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17384582&concepts=gene 5s
1896 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publicat

1958 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18281405&concepts=gene 8s
1959 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19343209&concepts=gene 7s
1960 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20941588&concepts=gene 6s
1961 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20639898&concepts=gene 9s
1962 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26099527&concepts=gene 7s
1963 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17295177&concepts=gene 6s
1964 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16799620&concepts=gene 10s
1965 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19387255&concepts=gene 8s
1966 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publica

2028 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17828818&concepts=gene 6s
2029 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16611896&concepts=gene 9s
2030 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19148799&concepts=gene 6s
2031 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=17764115&concepts=gene 6s
2032 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=18251977&concepts=gene 5s
2033 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19101208&concepts=gene 6s
2034 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=8920859&concepts=gene 10s
2035 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15106591&concepts=gene 5s
2036 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publicat

2098 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=23344024&concepts=gene 8s
2099 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=19701242&concepts=gene 9s
2100 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22015292&concepts=gene 8s
2101 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=25907219&concepts=gene 10s
2102 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=24841429&concepts=gene 6s
2103 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=26849149&concepts=gene 5s
2104 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=22771991&concepts=gene 7s
2105 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=20887199&concepts=gene 7s
2106 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publica

2168 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11276000&concepts=gene 8s
2169 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15270885&concepts=gene 9s
2170 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=14737121&concepts=gene 9s
14737121 no abstract,skip
2171 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=15656799&concepts=gene 8s
2172 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=14556242&concepts=gene 7s
2173 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11736900&concepts=gene 8s
2174 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=16054233&concepts=gene 6s
2175 https://www.ncbi.nlm.nih.gov/research/pubtator-api/publications/export/pubtator?pmids=11130178&concepts=gene 9s
2176 https://www.ncbi.nlm.nih.gov/rese

In [192]:
### Obtain Entities info in PubTator *
#load result
complete_PMID = []
for N in range(0,len(PMID)):
    
    ## original format
    try:
        PubTator = globals()["pub_ent_{}".format(PMID[N])]
        complete_PMID.append(PMID[N])
    except KeyError:
        print(PMID[N],'no information,skip')
        continue


    ## split info
    PubTator_aplit = PubTator.split('\n') 


    ## Abstract & Title
    Title = PubTator_aplit[0][PubTator_aplit[0].find('|t|')+3:]
    Abs = PubTator_aplit[1][PubTator_aplit[1].find('|a|')+3:]


    ## NER
    NER_DF = pd.DataFrame(columns=['PMID','START','END','ENTITY','TYPE','database_IDENTIFIER'])
    for c in range(2,len(PubTator_aplit)-2):
        NER_col = PubTator_aplit[c].split('\t')
        NER_DF.loc[len(NER_DF)] = NER_col

    ## save in variable
    globals()["Abs_{}".format(PMID[N])] = Abs[:] #ex:Abs_33014208
    globals()["Title_{}".format(PMID[N])] = Title[:] #ex:Title_33014208
    globals()["entity_{}".format(PMID[N])] = NER_DF[:] #ex:entity_33014208

    print(N,'PMID:'+PMID[N],'Done')

10647931 no information,skip
1 PMID:15000146 Done
2 PMID:16555243 Done
3 PMID:11078609 Done
4 PMID:16699851 Done
12621137 no information,skip
6 PMID:12951588 Done
7 PMID:10505543 Done
8 PMID:16042571 Done
9 PMID:15349822 Done
10 PMID:9196022 Done
11 PMID:12737309 Done
12 PMID:11459867 Done
13 PMID:15573119 Done
14 PMID:15310786 Done
15 PMID:11223406 Done
16 PMID:15711891 Done
17 PMID:11170304 Done
18 PMID:16326109 Done
15479695 no information,skip
20 PMID:11477132 Done
21 PMID:12459728 Done
22 PMID:11407945 Done
23 PMID:12946833 Done
24 PMID:16702400 Done
25 PMID:10740269 Done
26 PMID:12648469 Done
27 PMID:14967450 Done
28 PMID:14586404 Done
29 PMID:12545153 Done
30 PMID:14580692 Done
31 PMID:12079267 Done
32 PMID:15774796 Done
33 PMID:15823750 Done
34 PMID:11707511 Done
15639402 no information,skip
36 PMID:12154354 Done
37 PMID:11253051 Done
38 PMID:16700615 Done
39 PMID:12762887 Done
40 PMID:16095998 Done
11057895 no information,skip
42 PMID:9643506 Done
43 PMID:12084351 Done
44 PMID

362 PMID:12186904 Done
363 PMID:15795286 Done
364 PMID:11886269 Done
365 PMID:11278402 Done
366 PMID:21250384 Done
367 PMID:12379483 Done
368 PMID:22110506 Done
369 PMID:20494113 Done
370 PMID:21372216 Done
371 PMID:15510157 Done
372 PMID:14506283 Done
373 PMID:11160754 Done
374 PMID:15452270 Done
375 PMID:15767449 Done
376 PMID:21177815 Done
377 PMID:21347341 Done
378 PMID:15542638 Done
379 PMID:14747563 Done
380 PMID:16352731 Done
381 PMID:18391203 Done
382 PMID:11091847 Done
383 PMID:19078975 Done
384 PMID:17049016 Done
385 PMID:11905817 Done
386 PMID:11669605 Done
387 PMID:11766888 Done
388 PMID:10793105 Done
389 PMID:19156753 Done
390 PMID:17645777 Done
391 PMID:15342967 Done
392 PMID:20592731 Done
393 PMID:15956361 Done
394 PMID:18385245 Done
395 PMID:9374493 Done
396 PMID:16381817 Done
397 PMID:12970441 Done
398 PMID:9349482 Done
399 PMID:8617242 Done
400 PMID:9990017 Done
401 PMID:17384582 Done
402 PMID:15310405 Done
403 PMID:12910250 Done
404 PMID:11244044 Done
405 PMID:187028

720 PMID:21076461 Done
721 PMID:20971825 Done
722 PMID:26968398 Done
723 PMID:26040571 Done
724 PMID:23196196 Done
725 PMID:23881035 Done
726 PMID:25013431 Done
727 PMID:11879567 Done
728 PMID:15343273 Done
729 PMID:12379883 Done
730 PMID:12741677 Done
731 PMID:12711111 Done
732 PMID:12407699 Done
733 PMID:11720739 Done
734 PMID:16112428 Done
735 PMID:11902573 Done
736 PMID:10944551 Done
737 PMID:11807782 Done
738 PMID:16064138 Done
739 PMID:11463388 Done
740 PMID:15109562 Done
741 PMID:11559746 Done
742 PMID:11905738 Done
743 PMID:12243753 Done
744 PMID:12616528 Done
745 PMID:23070004 Done
746 PMID:12379883 Done
747 PMID:12741677 Done
748 PMID:16870043 Done
749 PMID:15948711 Done
750 PMID:11902573 Done
751 PMID:10944551 Done
752 PMID:12867060 Done
753 PMID:16112428 Done
754 PMID:12407699 Done
755 PMID:15942304 Done
756 PMID:11895903 Done
757 PMID:16189154 Done
758 PMID:17625570 Done
759 PMID:18593892 Done
760 PMID:19629074 Done
761 PMID:23201355 Done
762 PMID:22154278 Done
763 PMID:21

1074 PMID:27518127 Done
1075 PMID:15573137 Done
1076 PMID:17329974 Done
1077 PMID:23028325 Done
1078 PMID:10318948 Done
1079 PMID:26192396 Done
1080 PMID:25187543 Done
1081 PMID:17050609 Done
1082 PMID:16585191 Done
1083 PMID:22521915 Done
1084 PMID:25594835 Done
1085 PMID:14991150 Done
1086 PMID:26870016 Done
1087 PMID:15510157 Done
1088 PMID:21372216 Done
1089 PMID:21429244 Done
1090 PMID:28408983 Done
1091 PMID:24586164 Done
1092 PMID:23850008 Done
1093 PMID:25704559 Done
1094 PMID:22110506 Done
1095 PMID:23303627 Done
1096 PMID:29116588 Done
1097 PMID:23435233 Done
1098 PMID:19498380 Done
1099 PMID:18977445 Done
1100 PMID:21146386 Done
1101 PMID:27886133 Done
1102 PMID:22249143 Done
1103 PMID:25613730 Done
1104 PMID:17601372 Done
1105 PMID:23793113 Done
1106 PMID:28835489 Done
1107 PMID:12740915 Done
1108 PMID:25873381 Done
1109 PMID:11905817 Done
1110 PMID:11669605 Done
1111 PMID:14506283 Done
1112 PMID:25403939 Done
1113 PMID:29932446 Done
1114 PMID:19649319 Done
1115 PMID:192443

1425 PMID:20398209 Done
11172168 no information,skip
1427 PMID:11477111 Done
1428 PMID:10719286 Done
1429 PMID:17976144 Done
1430 PMID:11686879 Done
1431 PMID:17133123 Done
1432 PMID:18274560 Done
1433 PMID:18274559 Done
1434 PMID:12093011 Done
1435 PMID:17364144 Done
10811837 no information,skip
18305268 no information,skip
1438 PMID:12835292 Done
1439 PMID:17943287 Done
1440 PMID:8048228 Done
1441 PMID:16932712 Done
1442 PMID:15459673 Done
1443 PMID:15972354 Done
1444 PMID:10430616 Done
1445 PMID:12965173 Done
1446 PMID:10025913 Done
1447 PMID:15380523 Done
1448 PMID:16127012 Done
1449 PMID:20036936 Done
1450 PMID:20651747 Done
1451 PMID:17195034 Done
1452 PMID:20193003 Done
1453 PMID:17502360 Done
9627005 no information,skip
1455 PMID:20305562 Done
1456 PMID:15059264 Done
1457 PMID:11861618 Done
1458 PMID:19184540 Done
1459 PMID:16313349 Done
1460 PMID:20392226 Done
1461 PMID:19585227 Done
1462 PMID:19455385 Done
1463 PMID:17380158 Done
1464 PMID:20510236 Done
1465 PMID:15055519 Don

1781 PMID:12818179 Done
1782 PMID:10234039 Done
1783 PMID:12064476 Done
1784 PMID:14566342 Done
1785 PMID:15582154 Done
1786 PMID:11595774 Done
1787 PMID:11268215 Done
1788 PMID:14746512 Done
16724157 no information,skip
1790 PMID:18440072 Done
1791 PMID:16776597 Done
1792 PMID:16151049 Done
1793 PMID:14622123 Done
1794 PMID:10414964 Done
1795 PMID:8753884 Done
10774721 no information,skip
1797 PMID:17728700 Done
1798 PMID:14746512 Done
1799 PMID:19199083 Done
1800 PMID:18617632 Done
1801 PMID:21338876 Done
8630246 no information,skip
1803 PMID:18632938 Done
1804 PMID:21989194 Done
1805 PMID:14529709 Done
1806 PMID:19222557 Done
1807 PMID:15661627 Done
1808 PMID:11152760 Done
1809 PMID:16581089 Done
1810 PMID:8785060 Done
1811 PMID:11750924 Done
1812 PMID:10066284 Done
1813 PMID:18349442 Done
1814 PMID:12859419 Done
1815 PMID:1346804 Done
1816 PMID:20962224 Done
1817 PMID:8987801 Done
1818 PMID:21872647 Done
1819 PMID:12217944 Done
1820 PMID:8096779 Done
1821 PMID:21930931 Done
1822 PM

2120 PMID:22474259 Done
2121 PMID:24493829 Done
2122 PMID:23071030 Done
2123 PMID:25337673 Done
17554333 no information,skip
2125 PMID:14576837 Done
2126 PMID:26184483 Done
2127 PMID:17625587 Done
2128 PMID:26886018 Done
2129 PMID:23442359 Done
2130 PMID:24074238 Done
2131 PMID:21892204 Done
2132 PMID:17336087 Done
2133 PMID:20647037 Done
2134 PMID:25546083 Done
2135 PMID:14609433 Done
2136 PMID:24874729 Done
2137 PMID:25058905 Done
2138 PMID:20811617 Done
2139 PMID:19190080 Done
2140 PMID:26867799 Done
2141 PMID:15607932 Done
2142 PMID:22921318 Done
2143 PMID:17333344 Done
2144 PMID:26920997 Done
2145 PMID:24732946 Done
2146 PMID:18297517 Done
2147 PMID:17203006 Done
2148 PMID:17092765 Done
16380836 no information,skip
2150 PMID:22954694 Done
2151 PMID:16491071 Done
2152 PMID:23987246 Done
8793930 no information,skip
2154 PMID:15117425 Done
2155 PMID:14576850 Done
21317831 no information,skip
2157 PMID:23897011 Done
2158 PMID:11509639 Done
2159 PMID:23344024 Done
2160 PMID:19701242 Do

In [193]:
# display annotation
entity_33014208

Unnamed: 0,PMID,START,END,ENTITY,TYPE,database_IDENTIFIER
0,33014208,459,472,interleukin-6,Gene,3569
1,33014208,474,478,IL-6,Gene,3569
2,33014208,546,568,nuclear factor kappa B,Gene,4790
3,33014208,570,579,NF-kappaB,Gene,4790
4,33014208,638,642,IL-6,Gene,3569
5,33014208,643,693,signal transducer and activator of transcripti...,Gene,6774
6,33014208,695,700,STAT3,Gene,6774
7,33014208,706,715,NF-kappaB,Gene,4790
8,33014208,825,829,IL-6,Gene,3569
9,33014208,830,835,STAT3,Gene,6774


*更新-所有使用到的PMID*

In [32]:

Ref_PMID_DF = Ref_PMID_DF[Ref_PMID_DF['kegg_pmid'].isin(complete_PMID)]
PMID_final = Ref_PMID_DF.kegg_pmid.values

In [33]:
print('refer abstract:',len(set(PMID)))
print('no info:',len(set(PMID))-len(set(PMID_final)))
print('total abstract:',len(set(PMID_final)))

refer abstract: 1918
no info: 82
total abstract: 1836


In [34]:
no_info_abs = list(set(PMID).symmetric_difference(set(PMID_final)))
no_info_abs

['10647931',
 '14737121',
 '11349009',
 '23675659',
 '11239414',
 '10785635',
 '10403855',
 '24398961',
 '12878745',
 '11057895',
 '16364488',
 '19133982',
 '22767237',
 '11807533',
 '19357408',
 '15639402',
 '21317831',
 '8786033',
 '28700839',
 '17554333',
 '19286611',
 '14685170',
 '16339096',
 '12085089',
 '31145571',
 '8793930',
 '15451808',
 '11160144',
 '19168735',
 '33184193',
 '16001050',
 '10571983',
 '16551847',
 '11454785',
 '10898785',
 '14647478',
 '15479695',
 '22085380',
 '18305268',
 '17200452',
 '12915094',
 '20520704',
 '27806234',
 '10811837',
 '15122209',
 '14708004',
 '22042966',
 '15049952',
 '8630246',
 '16822996',
 '10774721',
 '16380836',
 '16394273',
 '12042806',
 '14987366',
 '15078920',
 '11172168',
 '24048132',
 '11071626',
 '16306523',
 '14720582',
 '26285834',
 '11484003',
 '11717317',
 '16551846',
 '12621137',
 '11225594',
 '16724157',
 '14614204',
 '26776362',
 '11524405',
 '23641066',
 '15258147',
 '14700586',
 '18070909',
 '9627005',
 '15083155',
 '3

#### save data result

In [423]:
%cd C:/Users/larry/Desktop/jupyter/Interaction_corpus/pubtator_annotation(kegg_refer)
!ls

C:\Users\larry\Desktop\jupyter\Interaction corpus\pubtator_annotation(kegg_refer)
pub_ent_10025913.txt
pub_ent_10051305.txt
pub_ent_10066284.txt
pub_ent_10066513.txt
pub_ent_10074132.txt
pub_ent_10195903.txt
pub_ent_10234039.txt
pub_ent_10234174.txt
pub_ent_10318948.txt
pub_ent_10331992.txt
pub_ent_10350329.txt
pub_ent_10354594.txt
pub_ent_10377196.txt
pub_ent_10398674.txt
pub_ent_10400746.txt
pub_ent_10405893.txt
pub_ent_10414964.txt
pub_ent_10417174.txt
pub_ent_10423127.txt
pub_ent_10430616.txt
pub_ent_10453277.txt
pub_ent_10462516.txt
pub_ent_10489373.txt
pub_ent_10499590.txt
pub_ent_10500199.txt
pub_ent_10505543.txt
pub_ent_10510092.txt
pub_ent_10523854.txt
pub_ent_10525166.txt
pub_ent_10527810.txt
pub_ent_10537274.txt
pub_ent_10540218.txt
pub_ent_10586030.txt
pub_ent_10590149.txt
pub_ent_10608501.txt
pub_ent_10615904.txt
pub_ent_10633082.txt
pub_ent_10637584.txt
pub_ent_10644344.txt
pub_ent_10666184.txt
pub_ent_10667806.txt
pub_ent_10687947.txt
pub_ent_10702232.txt
pub_ent_1070706

In [426]:
# 儲存所有PubTator結果
for p in range(0,len(PMID_final)): 
    name = "pub_ent_{}".format(PMID_final[p])
    save_variable(globals()[name], name+'.txt')
    print(p,PMID_final[p],'Done')

# 儲存PMID list
PMID_final=np.array(PMID_final)
np.save('PMID_final.npy',PMID_final) # 保存为.npy格式

0 15000146 Done
1 16555243 Done
2 11078609 Done
3 16699851 Done
4 12951588 Done
5 10505543 Done
6 16042571 Done
7 15349822 Done
8 9196022 Done
9 12737309 Done
10 11459867 Done
11 15573119 Done
12 15310786 Done
13 11223406 Done
14 15711891 Done
15 11170304 Done
16 16326109 Done
17 11477132 Done
18 12459728 Done
19 11407945 Done
20 12946833 Done
21 16702400 Done
22 10740269 Done
23 12648469 Done
24 14967450 Done
25 14586404 Done
26 12545153 Done
27 14580692 Done
28 12079267 Done
29 15774796 Done
30 15823750 Done
31 11707511 Done
32 12154354 Done
33 11253051 Done
34 16700615 Done
35 12762887 Done
36 16095998 Done
37 9643506 Done
38 12084351 Done
39 16557281 Done
40 16946003 Done
41 17062879 Done
42 16242838 Done
43 16189702 Done
44 11165748 Done
45 16236521 Done
46 12563308 Done
47 16146838 Done
48 12951584 Done
49 16642045 Done
50 15625120 Done
51 11721960 Done
52 12468433 Done
53 17554387 Done
54 16352814 Done
55 15719031 Done
56 14982876 Done
57 15156182 Done
58 16484590 Done
59 983420

534 16718776 Done
535 15224192 Done
536 12894247 Done
537 28427431 Done
538 24744585 Done
539 11683489 Done
540 26857262 Done
541 24061039 Done
542 26071484 Done
543 19318488 Done
544 22586678 Done
545 23696246 Done
546 19387476 Done
547 20836672 Done
548 12154354 Done
549 11253051 Done
550 16700615 Done
551 12762887 Done
552 16095998 Done
553 9643506 Done
554 12084351 Done
555 16557281 Done
556 16946003 Done
557 17062879 Done
558 16242838 Done
559 16189702 Done
560 11165748 Done
561 16236521 Done
562 12563308 Done
563 16146838 Done
564 12951584 Done
565 16642045 Done
566 15625120 Done
567 11721960 Done
568 12468433 Done
569 16352814 Done
570 12529654 Done
571 12032780 Done
572 10717473 Done
573 19075268 Done
574 15985538 Done
575 12130514 Done
576 12393465 Done
577 15024077 Done
578 11090075 Done
579 15160934 Done
580 11607818 Done
581 19855079 Done
582 17554387 Done
583 15719031 Done
584 14982876 Done
585 15156182 Done
586 16484590 Done
587 9834202 Done
588 12801837 Done
589 12244301

1104 9990017 Done
1105 15175323 Done
1106 18505829 Done
1107 23536685 Done
1108 10942706 Done
1109 11032799 Done
1110 11292584 Done
1111 12102686 Done
1112 12403808 Done
1113 13679513 Done
1114 11953326 Done
1115 11207565 Done
1116 14659695 Done
1117 15204437 Done
1118 17156122 Done
1119 17464284 Done
1120 14700549 Done
1121 15694859 Done
1122 11389469 Done
1123 12197905 Done
1124 29477730 Done
1125 25588886 Done
1126 26104206 Done
1127 21488979 Done
1128 23927593 Done
1129 27606286 Done
1130 27617233 Done
1131 27732847 Done
1132 20300064 Done
1133 15318166 Done
1134 28875943 Done
1135 28848711 Done
1136 31402916 Done
1137 27974148 Done
1138 29477730 Done
1139 25749450 Done
1140 17983751 Done
1141 22702524 Done
1142 27222578 Done
1143 19157959 Done
1144 12675800 Done
1145 17074883 Done
1146 18411289 Done
1147 10499590 Done
1148 30953429 Done
1149 28393050 Done
1150 25999954 Done
1151 20228623 Done
1152 25264025 Done
1153 29875765 Done
1154 29477730 Done
1155 26981193 Done
1156 29477730

1644 23439666 Done
1645 28643372 Done
1646 29194372 Done
1647 24619348 Done
1648 29476642 Done
1649 21722302 Done
1650 18420416 Done
1651 16844381 Done
1652 21980554 Done
1653 25766616 Done
1654 17466621 Done
1655 10195903 Done
1656 14532116 Done
1657 31417367 Done
1658 29261664 Done
1659 21196165 Done
1660 23479001 Done
1661 28507517 Done
1662 15528202 Done
1663 15640354 Done
1664 20205843 Done
1665 29998397 Done
1666 28714865 Done
1667 15342004 Done
1668 15611724 Done
1669 30258237 Done
1670 28062563 Done
1671 20070435 Done
1672 19707851 Done
1673 30743990 Done
1674 15272267 Done
1675 30250260 Done
1676 25132814 Done
1677 25413678 Done
1678 32397599 Done
1679 31783880 Done
1680 29734735 Done
1681 25270767 Done
1682 28279350 Done
1683 30030024 Done
1684 19043451 Done
1685 21722302 Done
1686 28643372 Done
1687 32204380 Done
1688 24619348 Done
1689 19230774 Done
1690 31795242 Done
1691 27873462 Done
1692 19828789 Done
1693 26410600 Done
1694 23361386 Done
1695 31558780 Done
1696 2043422

In [231]:
# 讀取list
PMID_final=np.load('PMID_final.npy')
PMID_final=PMID_final.tolist()
# 讀取所有PubTator結果 *讀完記得回去前面獲取註釋資訊
o = 0
PMID_final = []
for p in range(0,len(PMID_final)): 
    name = "pub_ent_{}".format(PMID_final[p])
    globals()[name] = load_variavle(name+'.txt')
    o+=1
    PMID_final.append(PMID_final[p])
    print(o,name+'.txt','done')

no info
1 pub_ent_15000146.txt done
2 pub_ent_16555243.txt done
3 pub_ent_11078609.txt done
4 pub_ent_16699851.txt done
no info
5 pub_ent_12951588.txt done
6 pub_ent_10505543.txt done
7 pub_ent_16042571.txt done
8 pub_ent_15349822.txt done
9 pub_ent_9196022.txt done
10 pub_ent_12737309.txt done
11 pub_ent_11459867.txt done
12 pub_ent_15573119.txt done
13 pub_ent_15310786.txt done
14 pub_ent_11223406.txt done
15 pub_ent_15711891.txt done
16 pub_ent_11170304.txt done
17 pub_ent_16326109.txt done
no info
18 pub_ent_11477132.txt done
19 pub_ent_12459728.txt done
20 pub_ent_11407945.txt done
21 pub_ent_12946833.txt done
22 pub_ent_16702400.txt done
23 pub_ent_10740269.txt done
24 pub_ent_12648469.txt done
25 pub_ent_14967450.txt done
26 pub_ent_14586404.txt done
27 pub_ent_12545153.txt done
28 pub_ent_14580692.txt done
29 pub_ent_12079267.txt done
30 pub_ent_15774796.txt done
31 pub_ent_15823750.txt done
32 pub_ent_11707511.txt done
no info
33 pub_ent_12154354.txt done
34 pub_ent_11253051.t

782 pub_ent_15638722.txt done
783 pub_ent_17140287.txt done
784 pub_ent_23430691.txt done
785 pub_ent_19038776.txt done
786 pub_ent_21835787.txt done
787 pub_ent_30059722.txt done
788 pub_ent_22869468.txt done
789 pub_ent_26239691.txt done
no info
790 pub_ent_19201080.txt done
791 pub_ent_19263472.txt done
792 pub_ent_15173177.txt done
793 pub_ent_16459163.txt done
794 pub_ent_19854353.txt done
795 pub_ent_21994562.txt done
796 pub_ent_11099494.txt done
797 pub_ent_16891440.txt done
798 pub_ent_28820498.txt done
799 pub_ent_16173017.txt done
800 pub_ent_18481805.txt done
801 pub_ent_21647825.txt done
802 pub_ent_9195957.txt done
803 pub_ent_12686421.txt done
804 pub_ent_9738022.txt done
805 pub_ent_18251977.txt done
806 pub_ent_17059849.txt done
807 pub_ent_11743185.txt done
808 pub_ent_15132991.txt done
809 pub_ent_18605975.txt done
810 pub_ent_21994545.txt done
811 pub_ent_28862649.txt done
812 pub_ent_28704708.txt done
813 pub_ent_24530748.txt done
814 pub_ent_25550115.txt done
815 

1302 pub_ent_17110955.txt done
1303 pub_ent_17024357.txt done
1304 pub_ent_19400868.txt done
1305 pub_ent_18059289.txt done
1306 pub_ent_15361242.txt done
1307 pub_ent_18617274.txt done
no info
1308 pub_ent_12106793.txt done
1309 pub_ent_12857995.txt done
1310 pub_ent_10525166.txt done
1311 pub_ent_18327664.txt done
1312 pub_ent_10234174.txt done
1313 pub_ent_16339966.txt done
1314 pub_ent_11890722.txt done
1315 pub_ent_10770277.txt done
1316 pub_ent_7591091.txt done
1317 pub_ent_10417174.txt done
1318 pub_ent_7916951.txt done
1319 pub_ent_15721837.txt done
1320 pub_ent_17543969.txt done
1321 pub_ent_15983048.txt done
1322 pub_ent_15322192.txt done
no info
1323 pub_ent_15039466.txt done
1324 pub_ent_10586030.txt done
1325 pub_ent_15831826.txt done
1326 pub_ent_11298294.txt done
1327 pub_ent_12438359.txt done
1328 pub_ent_9504340.txt done
1329 pub_ent_18793215.txt done
1330 pub_ent_10462516.txt done
1331 pub_ent_15639739.txt done
1332 pub_ent_16848789.txt done
1333 pub_ent_11728310.txt 

1932 pub_ent_21038418.txt done
no info
1933 pub_ent_22570745.txt done
1934 pub_ent_20619513.txt done
1935 pub_ent_20303879.txt done
1936 pub_ent_17498512.txt done
1937 pub_ent_16374858.txt done
1938 pub_ent_22940046.txt done
1939 pub_ent_14685853.txt done
1940 pub_ent_12891546.txt done
1941 pub_ent_16406828.txt done
1942 pub_ent_9285483.txt done
1943 pub_ent_21145844.txt done
1944 pub_ent_18242193.txt done
1945 pub_ent_18061659.txt done
1946 pub_ent_23299992.txt done
1947 pub_ent_22360800.txt done
1948 pub_ent_12765939.txt done
1949 pub_ent_20526383.txt done
1950 pub_ent_7666792.txt done
1951 pub_ent_15166380.txt done
1952 pub_ent_22675355.txt done
1953 pub_ent_17130651.txt done
1954 pub_ent_16186396.txt done
1955 pub_ent_18309108.txt done
1956 pub_ent_17141630.txt done
1957 pub_ent_9886945.txt done
1958 pub_ent_8048519.txt done
1959 pub_ent_22677645.txt done
no info
1960 pub_ent_15107844.txt done
1961 pub_ent_20466550.txt done
1962 pub_ent_23647930.txt done
1963 pub_ent_16756496.txt d

In [417]:
len(PMID_final)

2075