In [None]:
# import a module just a .py file
from Bio import Entrez

In [None]:
# email you use from NCBI
Entrez.email = "benmainye@gmail.com"

In [None]:
# make a placeholder to store result from querying 
handle = Entrez.esearch(db = "pubmed", term="[Open science] AND Kenya")

In [None]:
# make another placeholder which queries NCBI to get the NCBI
# IDs of interest
record = Entrez.read(handle)

In [None]:
# The result is a dictionary that contains various values
# Try running this line of code without the bracket ["IdList"]
record["IdList"]

Great! We now have **PubMed IDs** that could contain Id's that have papers related to term *[Open science] AND Kenya.* This is the same way you'd request for information at pubmed in NCBI. Let go ahead and get the information the full paper if possible in the next step.

In [None]:
# We just need to change our handle to get for instance a summary of 
# the data the papers we need
handle2 = Entrez.esummary(db="pubmed", id = "30123385")

In [None]:
# Let's bring our result back from NCBI
record2 = Entrez.read(handle2)

# see what we are capable of subsetting
record2

In [None]:
print("Extract interesting entries in the data")
print("")
print(record2[0]['Id'])
print("")
print(record2[0]['Title'])
print("")
print(record2[0]['AuthorList'])
print("")
print(record2[0]['FullJournalName'])
print("")
print(record2[0]['EPubDate'])

In [None]:
# We can't extract everything we want so let's just get the full paper
# if we can
?Entrez.efetch

In [None]:
# As Caleb said this will give us XML output which we continue to parse
# Notice, the documentation of each argument need more annotation
handle4 = Entrez.efetch(db="pubmed", id = "30123385", rettype="gb",retmode="text")

# fetching the result from the database
# print(handle4.read())

In [None]:
# Checking what type of object is return XML maybe?
# remove the delete the hash tag to see for yourself
# type(handle4.read())

In [None]:
# Using store magic command to store the output to a file
# Storing in variables for later is not allowed
%store handle4.read() >> file.txt

In [None]:
# you can use shell commands directly on the cell of jupyter
%cat file.txt

We have to write code 20 times! Nope there's an efficient way to solve our this problem, at least partially. It's .... you guessed it write a function. Go back to the code we wrote we put everything together and change just a few things and we are golden right? RIGHT?

In [None]:
# write a function to automate rewriting the code over and over again
def paper_retriever(email, searchterm, pubmedid):
    '''The paper retriever function takes your email which uses the same name email as an 
    argument, pubmedid you can get this from the previous function, searchterm take the NCBI type of query as a string
    and renamefile just changing your file names to avoid confusion.
    
    Return the full paper depending on if it's open access or not.
    '''
    # Enter your own email
    Entrez.email = email
    
    # search NCBI from the particular search term with method esearch
    handle = Entrez.esearch(db="pubmed", term=searchterm)
    
    # get the results
    record = Entrez.read(handle)
    
    # the method efetch does and fetches the information you need brings it back to your Ipython session
    handle2 = Entrez.efetch(db="pubmed", id = pubmedid, rettype="gb",retmode="text")
    
    # seeing the results
#     print("Extract interesting entries in the data")
#     print("")
#     print(record[0]['Id'])
#     print("")
#     print(record[0]['Title'])
#     print("")
#     print(record[0]['AuthorList'])
#     print("")
#     print(record[0]['FullJournalName'])
#     print("")
#     print(record[0]['EPubDate'])
    
    # using cell magic in a function in the jupyter notebook
    return handle2.read()

In [None]:
# calling the function as a test
print (paper_retriever(email="benmainye@gmail.com", searchterm="[Open science] AND Kenya",pubmedid=30123385))

In [None]:
paper = paper_retriever(email="benmainye@gmail.com", searchterm="[Open science] AND Kenya",pubmedid=30123385)

In [None]:
paper

In [None]:
%store paper >> file2.txt

In [None]:
%cat file2.txt

In [None]:
%%writefile paper_retriever.py
from Bio import Entrez
def paper_retriever(email, searchterm, pubmedid):
    '''The paper retriever function takes your email which uses the same name email as an 
    argument, pubmedid you can get this from the previous function, searchterm take the NCBI type of query as a string.
    
    Return the full paper depending on if it's open access or not.
    '''
    # Enter your own email
    Entrez.email = email
    
    # search NCBI from the particular search term with method esearch
    handle = Entrez.esearch(db="pubmed", term=searchterm)
    
    # get the results
    record = Entrez.read(handle)
    
    # the method efetch does and fetches the information you need brings it back to your Ipython session
    handle2 = Entrez.efetch(db="pubmed", id = pubmedid, rettype="gb",retmode="text")
    
    # using cell magic in a function in the jupyter notebook
    return handle2.read()

paper1 = paper_retriever(email="benmainye@gmail.com", searchterm="[Open science] AND Kenya",pubmedid=30123385)
#%store paper >> papers1.txt
print(paper1)

In [1]:
from Bio import Entrez
def paper_parser(term, identity):
    handle = Entrez.esearch(db="pubmed", term=identity)
    record = Entrez.read(handle)
    print (record)
    handle2 = Entrez.esummary(db="pubmed", id = identity)
    record2 = Entrez.read(handle2)
    print("Extract interesting entries in the data")
    print("")
    print(record2[0]['Id'])
    print("")
    print(record2[0]['Title'])
    print("")
    print(record2[0]['AuthorList'])
    print("")
    print(record2[0]['FullJournalName'])
    print("")
    print(record2[0]['EPubDate'])
    return record

print(paper_parser(term="[Open science] AND Kenya", identity=30123385))

Email address is not specified.

To make use of NCBI's E-utilities, NCBI requires you to specify your
email address with each request.  As an example, if your email address
is A.N.Other@example.com, you can specify it as follows:
   from Bio import Entrez
   Entrez.email = 'A.N.Other@example.com'
In case of excessive usage of the E-utilities, NCBI will attempt to contact
a user at the email address provided before blocking access to the
E-utilities.


DictElement({'Count': '1', 'RetMax': '1', 'RetStart': '0', 'IdList': ['30123385'], 'TranslationSet': [], 'TranslationStack': [DictElement({'Term': '30123385[UID]', 'Field': 'UID', 'Count': '-1', 'Explode': 'N'}, attributes={}), 'GROUP'], 'QueryTranslation': '30123385[UID]'}, attributes={})
Extract interesting entries in the data

30123385

HIV Prevention in Adolescents and Young People in the Eastern and Southern African Region: A Review of Key Challenges Impeding Actions for an Effective Response.

['Govender K', 'Masebo WGB', 'Nyamaruze P', 'Cowden RG', 'Schunter BT', 'Bains A']

The open AIDS journal

2018 Jul 19
DictElement({'Count': '1', 'RetMax': '1', 'RetStart': '0', 'IdList': ['30123385'], 'TranslationSet': [], 'TranslationStack': [DictElement({'Term': '30123385[UID]', 'Field': 'UID', 'Count': '-1', 'Explode': 'N'}, attributes={}), 'GROUP'], 'QueryTranslation': '30123385[UID]'}, attributes={})


In [6]:
%%writefile paper_parser.py
from Bio import Entrez
def paper_parser(term, identity):
    Entrez.email = "benmainye@gmail.com" #use your email
    handle = Entrez.esearch(db="pubmed", term=identity)
    record = Entrez.read(handle)
    print (record)
    handle2 = Entrez.esummary(db="pubmed", id = identity)
    record2 = Entrez.read(handle2)
    print("Extract interesting entries in the data")
    print("")
    print(record2[0]['Id'])
    print("")
    print(record2[0]['Title'])
    print("")
    print(record2[0]['AuthorList'])
    print("")
    print(record2[0]['FullJournalName'])
    print("")
    print(record2[0]['EPubDate'])
    return record2

paper2 = paper_parser(term="[Open science] AND Kenya", identity=30123385)
#%store paper2 >> papers2.txt run in ipython session
print(paper2)

Overwriting paper_parser.py


In [8]:
%run paper_retriever.py


1. Open AIDS J. 2018 Jul 19;12:53-67. doi: 10.2174/1874613601812010053. eCollection 
2018.

HIV Prevention in Adolescents and Young People in the Eastern and Southern
African Region: A Review of Key Challenges Impeding Actions for an Effective
Response.

Govender K(1), Masebo WGB(1), Nyamaruze P(2), Cowden RG(3), Schunter BT(4), Bains
A(4).

Author information: 
(1)Health Economics and HIV and AIDS Research Division, University of
KwaZulu-Natal, Durban, South Africa.
(2)School of Applied Human Sciences, University of KwaZulu-Natal, Durban, South
Africa.
(3)Department of Psychology, Middle Tennessee State University, Murfreesboro,
United States of America.
(4)UNICEF, Eastern and Southern Africa Regional Office, Nairobi, Kenya.

The global commitment to ending the AIDS epidemic by 2030 places HIV prevention
at the centre of the response. With the disease continuing to disproportionately 
affect young populations in the Eastern and Southern African Region (ESAR),
particularly adolescent 

In [7]:
%run paper_parser.py

DictElement({'Count': '1', 'RetMax': '1', 'RetStart': '0', 'IdList': ['30123385'], 'TranslationSet': [], 'TranslationStack': [DictElement({'Term': '30123385[UID]', 'Field': 'UID', 'Count': '-1', 'Explode': 'N'}, attributes={}), 'GROUP'], 'QueryTranslation': '30123385[UID]'}, attributes={})
Extract interesting entries in the data

30123385

HIV Prevention in Adolescents and Young People in the Eastern and Southern African Region: A Review of Key Challenges Impeding Actions for an Effective Response.

['Govender K', 'Masebo WGB', 'Nyamaruze P', 'Cowden RG', 'Schunter BT', 'Bains A']

The open AIDS journal

2018 Jul 19
[DictElement({'Item': [], 'Id': '30123385', 'PubDate': '2018', 'EPubDate': '2018 Jul 19', 'Source': 'Open AIDS J', 'AuthorList': ['Govender K', 'Masebo WGB', 'Nyamaruze P', 'Cowden RG', 'Schunter BT', 'Bains A'], 'LastAuthor': 'Bains A', 'Title': 'HIV Prevention in Adolescents and Young People in the Eastern and Southern African Region: A Review of Key Challenges Impeding A