In [9]:
#BIO and NLTK
from Bio import Medline
from Bio import Entrez
import nltk
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords                       
from nltk.tokenize import word_tokenize, sent_tokenize
#Base Packages
import time
import heapq
import re
import urllib.parse

In [74]:
def score_sentences(inp):
    """Score every sentence of ``inp`` by the relative frequency of its words.

    The text is split into sentences, each sentence is cleaned (bracketed
    numeric citations such as ``[12]`` and every character other than
    letters, digits, ``.`` and space are replaced by spaces, then whitespace
    is collapsed) and tokenised in lower case.  Word counts over the whole
    input, excluding English stop words and punctuation-only tokens, are
    divided by the highest count; a sentence's score is the sum of those
    weights over its tokens.

    Parameters
    ----------
    inp : str
        Text to analyse (e.g. an abstract).

    Returns
    -------
    list
        ``[sentScores, sentsb4]`` where ``sentScores`` maps each cleaned
        sentence to its score and ``sentsb4`` lists the matching original
        sentences.  Both have the same length and order, so
        ``zip(sentsb4, sentScores.values())`` pairs every original sentence
        with its own score.  A sentence without any scored word gets ``0.0``;
        a sentence whose cleaned form repeats an earlier one is listed once.
        Empty input yields ``[{}, []]``.
    """
    stop_words = set(stopwords.words("english"))

    def _clean(text):
        # Same two substitutions as before, applied per sentence so that the
        # cleaned and the original sentences stay in one-to-one correspondence.
        text = re.sub(r'\[[0-9]*\]|[^a-zA-Z0-9. ]', ' ', text)
        return re.sub(r'\s+', ' ', text).strip()

    # Tokenise each sentence once; the same tokens feed both the frequency
    # table and the scoring pass, so every counted word is found again later.
    prepared = []
    for original in sent_tokenize(inp):
        cleaned = _clean(original)
        prepared.append((original, cleaned, word_tokenize(cleaned.lower())))

    # Count in lower case: the stop-word list is lower case, and testing and
    # storing the same key keeps capitalised occurrences from resetting a count.
    freq = {}
    for _, _, tokens in prepared:
        for token in tokens:
            if token in stop_words:
                continue
            if not any(ch.isalnum() for ch in token):
                continue  # bare punctuation such as "." is not a word
            freq[token] = freq.get(token, 0) + 1

    # default=0 keeps empty / symbol-only input from raising ValueError; the
    # comprehension below performs no division when there is nothing to weigh.
    max_freq = max(freq.values(), default=0)
    weights = {word: count / max_freq for word, count in freq.items()}

    sentScores = {}
    sentsb4 = []
    for original, cleaned, tokens in prepared:
        if cleaned in sentScores:
            continue  # duplicate sentence: keep a single entry in both outputs
        sentScores[cleaned] = sum((weights.get(token, 0.0) for token in tokens), 0.0)
        sentsb4.append(original)
    return [sentScores, sentsb4]

In [112]:
# Set the contact email sent with Entrez requests, then search PubMed for candidate PMIDs
Entrez.email = "youremail@domain.com"
search_handle = Entrez.esearch(
    db="pubmed",
    term='"mus musculus"[all] AND "vaccine"[Title/Abstract]',
    sort='relevance',
    retmax=5,
)
try:
    # Let Biopython parse the XML reply instead of scanning raw lines for "<Id>"
    search_result = Entrez.read(search_handle)
finally:
    search_handle.close()

# Build list of PMIDs to inspect
PMIDs = [str(pmid) for pmid in search_result["IdList"]]

# Fetch each PMID as a MEDLINE record and extract title/abstract/date
dicto = {}
for PMID in PMIDs:
    handle = Entrez.efetch(db="pubmed", id=PMID, rettype="medline", retmode="text")
    try:
        records = list(Medline.parse(handle))
    finally:
        handle.close()
    if records:
        # As before, the last record returned for the PMID is the one kept
        record = records[-1]
        # .get() supplies a placeholder for any missing field instead of relying
        # on a bare except that hid unrelated errors
        dicto[PMID] = {
            'abstract': record.get("AB", "Unavailable"),
            'title': record.get("TI", "Unavailable"),
            'date': record.get("DP", "Unavailable"),
        }
    else:
        # Nothing came back: skip rather than reuse the previous article's data
        print("No MEDLINE record returned for PMID " + PMID + ", skipping")
    print("Completed Parsing, waiting 2 seconds")
    time.sleep(2)  # pause between requests

# Summarise each abstract with its three highest-scoring sentences
summariesDicto = {}
for PMID, article in dicto.items():
    scores, original_sents = score_sentences(article['abstract'])
    # Pair the original (uncleaned) sentences with the scores, positionally
    ranked = dict(zip(original_sents, scores.values()))
    summariesDicto[PMID] = ' '.join(heapq.nlargest(3, ranked, key=ranked.get))

# Print a plain-text report, one article per entry
for PMID, summary in summariesDicto.items():
    article = dicto[PMID]
    print(f"PMID: {PMID} | Publication Date: {article['date']}\nTitle: {article['title']}\nSummary: {summary}\n")

Completed Parsing, waiting 2 seconds
Completed Parsing, waiting 2 seconds
Completed Parsing, waiting 2 seconds
Completed Parsing, waiting 2 seconds
Completed Parsing, waiting 2 seconds
PMID: 28515303 | Publication Date: 2017 Aug 1
Title: Spontaneous and Vaccine-Induced Clearance of Mus Musculus Papillomavirus 1 Infection.
Summary: We found that challenge of SKH1 mice (Crl:SKH1-Hrhr) with MmuPV1 by scarification on their tail resulted in three clinical outcomes: (i) persistent (>2-month) papillomas ( approximately 20%); (ii) transient papillomas that spontaneously regress, typically within 2 months ( approximately 15%); and (iii) no visible papillomas and viral clearance ( approximately 65%). We show that MmuPV1 challenge of the outbred immunocompetent SKH1 strain produces both transient and persistent papillomas and that vaccination of the mice with a DNA expressing an MmuPV1 E6E7L2 fusion with calreticulin can rapidly clear persistent papillomas. SKH1 mice with persistent papillomas w