In [5]:
import pandas as pd
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup


In [85]:
def get_pnas_auth_affil_abst(doi):
    authors = []
    affiliations = []
    response = requests.get(f'https://api.crossref.org/works/{doi}')
    json_resp = response.json()
    for id in json_resp['message']['author']:
        name = ""
        for name_type in ['given', 'faimily']:
            try:
                name+=id[name_type]+' '
            except:
                pass
        authors.append(name[:-1])
        for affil in id['affiliation']:
            affiliations.append(affil['name'])
    affiliations = list(set(affiliations))
    authors = list(set(authors))
    try:
        abstract =  BeautifulSoup(json_resp['message']['abstract'], "lxml").text.strip()
    except:
        abstract = ""
    try:
        subjects = json_resp['message']['subject']
    except:
        subjects = ""
    return authors, affiliations,abstract, subjects
def get_pnas_per_school(days_back, affiliation):
    start_date = (datetime.now()-timedelta(days=days_back)).strftime('%Y%m%d')
    end_date = datetime.now().strftime('%Y%m%d')
    url = f'https://www.pnas.org/action/showFeed?ui=0&mi=99mbi3&type=search&feed=rss&query=%2526access%253Don%2526content%253DarticlesChapters%2526dateRange%253D%25255B{start_date}%252BTO%252B{end_date}%25255D%2526field1%253DAffiliation%2526target%253Ddefault%2526text1%253D{affiliation}'

    response = requests.get(url)
    soup = BeautifulSoup(response.text, "xml")
    items = soup.find_all("item")
    out = []
    for item in items:
        paper_info = {}
        paper_info['paper_id'] = item.find("prism:doi").text
        paper_info['title']= item.find("dc:title").text
        paper_info['date']=item.find('prism:coverDisplayDate').text[:10]
        paper_info['url'] = 'https://www.pnas.org/doi/abs/'+paper_info['paper_id']
        paper_info['authors'], paper_info['affiliations'], paper_info['abstract'], paper_info['subjects'] = get_pnas_auth_affil_abst(paper_info['paper_id'])
        out.append(paper_info)
    df = pd.DataFrame.from_records(out)
    df['source'] = 'National Academy of Sciences US'
    return df
def get_pnas_days_back(num_days):
    results = []
    for affil in ['Emory', 'Georgia']:
        results.append(get_pnas_per_school(num_days, affil))
    return pd.concat(results)

In [87]:
get_pnas_days_back(80)

Unnamed: 0,paper_id,title,date,url,authors,affiliations,abstract,subjects,source
0,10.1073/pnas.2221533120,Changes in patterns of age-related network con...,2023-08-08,https://www.pnas.org/doi/abs/10.1073/pnas.2221...,"[, Vince D., Giuseppe, Teresa, Leonardo, Willi...","[Lieber Institute for Brain Development, Johns...",Alterations in fMRI-based brain functional net...,[Multidisciplinary],National Academy of Sciences US
1,10.1073/pnas.2303358120,An interchangeable prion-like domain is requir...,2023-07-25,https://www.pnas.org/doi/abs/10.1073/pnas.2303...,"[Ricardo C., Emily J., Jeannette V., David J.,...",[Department of Biochemistry and Molecular Biol...,Retrotransposons and retroviruses shape genome...,[Multidisciplinary],National Academy of Sciences US
0,10.1073/pnas.2307638120,Widespread use of proton-pumping rhodopsin in ...,2023-09-26,https://www.pnas.org/doi/abs/10.1073/pnas.2307...,"[Oscar, Babak, Susumu, Kaylie, Sarah M., Steph...","[Atmosphere and Ocean Research Institute, The ...",Photosynthetic carbon (C) fixation by phytopla...,[Multidisciplinary],National Academy of Sciences US
1,10.1073/pnas.2213838120,Factors Assessing Science’s Self-Presentation ...,2023-09-19,https://www.pnas.org/doi/abs/10.1073/pnas.2213...,"[Dror, Yotam, Kathleen Hall, Patrick E.]","[Annenberg Public Policy Center, University of...",A confirmatory factor analysis (CFA) of respon...,[Multidisciplinary],National Academy of Sciences US
2,10.1073/pnas.2304748120,Exposure to automation explains religious decl...,2023-08-22,https://www.pnas.org/doi/abs/10.1073/pnas.2304...,"[Kai Chi, Chris G., Joshua Conrad, Pok Man, Adam]","[Department of Management and Organizations, K...",The global decline of religiosity represents o...,[Multidisciplinary],National Academy of Sciences US
3,10.1073/pnas.2221533120,Changes in patterns of age-related network con...,2023-08-08,https://www.pnas.org/doi/abs/10.1073/pnas.2221...,"[, Vince D., Giuseppe, Teresa, Leonardo, Willi...","[Lieber Institute for Brain Development, Johns...",Alterations in fMRI-based brain functional net...,[Multidisciplinary],National Academy of Sciences US
4,10.1073/pnas.2307977120,Size-dependent charge transfer between water m...,2023-08-01,https://www.pnas.org/doi/abs/10.1073/pnas.2307...,"[Shiquan, Leo N. Y., Zhong Lin, Zhen]","[School of Materials Science and Engineering, ...",Contact electrification (CE) in water has attr...,[Multidisciplinary],National Academy of Sciences US
5,10.1093/pnasnexus/pgad268,Molecular basis for inhibition of methane clat...,2023-08-01,https://www.pnas.org/doi/abs/10.1093/pnasnexus...,"[Manlin, James C, Jennifer B, Zixing, Abigail ...",[School of Civil and Environmental Engineering...,Abstract\nMethane clathrates on continental ma...,,National Academy of Sciences US
6,10.1073/pnas.2303358120,An interchangeable prion-like domain is requir...,2023-07-25,https://www.pnas.org/doi/abs/10.1073/pnas.2303...,"[Ricardo C., Emily J., Jeannette V., David J.,...",[Department of Biochemistry and Molecular Biol...,Retrotransposons and retroviruses shape genome...,[Multidisciplinary],National Academy of Sciences US
7,10.1073/pnas.2305705120,Single cobalt atoms anchored on Ti3C2Tx with d...,2023-07-18,https://www.pnas.org/doi/abs/10.1073/pnas.2305...,"[Kaida, Yueping, Sihui, Mingmei, Pengfei, Hong...",[Key Laboratory of Poyang Lake Environment and...,The assimilation of antibiotic resistance gene...,[Multidisciplinary],National Academy of Sciences US


In [None]:
['paper_id', 'authors', 'subjects', 'title', 'abstract', 'url', 'date', 'affiliations']


In [72]:
doi = '10.1073/pnas.2307638120'
response = requests.get(f'https://api.crossref.org/works/{doi}')
json_resp = response.json()
json_resp

{'status': 'ok',
 'message-type': 'work',
 'message-version': '1.0.0',
 'message': {'indexed': {'date-parts': [[2023, 9, 22]],
   'date-time': '2023-09-22T05:06:39Z',
   'timestamp': 1695359199323},
  'reference-count': 56,
  'publisher': 'Proceedings of the National Academy of Sciences',
  'issue': '39',
  'license': [{'start': {'date-parts': [[2023, 9, 18]],
     'date-time': '2023-09-18T00:00:00Z',
     'timestamp': 1694995200000},
    'content-version': 'vor',
    'delay-in-days': 0,
    'URL': 'https://creativecommons.org/licenses/by-nc-nd/4.0/'}],
  'funder': [{'DOI': '10.13039/100000001',
    'name': 'National Science Foundation',
    'doi-asserted-by': 'publisher',
    'award': ['OPP1745036']},
   {'DOI': '10.13039/100000001',
    'name': 'National Science Foundation',
    'doi-asserted-by': 'publisher',
    'award': ['OPP1744760']},
   {'DOI': '10.13039/100000001',
    'name': 'National Science Foundation',
    'doi-asserted-by': 'publisher',
    'award': ['PLR1440435']}],
  '

'Photosynthetic carbon (C) fixation by phytoplankton in the Southern Ocean (SO) plays a critical role in regulating air–sea exchange of carbon dioxide and thus global climate. In the SO, photosynthesis (PS) is often constrained by low iron, low temperatures, and low but highly variable light intensities. Recently, proton-pumping rhodopsins (PPRs) were identified in marine phytoplankton, providing an alternate iron-free, light-driven source of cellular energy. These proteins pump protons across cellular membranes through light absorption by the chromophore retinal, and the resulting pH energy gradient can then be used for active membrane transport or for synthesis of adenosine triphosphate. Here, we show that PPR is pervasive in Antarctic phytoplankton, especially in iron-limited regions. In a model SO diatom, we found that it was localized to the vacuolar membrane, making the vacuole a putative alternative phototrophic organelle for light-driven production of cellular energy. Unlike ph

In [52]:
def get_pnas_auth_affil_abst(doi):
    authors = []
    affiliations = []
    response = requests.get(f'https://api.crossref.org/works/{doi}')
    json_resp = response.json()
    for id in json_resp['message']['author']:
        authors.append(id['given']+' '+id['family'])
        for affil in id['affiliation']:
            affiliations.append(affil['name'])
    affiliations = list(set(affiliations))
    authors = list(set(authors))
    return authors, affiliations, json_resp['message']['abstract'][8:][:-9], 