In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from datetime import datetime, timedelta

In [8]:
last_known_date = datetime.now().strftime('%Y-%m-%d')
def jama_get_extra_info(doi):
    global last_known_date
    response = requests.get(f'https://api.crossref.org/works/{doi}')
    json_resp = response.json()
    abstract = BeautifulSoup(json_resp['message']['abstract'],'html.parser').text
    title = json_resp['message']['title'][0]
    subjects = json_resp['message']['subject']
    url = json_resp['message']['link'][0]['URL']
    try:
        date = '-'.join(str(item) for item in json_resp['message']['published-print']['date-parts'][0])
        last_known_date = date
    except:
        date = last_known_date
    source = 'Jama'
    authors = []
    affiliations = []
    for id in json_resp['message']['author']:
        name = ""
        for name_type in ['given', 'family']:
            try:
                name+=id[name_type]+' '
            except:
                pass
        authors.append(name[:-1])
        for affil in id['affiliation']:
            affiliations.append(affil['name'])
    affiliations = list(set(affiliations))
    authors = list(set(authors))
    return abstract, title, subjects, url, date, source, authors, affiliations

def jama_get_days_back(days_back):
    days_back = 5#find and remove later
    out = []
    i=1
    while True:
        urlString = f'https://jamanetwork.com/searchresults?sort=Newest&f_ArticleTypeDisplayName=Research&page={i}'
        i+=1
        driver = webdriver.Chrome()
        driver.get(urlString)
        html = driver.page_source
        driver.close()
        soup=BeautifulSoup(html,'html.parser')
        for cite in soup.find_all('cite', class_ ='article--citation'):
            paper_info = {}
            paper_info['paper_id'] = cite.text.replace('. ', ';').split(';')[-1].strip()
            try:
                paper_info['abstract'],paper_info['title'],paper_info['subjects'],paper_info['url'],paper_info['date'],paper_info['source'],paper_info['authors'],paper_info['affiliations'] = jama_get_extra_info(paper_info['paper_id'])
            except:
                continue
            out.append(paper_info)
        if len(out)>0:
            if datetime.strptime(out[-1]['date'], '%Y-%m-%d').date() <datetime.now().date()-timedelta(days=days_back):
                break
    return pd.DataFrame.from_records(out)

In [9]:
df = jama_get_days_back(2)
df

Unnamed: 0,paper_id,abstract,title,subjects,url,date,source,authors,affiliations
0,10.1001/jamahealthforum.2023.3330,ImportanceUnderstanding how the active duty mi...,Propensity of US Military Personnel to Seek Me...,"[Public Health, Environmental and Occupational...",https://jamanetwork.com/journals/jama-health-f...,2023-10-07,Jama,"[Marigee Bacolod, Yu-Chu Shen, Jennifer A. Hei...","[IZA Institute of Labor Economics, Bonn, Germa..."
1,10.1001/jamahealthforum.2023.3274,ImportanceEducational attainment in the US is ...,Educational Attainment and US Drug Overdose De...,"[Public Health, Environmental and Occupational...",https://jamanetwork.com/journals/jama-health-f...,2023-10-07,Jama,[David Powell],"[RAND, Arlington, Virginia]"
2,10.1001/jamahealthforum.2023.3463,This survey study uses data from the 2020-2021...,Brand Differences in Underage Tobacco Use as E...,"[Public Health, Environmental and Occupational...",https://jamanetwork.com/journals/jama-health-f...,2023-10-07,Jama,"[Abigail S. Friedman, Alex C. Liber]","[Department of Health Policy and Management, Y..."
3,10.1001/jamanetworkopen.2023.36992,ImportanceLittle is known about how COVID-19 a...,Acute SARS-CoV-2 Infection and Incidence and O...,[General Medicine],https://jamanetwork.com/journals/jamanetworkop...,2023-10-07,Jama,"[Thomas D. Rea, Jennifer Z. Liu, Jamie M. Emer...","[Department of Emergency Medicine, University ..."
4,10.1001/jamanetworkopen.2023.36848,ImportanceDetermining whether migrants with no...,Comparison of Hospitalization for Nonaffective...,[General Medicine],https://jamanetwork.com/journals/jamanetworkop...,2023-10-07,Jama,"[Heidi Taipale, Jakob Bergström, Ellenor Mitte...","[School of Pharmacy, University of Eastern Fin..."
...,...,...,...,...,...,...,...,...,...
75,10.1001/jamainternmed.2023.3575,ImportanceDementia is a life-altering diagnosi...,Changes in the Use of Long-Term Medications Fo...,[Internal Medicine],https://jamanetwork.com/journals/jamainternalm...,2023-10-1,Jama,"[Bruce E. Landon, John Z. Ayanian, Vilsa E. Cu...","[Harvard Medical School, Boston, Massachusetts..."
76,10.1001/jamainternmed.2023.3562,ImportanceThe US Food and Drug Administration ...,Endovascular Aneurysm Repair Devices as a Use ...,[Internal Medicine],https://jamanetwork.com/journals/jamainternalm...,2023-10-1,Jama,"[Xuan Wang, Tianxi Cai, Florence T. Bourgeois,...","[Department of Population Health Sciences, Uni..."
77,10.1001/jamainternmed.2023.3300,ImportanceEmerging evidence indicates that exp...,Comparison of Particulate Air Pollution From D...,[Internal Medicine],https://jamanetwork.com/journals/jamainternalm...,2023-10-1,Jama,"[Jennifer Weuve, Richard Hirth, Jiaqi Gao, Ken...","[Department of Epidemiology, University of Mic..."
78,10.1001/jamainternmed.2023.3283,ImportanceAn intensive lifestyle intervention ...,Association of Intensive Lifestyle Interventio...,[Internal Medicine],https://jamanetwork.com/journals/jamainternalm...,2023-10-1,Jama,"[Anne Peters, Jeffrey C. Yu, Peter J. Huckfeld...","[Department of Biostatistics and Data Science,..."


In [10]:
df.to_pickle(r'C:\Users\ethan\Documents\ResearchFinder\daily_temp_files\jama_temp.pkl')


In [6]:
df1 = pd.read_pickle(r'C:\Users\ethan\Documents\ResearchFinder\daily_temp_files\jama_temp.pkl')
df1

Unnamed: 0,doi,abstract,title,subjects,url,date,source,authors,affiliations
0,10.1001/jamahealthforum.2023.3330,ImportanceUnderstanding how the active duty mi...,Propensity of US Military Personnel to Seek Me...,"[Public Health, Environmental and Occupational...",https://jamanetwork.com/journals/jama-health-f...,2023-10-07,Jama,"[Jennifer A. Heissel, Marigee Bacolod, Yu-Chu ...","[National Bureau of Economic Research, Cambrid..."
1,10.1001/jamahealthforum.2023.3274,ImportanceEducational attainment in the US is ...,Educational Attainment and US Drug Overdose De...,"[Public Health, Environmental and Occupational...",https://jamanetwork.com/journals/jama-health-f...,2023-10-07,Jama,[David Powell],"[RAND, Arlington, Virginia]"
2,10.1001/jamahealthforum.2023.3463,This survey study uses data from the 2020-2021...,Brand Differences in Underage Tobacco Use as E...,"[Public Health, Environmental and Occupational...",https://jamanetwork.com/journals/jama-health-f...,2023-10-07,Jama,"[Abigail S. Friedman, Alex C. Liber]","[Department of Health Policy and Management, Y..."
3,10.1001/jamanetworkopen.2023.36992,ImportanceLittle is known about how COVID-19 a...,Acute SARS-CoV-2 Infection and Incidence and O...,[General Medicine],https://jamanetwork.com/journals/jamanetworkop...,2023-10-07,Jama,"[David L. Murphy, Jamie M. Emert, Thomas D. Re...","[Department of Medicine, University of Washing..."
4,10.1001/jamanetworkopen.2023.36848,ImportanceDetermining whether migrants with no...,Comparison of Hospitalization for Nonaffective...,[General Medicine],https://jamanetwork.com/journals/jamanetworkop...,2023-10-07,Jama,"[Ellenor Mittendorfer-Rutz, Jakob Bergström, A...","[Department of Clinical Neuroscience, Division..."
...,...,...,...,...,...,...,...,...,...
75,10.1001/jamainternmed.2023.3575,ImportanceDementia is a life-altering diagnosi...,Changes in the Use of Long-Term Medications Fo...,[Internal Medicine],https://jamanetwork.com/journals/jamainternalm...,2023-10-1,Jama,"[Jeffrey Souza, Eran Politzer, John Z. Ayanian...",[Institute for Healthcare Policy and Innovatio...
76,10.1001/jamainternmed.2023.3562,ImportanceThe US Food and Drug Administration ...,Endovascular Aneurysm Repair Devices as a Use ...,[Internal Medicine],https://jamanetwork.com/journals/jamainternalm...,2023-10-1,Jama,"[Florence T. Bourgeois, Vidul Ayakulangara Pan...",[Division of Population Health and Data Scienc...
77,10.1001/jamainternmed.2023.3300,ImportanceEmerging evidence indicates that exp...,Comparison of Particulate Air Pollution From D...,[Internal Medicine],https://jamanetwork.com/journals/jamainternalm...,2023-10-1,Jama,"[Lianne Sheppard, Sara D. Adar, Joel D. Kaufma...","[University of Michigan Medical School, Ann Ar..."
78,10.1001/jamainternmed.2023.3283,ImportanceAn intensive lifestyle intervention ...,Association of Intensive Lifestyle Interventio...,[Internal Medicine],https://jamanetwork.com/journals/jamainternalm...,2023-10-1,Jama,"[Peter J. Huckfeldt, Ann S. M. Harada, Jeffrey...","[Yale University School of Public Health, New ..."


In [7]:
df2 = pd.read_pickle(r'C:\Users\ethan\Documents\ResearchFinder\last_month.pkl')
df2 = pd.concat([df2, df1]).drop_duplicates(subset=[''])

Unnamed: 0,abstract,title,paper_id,source,date,authors,affiliations,url,subjects,doi,subject,language,abstract_vec,score_divider,Emory_score,llama_abstract,affiliation,GT_score,Harold_score
14948,ImportanceHepatocellular carcinoma (HCC) is a ...,Tislelizumab vs Sorafenib as First-Line Treatm...,,Jama,2023-10-1,"[Yabing Guo, Andrew X. Zhu, Yaxi Chen, Eric As...",[Nanjing Tianyinshang Hospital of China Pharma...,https://jamanetwork.com/journals/jamaoncology/...,"[Oncology, Cancer Research]",10.1001/jamaoncol.2023.4003,,en,"[-0.04262472, 0.03069116, -0.013544184, 0.0629...",6,13.470813,,,0.0,0.0


In [None]:
df.to_pickle('')

In [None]:
jama_get_extra_info('10.1001/jamapsychiatry.2023.3708')

('ImportanceThe reported prevalence of autism in children has consistently risen over the past 20 years. The concurrent implications for the adult Medicaid system, which insures autistic adults due to low income or disability, have not been studied.ObjectiveTo estimate the prevalence of adults identified as autistic in Medicaid claims data and to examine the prevalence by year, age, and race and ethnicity to understand enrollment patterns.Design, Setting, and ParticipantsThis cohort study used data from a longitudinal Medicaid claims cohort of enrollees aged 18 years or older with a claim for autism at any point from January 1, 2011, to December 31, 2019, and an approximately 1% random sample of all adult Medicaid enrollees. The data were analyzed between February 22 and June 22, 2023.ExposureAdults enrolled in Medicaid with a claim for autism.Main Outcome and MeasuresPrevalence of autism per 1000 Medicaid enrollees for each year was calculated using denominator data from the Centers f