In [1]:
import pandas as pd
from collections import Counter
import requests
from typing import Dict, Tuple,List
import matplotlib.pyplot as plt
import os

In [2]:
# File whose first line is read as the API token.
tokenFile = 'token.ini'
# Directory that receives the exported CSV table.
dirData = 'data'

In [3]:
# Converts params from a human-readable tuple into URL request form
def setParams(searchCriteria: Tuple, maxRows: int = None):
    """Build the query parameters for a search request.

    The first criterion becomes the main query (``q``); every further
    criterion becomes a filter query (``fq``). A ``rows`` entry is always
    appended last.

    Args:
        searchCriteria: Non-empty sequence of query strings.
        maxRows: Value for ``rows``. ``None`` selects the default of 2000;
            any other value (including ``0``) is converted with ``int`` and
            passed through.

    Returns:
        Tuple of ``(name, value)`` string pairs.

    Raises:
        ValueError: If ``searchCriteria`` is empty.
    """
    criteria = list(searchCriteria)
    if not criteria:
        # Without at least one criterion there is nothing to use as ``q``.
        raise ValueError('searchCriteria must contain at least one query string')
    params = [('q', criteria[0])] + [('fq', q) for q in criteria[1:]]
    # Compare against None explicitly so that maxRows=0 is not mistaken for
    # "not given" and silently replaced by the default.
    rows = 2000 if maxRows is None else maxRows
    params.append(('rows', str(int(rows))))
    return tuple(params)

In [4]:
# Parameters for requests. The token can be obtained via ADS Settings -> API Token.
# Only the first line of the token file is used. Whitespace on both sides is
# dropped so a stray space or newline cannot end up in the Authorization header.
with open(tokenFile) as f:
    token = f.readline().strip()
if not token:
    # Fail here instead of sending requests with an empty bearer token.
    raise ValueError('No API token found on the first line of ' + repr(tokenFile))
AdsUrl = 'https://api.adsabs.harvard.edu/v1/search/query'
headers = {'Authorization': 'Bearer ' + token}

In [5]:
# Making a request with some additional specifier
def request(year: int, additional: str = '') -> int:
    """Return the number of matching papers reported by the search API.

    The query is restricted to the given year, the astronomy database,
    refereed publications and abstracts containing "machine learning".

    Args:
        year: Publication year to query.
        additional: Optional extra filter query (e.g. ``'abs:"lensing"'``).
            An empty string adds no extra filter.

    Returns:
        The ``numFound`` value of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer in time.
    """
    searchCriteria = ['year:' + str(year), 'database:astronomy',
                      'property:refereed', 'abs:"machine learning"']
    if additional:
        # Skip the empty specifier instead of sending an empty filter query.
        searchCriteria.append(additional)
    # Only the total count is used, so a small page of results is requested.
    params = setParams(tuple(searchCriteria), maxRows=10)
    # A timeout keeps a stalled connection from blocking the notebook forever.
    response = requests.get(url=AdsUrl, headers=headers, params=params, timeout=60)
    # Surface HTTP errors directly rather than as a KeyError on the JSON body.
    response.raise_for_status()
    return response.json()['response']['numFound']

In [6]:
# Publication years to query: 2010 through 2020 inclusive.
years = range(2010, 2021)

In [7]:
# Additional specifiers, which set the topic, were chosen manually after just looking at the publications.
# After all, a proper way to do this would be to download all the abstracts and do some NLP...
additionals = ['Milky Way', 'exoplanet', 'gravitational wave',
               'photometric redshift', 'galaxy classification', 'star/galaxy',
               'lensing', 'transient', 'Sun']

# Collect one record per year and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
records = []
for year in years:
    print(year)  # progress indicator, one line per queried year
    counts = {'year': year, 'ML': request(year, '')}
    for add in additionals:
        # Each topic is matched as a quoted phrase in the abstract.
        counts[add] = request(year, 'abs:"' + add + '"')
    records.append(counts)

# Passing `columns` fixes the column order and keeps the header even when
# no years were queried.
statByYears = pd.DataFrame(records, columns=['year', 'ML'] + additionals)

2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020


In [14]:
# 'total' is the row-wise sum of the per-topic counts, i.e. the number of
# (paper, topic) matches among papers mentioning ML in the abstract. A paper
# matching several topics contributes once per topic.
# The column is first created as a placeholder so that the slice 2:-1 below
# covers exactly the topic columns (everything after 'year' and 'ML', and
# before 'total').
statByYears['total'] = 0
statByYears['total'] = (
    statByYears
    .iloc[:, 2:-1]
    .sum(axis='columns')
    .astype(int)
)
statByYears

Unnamed: 0,year,ML,Milky Way,exoplanet,gravitational wave,photometric redshift,galaxy classification,star/galaxy,lensing,transient,Sun,total
0,2010,13,0,0,0,1,0,0,0,1,4,6
1,2011,10,2,0,0,2,1,0,2,0,2,9
2,2012,29,3,1,0,3,1,1,2,3,3,17
3,2013,27,2,0,3,3,0,0,0,5,6,19
4,2014,40,6,0,1,4,1,1,2,4,7,26
5,2015,61,9,1,3,7,1,4,2,8,13,48
6,2016,78,8,4,2,16,3,4,5,7,15,64
7,2017,81,7,2,6,11,4,3,7,6,16,62
8,2018,154,22,6,6,13,4,5,12,10,35,113
9,2019,245,16,11,12,13,4,6,22,18,52,154


In [15]:
# Create the output directory if needed; to_csv does not create missing
# parent directories and would fail on a fresh checkout.
os.makedirs(dirData, exist_ok=True)
statByYears.to_csv(os.path.join(dirData, 'topicsFromAbstract.csv'), index=False)