# We use this notebook to extract the necessary information from the NASA/ADS public repository using an API.

In [1]:
# Importing the dependencies

import os
import warnings
from urllib.parse import urlencode, quote_plus

import numpy as np
import pandas as pd
import requests

In [2]:
# Papers are retrieved from NASA/ADS by keyword search, so a broad set of keywords is needed
# to reach a diverse range of papers. The list below holds roughly 400 astronomy and
# space-science search terms; each one is sent as a separate query.

query_keywords = [
    'stars', 'galaxies', 'planets', 'asteroids', 'comets', 'quasars', 'pulsars', 'nebulae',
    'supernovae', 'black holes', 'cosmic rays', 'gravitational waves', 'dark matter', 'dark energy',
    'radio astronomy', 'optical astronomy', 'infrared astronomy', 'X-ray astronomy',
    'general relativity', 'quantum mechanics', 'string theory', 'cosmology',
    'telescopes', 'spectrometers', 'detectors', 'satellites', 'space probes',
    'exoplanets', 'astrobiology', 'multi-messenger astronomy', 'machine learning in astronomy',
    'celestial mechanics', 'interstellar medium', 'astrochemistry', 'gamma-ray bursts',
    'cosmic microwave background', 'solar flares', 'heliophysics', 'planetary atmospheres',
    'exoplanet atmospheres', 'interplanetary dust', 'interstellar dust', 'heliosphere',
    'astrostatistics', 'astrometry', 'astroinformatics', 'astroengineering', 'astroecology',
    'intergalactic medium', 'space weather', 'planetary geology', 'planetary geophysics',
    'space missions', 'space exploration', 'space agencies', 'space technology', 'satellite missions',
    'interplanetary missions', 'interstellar missions', 'near-Earth objects', 'Kuiper Belt', 'Oort Cloud',
    'star clusters', 'globular clusters', 'open clusters', 'planetary rings', 'binary stars',
    'variable stars', 'red giants', 'white dwarfs', 'brown dwarfs', 'planetary formation',
    'circumstellar disks', 'planetary rings', 'planetary migration', 'planetary habitability',
    'SETI', 'astroethics', 'space law', 'space policy', 'space governance', 'space debris', 'space junk',
    'orbital dynamics', 'space propulsion', 'ion propulsion', 'plasma propulsion', 'rocket science',
    'space habitats', 'space colonies', 'space settlement', 'terraforming', 'space elevators',
    'space mining', 'space resources', 'space manufacturing', 'space medicine', 'astrogeology',
    'astrogeophysics', 'cosmic inflation', 'magnetic fields in space', 'interstellar travel',
    'relativistic astrophysics', 'time dilation', 'space-time curvature', 'gravitational lensing',
    'space-time ripples', 'pulsar timing arrays', 'dark sky preservation', 'telescope arrays',
    'interferometry', 'adaptive optics', 'cosmic censorship', 'event horizon', 'cosmic strings',
    'black hole thermodynamics', 'Hawking radiation', 'primordial black holes', 'cosmic censorship',
    'event horizon telescope', 'primordial nucleosynthesis', 'big bang nucleosynthesis',
    'anthropic principle', 'cosmic censorship', 'fine-tuning of the universe', 'extragalactic astronomy',
    'large-scale structure of the universe', 'cosmic web', 'cosmic voids', 'galaxy clusters',
    'dark energy survey', 'gravitational lensing', 'cosmic microwave background', 'cosmic archaeology',
    'redshift surveys', 'large hadron collider', 'particle astrophysics', 'cosmic rays',
    'high-energy astrophysics', 'cosmic neutrinos', 'gamma-ray astronomy', 'cosmic accelerators',
    'cosmic ray showers', 'cosmic ray observatories', 'neutrino telescopes', 'cosmic ray propagation',
    'cosmic ray interactions', 'ultra-high-energy cosmic rays', 'cosmic-ray detection',
    'cosmic-ray composition', 'cosmic-ray astronomy', 'heliospheric physics', 'solar wind', 'solar flares',
    'coronal mass ejections', 'solar activity', 'solar cycle', 'solar physics', 'solar observations',
    'solar telescopes', 'solar magnetic fields', 'solar prominences', 'solar granulation',
    'solar coronal heating', 'solar photosphere', 'solar chromosphere', 'solar limb', 'solar spectrum',
    'solar radio bursts', 'solar cosmic rays', 'solar energetic particles', 'solar flares and space weather',
    'solar-terrestrial relations', 'space climate', 'cosmic dust', 'interstellar dust', 'interplanetary dust',
    'zodiacal light', 'cometary dust', 'micrometeorites', 'cosmic impact hazard', 'meteoroid streams',
    'meteor showers', 'meteoroids in space', 'meteoritic material', 'meteorite classification',
    'meteorite impact craters', 'atmospheric entry', 'meteorite flux', 'meteorite isotopes',
    'meteorite age dating', 'meteorite composition', 'meteoritic abundances', 'meteorite mineralogy',
    'meteorite petrology', 'meteorite micrometeorites', 'meteorite cosmic ray exposure',
    'meteorite preservation', 'meteorite recovery', 'meteorite research',
    'gravitational interactions', 'stellar evolution', 'cosmic ray origins', 'galactic magnetic fields',
    'dark matter candidates', 'quantum entanglement in space', 'supernova remnants', 'galactic dynamics',
    'cosmic microwave background polarization', 'solar magnetic storms', 'stellar atmospheres', 'neutrino oscillations',
    'pulsar wind nebulae', 'active galactic nuclei', 'interstellar clouds', 'galactic archaeology', 'dark sector physics',
    'exoplanet detection methods', 'habitable zones', 'extrasolar planetary systems', 'orbital debris mitigation',
    'space-based interferometers', 'solar neutrinos', 'neutron star mergers', 'stellar nucleosynthesis',
    'gravitational wave astronomy', 'neutrino astrophysics', 'solar wind interactions with planets',
    'cosmic dust in protoplanetary disks', 'helium abundance in the universe', 'neutrinoless double beta decay',
    'magnetic reconnection in astrophysics', 'interstellar medium dynamics', 'helioseismology', 'gamma-ray bursts progenitors',
    'binary star evolution', 'quantum gravity in the cosmos', 'helium reionization', 'stellar magnetic cycles',
    'neutrino detectors in space', 'cosmic magnetic fields', 'planetary migration in protoplanetary disks',
    'dark matter halos', 'solar prominence dynamics', 'supermassive black holes', 'cosmic strings',
    'neutrino astronomy', 'solar neutrino oscillations', 'radiation pressure in space', 'cosmic inflation models',
    'plasma astrophysics', 'solar limb observations', 'supernova shock waves', 'solar cycle variations',
    'galactic center observations', 'dark energy constraints', 'orbital debris tracking', 'space debris removal methods',
    'planetary nebulae', 'dwarf galaxies', 'microlensing events', 'solar magnetic field reversals',
    'cosmic gamma-ray background', 'galactic cosmic rays', 'supernova explosions', 'quantum fluctuations in the early universe',
    'neutron star atmospheres', 'helium abundance in stars', 'cosmic shear surveys', 'solar coronal mass ejections',
    'planetesimal formation', 'stellar activity cycles', 'cosmic microwave background anomalies', 'quantum tunnelling in astrophysics',
    'helioseismic inversions', 'supernova light curves', 'stellar metallicity', 'dark matter distribution in galaxies',
    'neutrino oscillation experiments', 'cosmic neutrino background', 'solar magnetic field topology', 'cosmic void dynamics',
    'interstellar scintillation', 'cosmic ray modulation', 'solar granulation patterns', 'planetary ring dynamics',
    'stellar occultations', 'dark matter annihilations', 'neutron star mergers as kilonovae', 'solar atmospheric heating',
    'cosmic inflation predictions', 'quantum entanglement in quantum gravity', 'stellar winds', 'neutrino flavor oscillations',
    'cosmic dust in the interstellar medium', 'magnetic fields in protostellar clouds', 'solar supergranulation',
    'helium recombination in the early universe', 'dark matter particle candidates', 'exoplanet habitability',
    'gamma-ray astronomy observatories', 'supernova nucleosynthesis', 'quantum tunnelling in stellar interiors',
    'helium enrichment in galaxies', 'neutrino oscillation patterns', 'cosmic void surveys', 'solar radio emissions',
    'planet formation in protoplanetary disks', 'stellar convection zones', 'dark energy models', 'orbital debris collision risk',
    'space-based gravitational wave detectors', 'planetary migration theories', 'solar neutrino flux variations',
    'cosmic ray isotopic composition', 'galactic magnetic field reversals', 'supernova remnant shocks',
    'quantum coherence in cosmic scales', 'helioseismology techniques', 'stellar accretion disks', 'neutrino mass hierarchy',
    'cosmic microwave background polarization anomalies', 'dark matter interactions with ordinary matter',
    'exoplanet atmosphere composition', 'habitable exomoons', 'orbital debris disposal methods', 'solar magnetic activity cycles',
    'stellar population synthesis', 'neutrino scattering experiments', 'cosmic ray propagation models',
    'galactic cosmic ray acceleration', 'supernova neutrinos', 'quantum entanglement in black hole thermodynamics',
    'helium abundance in quasar spectra', 'dark matter decays', 'neutron star cooling', 'solar prominence eruptions',
    'cosmic gamma-ray bursts', 'planetary ring compositions', 'stellar magnetic activity cycles', 'cosmic void simulations',
    'interstellar polarization', 'cosmic ray showers in the atmosphere', 'solar granulation lifetimes',
    'dark matter substructure', 'exoplanet habitability zones', 'gamma-ray bursts afterglows', 'supernova nucleosynthesis yields',
    'quantum entanglement in wormholes', 'helioseismic inversions techniques', 'stellar magnetic field evolution',
    'neutrino oscillation experiments in space', 'cosmic neutrino oscillations', 'solar coronal heating mechanisms',
    'cosmic void evolution', 'interstellar scintillation observations', 'cosmic ray modulation effects', 'solar granulation patterns',
    'planetary ring dynamics simulations', 'stellar magnetic field reversals', 'dark matter indirect detection experiments',
    'neutron star mergers as gravitational wave sources', 'helium enrichment in the intergalactic medium',
    'supernova nucleosynthesis in massive stars', 'quantum coherence in cosmic structures', 'helioseismic inversions applications',
    'stellar accretion disk instabilities', 'neutrino oscillation experiments on Earth', 'cosmic microwave background polarization measurements',
    'dark matter in the Milky Way halo', 'exoplanet atmosphere escape', 'habitable exoplanets detection methods', 'orbital debris mitigation strategies',
    'solar magnetic activity cycles variations', 'stellar population synthesis models', 'neutrino scattering experiments with astrophysical neutrinos',
    'cosmic ray propagation models in the interstellar medium', 'galactic cosmic ray acceleration mechanisms', 'supernova neutrinos detection methods',
    'quantum entanglement in black hole information paradox', 'helium abundance in quasar spectra variations', 'dark matter decays in galaxies',
    'neutron star cooling models', 'solar prominence eruptions mechanisms', 'cosmic gamma-ray bursts observations', 'planetary ring compositions analysis',
    'stellar magnetic activity cycles variations', 'cosmic void simulations methods', 'interstellar polarization measurements', 'cosmic ray showers in the atmosphere observations',
    'solar granulation lifetimes variations', 'dark matter substructure simulations', 'exoplanet habitability zones variations', 'gamma-ray bursts afterglows observations',
    'supernova nucleosynthesis yields variations', 'quantum entanglement in wormholes applications', 'helioseismic inversions techniques improvements',
    'stellar magnetic field evolution simulations'
]

# Several terms above are listed more than once (e.g. 'cosmic censorship', 'planetary rings').
# Drop the repeats, keeping first-seen order, so that the same query is never sent twice.
query_keywords = list(dict.fromkeys(query_keywords))

# These keywords will be used while querying for the research papers.

In [3]:
# Setting up the credentials.
# The ADS API token is a secret, so it is read from the ADS_API_TOKEN environment variable
# instead of being written into the notebook.
# NOTE(review): a token was previously stored in this cell in plain text. It should be
# revoked and regenerated in the ADS account settings, since it remains in the file history.
api_token = os.environ.get("ADS_API_TOKEN", "").strip()

if not api_token:
    warnings.warn(
        "ADS_API_TOKEN is not set; requests to the ADS API will not be authorised. "
        "Export ADS_API_TOKEN before starting the notebook kernel."
    )

In [4]:
# Function to extract information / data
def fetch_data(keyword, start=0, rows=2000, timeout=60):
    '''
    Query the NASA/ADS search API for research papers matching ``keyword``.

    The query is restricted with the filter ``year:1990-2021`` and the results are
    sorted by ``classic_factor`` in descending order.

    Parameters
    ----------
    keyword : str
        Search term appended to the year filter.
    start : int, optional
        Offset of the first result to return; sent as the ``start`` query parameter.
    rows : int, optional
        Number of results requested; sent as the ``rows`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before the request is abandoned. Without a
        timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    list of dict or str
        The ``response -> docs`` list of the JSON payload when the API answers with
        HTTP 200. The sentinel string ``'request denied'`` is returned for any other
        status code, for a network failure or timeout, and for a payload that is not
        the expected JSON structure.
    '''
    # Making the query:
    encoded_query = urlencode({
        "q": 'year:1990-2021' + " " + keyword,
        "fl": "bibcode, id, eprint, author, title, year, doi, keyword, abstract, classic_factor, citation_count, read_count, reference_count, readers,metrics",
        "rows": rows,
        "start": start,
        "sort": "classic_factor desc"
    })
    headers = {'Authorization': 'Bearer ' + api_token}

    try:
        # Making an API request
        results = requests.get(
            f"https://api.adsabs.harvard.edu/v1/search/query?{encoded_query}",
            headers=headers,
            timeout=timeout,
        )

        # If the request was not accepted, signal it with the sentinel value
        if results.status_code != 200:
            return 'request denied'

        # Otherwise return the list of matching documents
        return results.json()['response']['docs']

    # Only failures of the request itself or of decoding its payload are turned into the
    # sentinel; KeyboardInterrupt and programming errors are allowed to propagate.
    except (requests.RequestException, ValueError, KeyError, TypeError):
        return 'request denied'

In [6]:
# Fetching the responses for every keyword:
all_data = []

# Keywords whose request was denied, kept so that they can be inspected or retried
failed_keywords = []

for i, keyword in enumerate(query_keywords):

    # Fetching information by calling 'fetch_data' function
    response = fetch_data(str(keyword))

    if response == 'request denied':
        # Report the failure instead of printing 'done' for a keyword that returned nothing
        failed_keywords.append(keyword)
        print(f'{i} : request denied')
        continue

    # appending the response to the all_data list
    all_data.append(response)
    print(f'{i} : done')

print(f'{len(all_data)} keywords fetched, {len(failed_keywords)} failed')


0 : done
1 : done
2 : done
3 : done
4 : done
5 : done
6 : done
7 : done
8 : done
9 : done
10 : done
11 : done
12 : done
13 : done
14 : done
15 : done
16 : done
17 : done
18 : done
19 : done
20 : done
21 : done
22 : done
23 : done
24 : done
25 : done
26 : done
27 : done
28 : done
29 : done
30 : done
31 : done
32 : done
33 : done
34 : done
35 : done
36 : done
37 : done
38 : done
39 : done
40 : done
41 : done
42 : done
43 : done
44 : done
45 : done
46 : done
47 : done
48 : done
49 : done
50 : done
51 : done
52 : done
53 : done
54 : done
55 : done
56 : done
57 : done
58 : done
59 : done
60 : done
61 : done
62 : done
63 : done
64 : done
65 : done
66 : done
67 : done
68 : done
69 : done
70 : done
71 : done
72 : done
73 : done
74 : done
75 : done
76 : done
77 : done
78 : done
79 : done
80 : done
81 : done
82 : done
83 : done
84 : done
85 : done
86 : done
87 : done
88 : done
89 : done
90 : done
91 : done
92 : done
93 : done
94 : done
95 : done
96 : done
97 : done
98 : done
99 : done
100 : done

In [7]:
# all_data holds one list of paper records per keyword; flatten it into a single
# list of dictionaries, which is the input for the DataFrame built afterwards:
dicts = [record for batch in all_data for record in batch]

# Preview of the first ten records
np.array(dicts[:10])

array([{'bibcode': '1998AJ....116.1009R', 'abstract': "We present spectral and photometric observations of 10 Type Ia supernovae (SNe Ia) in the redshift range 0.16 &lt;= z &lt;= 0.62. The luminosity distances of these objects are determined by methods that employ relations between SN Ia luminosity and light curve shape. Combined with previous data from our High-z Supernova Search Team and recent results by Riess et al., this expanded set of 16 high-redshift supernovae and a set of 34 nearby supernovae are used to place constraints on the following cosmological parameters: the Hubble constant (H_0), the mass density (Omega_M), the cosmological constant (i.e., the vacuum energy density, Omega_Lambda), the deceleration parameter (q_0), and the dynamical age of the universe (t_0). The distances of the high-redshift SNe Ia are, on average, 10%-15% farther than expected in a low mass density (Omega_M = 0.2) universe without a cosmological constant. Different light curve fitting methods, SN 

In [19]:
# Build a pandas DataFrame from the list of paper dictionaries and
# drop the rows that repeat an already-seen 'bibcode'
df = pd.DataFrame(dicts).drop_duplicates(subset=['bibcode'])

# Extra column holding, for every paper, the ADS link-gateway URL of its e-print PDF
df['PDF_link_url'] = [
    f"https://ui.adsabs.harvard.edu/link_gateway/{bibcode}/EPRINT_PDF"
    for bibcode in df['bibcode']
]

print(df.shape)

df.head()

(298863, 12)


Unnamed: 0,bibcode,abstract,author,doi,id,keyword,title,year,read_count,classic_factor,citation_count,PDF_link_url
0,1998AJ....116.1009R,We present spectral and photometric observatio...,"[Riess, Adam G., Filippenko, Alexei V., Challi...","[10.1086/300499, 10.48550/arXiv.astro-ph/9805201]",4999936,"[COSMOLOGY: OBSERVATIONS, STARS: SUPERNOVAE: G...",[Observational Evidence from Supernovae for an...,1998,251.0,20913.0,15232.0,https://ui.adsabs.harvard.edu/link_gateway/199...
1,2007NatMa...6..183G,Graphene is a rapidly rising star on the horiz...,"[Geim, A. K., Novoselov, K. S.]",[10.1038/nmat1849],8078344,,[The rise of graphene],2007,1.0,20865.0,14896.0,https://ui.adsabs.harvard.edu/link_gateway/200...
2,1999ApJ...517..565P,"We report measurements of the mass density, Ω<...","[Perlmutter, S., Aldering, G., Goldhaber, G., ...","[10.1086/307221, 10.48550/arXiv.astro-ph/9812133]",2787197,"[COSMOLOGY: OBSERVATIONS, COSMOLOGY: DISTANCE ...",[Measurements of Ω and Λ from 42 High-Redshift...,1999,192.0,20861.0,14872.0,https://ui.adsabs.harvard.edu/link_gateway/199...
3,2018A&A...616A...1G,Context. We present the second Gaia data relea...,"[Gaia Collaboration, Brown, A. G. A., Vallenar...","[10.1051/0004-6361/201833051, 10.48550/arXiv.1...",15229395,"[catalogs, astrometry, techniques: radial velo...",[Gaia Data Release 2. Summary of the contents ...,2018,224.0,20516.0,6977.0,https://ui.adsabs.harvard.edu/link_gateway/201...
4,2016PhRvL.116f1102A,"On September 14, 2015 at 09:50:45 UTC the two ...","[Abbott, B. P., Abbott, R., Abbott, T. D., Abe...","[10.1103/PhysRevLett.116.061102, 10.48550/arXi...",1360978,"[General Relativity and Quantum Cosmology, Ast...",[Observation of Gravitational Waves from a Bin...,2016,244.0,20398.0,9611.0,https://ui.adsabs.harvard.edu/link_gateway/201...


In [20]:
# Saving the DataFrame.
# The destination can be overridden with the PAPERS_CSV_PATH environment variable; the
# default is the original location. It is written as a raw string because sequences such
# as "\G" and "\M" are invalid escapes in a normal string literal.
output_csv_path = os.environ.get(
    "PAPERS_CSV_PATH",
    r"D:\GITHUB REPOS\ML_Space_Scribe\Generated_Data\Dataframe_Papers.csv",
)

# Create the target folder when the path has a directory part that does not exist yet
output_dir = os.path.dirname(output_csv_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

df.to_csv(output_csv_path, index=False)