In [33]:
import json
import os

import requests
from tqdm import tqdm

# ADS API token. Prefer supplying it through the ADS_API_TOKEN environment
# variable so the real credential is never saved inside the notebook; the
# placeholder remains as a fallback for pasting a token in directly.
token = os.environ.get("ADS_API_TOKEN", "your_token")

In [34]:
def relevant_citations(impact_factor_year, bibcode):
    """Return how many citations `bibcode` received in the year before `impact_factor_year`.

    Posts the bibcode to the ADS metrics endpoint asking for the citations
    histogram, then adds up the counts recorded for the year
    ``impact_factor_year - 1`` across the four citation categories.

    Args:
        impact_factor_year: Year of the impact factor (a number); citations
            are counted for the preceding year.
        bibcode: Bibcode of the paper to look up.

    Returns:
        The summed citation count for the relevant year. 0 when the response
        has no citations histogram or its body cannot be decoded as JSON.
        -1 when the API answers with a status code other than 200.
    """
    payload = {"bibcodes": [bibcode],
               "types": ["histograms"],
               "histograms": ["citations"]}
    response = requests.post(
        "https://api.adsabs.harvard.edu/v1/metrics",
        headers={'Authorization': 'Bearer ' + token,
                 "Content-type": "application/json"},
        data=json.dumps(payload),
    )

    if response.status_code != 200:
        print(f"Error {response.status_code} for bibcode {bibcode}: {response.text}")
        return -1

    try:
        metrics = response.json()
    except ValueError as e:
        # Every JSON decode error response.json() can raise (stdlib json,
        # simplejson, or requests' own wrapper) is a ValueError subclass, so
        # catching ValueError works regardless of the installed versions.
        print(f"JSON decode error for bibcode {bibcode}: {e}")
        print(f"Response text: {response.text}")
        return 0

    relevant_year = str(impact_factor_year - 1)  # histogram years are looked up by string key
    citations_histogram = metrics.get('histograms', {}).get('citations', {})
    if not citations_histogram:
        print(f"No citation data found for bibcode {bibcode}")
        return 0

    categories = (
        'refereed to refereed',
        'refereed to nonrefereed',
        'nonrefereed to refereed',
        'nonrefereed to nonrefereed',
    )
    # A category or year missing from the histogram counts as zero citations.
    return sum(
        citations_histogram.get(category, {}).get(relevant_year, 0)
        for category in categories
    )

In [35]:
def get_bibcodes(journal_name, impact_factor_year):
    """Collect the bibcodes of a journal's records from two publication years.

    Queries the ADS search endpoint for records whose bibstem is
    `journal_name`, whose year is ``impact_factor_year - 2`` or
    ``impact_factor_year - 3``, and which belong to the astronomy collection,
    paging through the results until all of them have been read.

    Args:
        journal_name: Journal bibstem used in the search query.
        impact_factor_year: Year of the impact factor (a number).

    Returns:
        A list of bibcodes. If a request comes back with a status code other
        than 200, the error is printed and the bibcodes gathered so far are
        returned.
    """
    headers = {
        'Authorization': f'Bearer {token}',
    }
    relevant_year_1 = str(impact_factor_year - 2)
    relevant_year_2 = str(impact_factor_year - 3)

    base_url = 'https://api.adsabs.harvard.edu/v1/search/query'
    query = f'bibstem:{journal_name} AND year:({relevant_year_1} OR {relevant_year_2}) AND collection:astronomy'

    rows = 2000  # Number of results requested per page
    start = 0  # Index of the first result of the next page
    all_bibcodes = []

    while True:
        params = {
            'q': query,
            'fl': 'bibcode',
            'rows': rows,
            'start': start,
        }
        response = requests.get(base_url, headers=headers, params=params)

        if response.status_code != 200:
            print('Error:', response.status_code, response.text)
            break

        result = response.json()['response']
        docs = result['docs']
        if not docs:
            # No more results to fetch
            break

        all_bibcodes.extend([doc['bibcode'] for doc in docs])

        # Advance by the number of records actually received, so nothing is
        # skipped if the server returns a shorter page than requested.
        start += len(docs)

        # Stop once the reported total has been read; this avoids one extra
        # request whose only purpose would be to return an empty page.
        num_found = result.get('numFound')
        if num_found is not None and start >= num_found:
            break

    return all_bibcodes

In [36]:
def get_refereed_bibcodes(journal_name, impact_factor_year):
    """Collect the bibcodes of a journal's refereed records from two publication years.

    Queries the ADS search endpoint for records whose bibstem is
    `journal_name`, whose year is ``impact_factor_year - 2`` or
    ``impact_factor_year - 3``, which carry the ``refereed`` property, and
    which belong to the astronomy collection, paging through the results
    until all of them have been read.

    Args:
        journal_name: Journal bibstem used in the search query.
        impact_factor_year: Year of the impact factor (a number).

    Returns:
        A list of bibcodes. If a request comes back with a status code other
        than 200, the error is printed and the bibcodes gathered so far are
        returned.
    """
    headers = {
        'Authorization': f'Bearer {token}',
    }
    relevant_year_1 = str(impact_factor_year - 2)
    relevant_year_2 = str(impact_factor_year - 3)

    base_url = 'https://api.adsabs.harvard.edu/v1/search/query'
    query = f'bibstem:{journal_name} AND (year:({relevant_year_1} OR {relevant_year_2})) AND property:refereed AND collection:astronomy'

    rows = 2000  # Number of results requested per page
    start = 0  # Index of the first result of the next page
    all_bibcodes = []

    while True:
        params = {
            'q': query,
            'fl': 'bibcode',
            'rows': rows,
            'start': start,
        }
        response = requests.get(base_url, headers=headers, params=params)

        if response.status_code != 200:
            print('Error:', response.status_code, response.text)
            break

        result = response.json()['response']
        docs = result['docs']
        if not docs:
            # No more results to fetch
            break

        all_bibcodes.extend([doc['bibcode'] for doc in docs])

        # Advance by the number of records actually received, so nothing is
        # skipped if the server returns a shorter page than requested.
        start += len(docs)

        # Stop once the reported total has been read; this avoids one extra
        # request whose only purpose would be to return an empty page.
        num_found = result.get('numFound')
        if num_found is not None and start >= num_found:
            break

    return all_bibcodes

In [37]:
# Notebook parameters: the impact-factor year to evaluate and the bibstem of
# the journal to query (replace the placeholder before running).
impact_factor_year = 2024
journal_name = 'enter_journal_bibstem_here'

In [38]:
# Create the output directory (named after the journal) if it doesn't exist;
# without it, opening the file for writing below fails on a fresh run.
import os

os.makedirs(journal_name, exist_ok=True)

# Fetch the journal's bibcodes and write them to a JSON file in that directory
file_path = os.path.join(journal_name, f'{journal_name}_bibcodes_{impact_factor_year}.json')

bibcodes = get_bibcodes(journal_name, impact_factor_year)
with open(file_path, 'w') as f:
    json.dump(bibcodes, f)

In [39]:
# Number of bibcodes returned by get_bibcodes for the journal
len(bibcodes)

25

In [40]:
# Get the refereed bibcodes for the journal (they are dumped to a json file in a later cell)
refereed_bibcodes = get_refereed_bibcodes(journal_name, impact_factor_year)


In [41]:
# Make sure the journal's output directory exists before writing into it,
# so this cell does not fail with FileNotFoundError on a fresh run.
os.makedirs(journal_name, exist_ok=True)

refereed_file_path = os.path.join(journal_name, f'{journal_name}_refereed_bibcodes_{impact_factor_year}.json')
with open(refereed_file_path, 'w') as f:
    json.dump(refereed_bibcodes, f)

In [42]:
# Number of bibcodes returned by get_refereed_bibcodes for the journal
len(refereed_bibcodes)

25