In [10]:
import time
import requests

# Column schema. Each key is a short column name; its value records the name
# the column carries in the raw data ("original_name"), a dtype string, and,
# for categorical columns, the list of allowed categories.
# NOTE(review): the dotted original names look like flattened JSON paths from
# a Nobel-laureate dataset — confirm against whatever loads the data.
data_dict = {
    "id": dict(original_name="id", dtype="int64"),
    "name": dict(original_name="knownName.en", dtype="object"),
    "gender": dict(
        original_name="gender",
        dtype="category",
        categories=["female", "male"],
    ),
    "award_year": dict(original_name="nobelPrizes_1.awardYear", dtype="int64"),
    "birth_country": dict(original_name="birth.place.country.en", dtype="object"),
    "field": dict(
        original_name="nobelPrizes_1.category.en",
        dtype="category",
        categories=[
            "Physics",
            "Chemistry",
            "Physiology or Medicine",
            "Economic Sciences",
        ],
    ),
}

def get_given_names(year: int, field: str, count: int) -> dict:
    """Collect author given names from a random sample of Crossref articles.

    Queries the Crossref ``/works`` endpoint for a random sample of journal
    articles matching ``field`` that were published during ``year``, then
    gathers the authors' given names, skipping initials and compound names.

    Args:
        year: Publication year used for the from/until date filters.
        field: Free-text query sent to Crossref (e.g. ``'Physics'``).
        count: Maximum number of given names to collect.

    Returns:
        A dict ``{'year': year, 'field': field, 'authors': [...]}`` where
        ``authors`` holds at most ``count`` names. It may hold fewer when the
        sample does not contain enough usable names.

    Raises:
        requests.HTTPError: If Crossref answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    url = 'https://api.crossref.org/works'
    headers = {
        'User-Agent': 'RandomPhysicsPaperFetcher/1.0 (mailto:jipijipijipi@gmail.com)'
    }
    # All filters must travel in ONE comma-separated `filter` value. Writing
    # the 'filter' key twice in a dict literal keeps only the last one, which
    # would silently drop the publication-date restriction.
    filters = [
        f'from-pub-date:{year}-01-01',
        f'until-pub-date:{year}-12-31',
        'type:journal-article',
    ]
    params = {
        'query': field,
        'filter': ','.join(filters),
        'sample': '50',
    }

    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    response.raise_for_status()

    items = response.json()['message']['items']

    # Characters that mark initials ("J."), multi-part names or joined names.
    rejected_chars = ('.', ' ', '&')
    given_names = []
    for item in items:
        if len(given_names) >= count:
            break
        for author in item.get('author') or []:
            given = author.get('given') or ''
            if len(given) > 1 and not any(ch in given for ch in rejected_chars):
                given_names.append(given)
                if len(given_names) >= count:
                    break

    # Honour rate limiting AFTER using the data: the response already in hand
    # is valid, so pause for the benefit of the next call instead of
    # discarding it.
    # NOTE(review): confirm these header names against the Crossref docs.
    try:
        rate_limit_remaining = int(response.headers.get('X-Rate-Limit-Remaining', 1))
        rate_limit_reset = int(response.headers.get('X-Rate-Limit-Reset', 1))
    except ValueError:
        # Non-numeric header values: treat as "not rate limited".
        rate_limit_remaining, rate_limit_reset = 1, 1

    if rate_limit_remaining == 0:
        time.sleep(rate_limit_reset)

    # Always return the same shape, whether or not `count` was reached.
    return {'year': year, 'field': field, 'authors': given_names}



# Quick manual check: request up to ten given names for 2021 "Physics" papers.
names = get_given_names(year=2021, field='Physics', count=10)

print(names)

def get_all_names(dictionary, starting_year) -> list:
    """Collect given-name samples for every year and field category.

    Args:
        dictionary: Mapping whose ``['field']['categories']`` entry lists the
            field names to query.
        starting_year: First publication year to query (inclusive).

    Returns:
        A list with one ``get_given_names`` result per (year, field) pair,
        ordered by year and then by category order.
    """
    all_names = []
    categories = dictionary['field']['categories']

    # The range stops before `current year - 1`, so the current and the
    # previous calendar year are not queried.
    # NOTE(review): confirm that skipping the previous year is intended.
    last_year_exclusive = time.localtime().tm_year - 1

    for year in range(starting_year, last_year_exclusive):
        for field in categories:
            result = get_given_names(year, field, 10)
            # Defensive: ignore a missing result rather than storing None.
            if result is None:
                continue
            all_names.append(result)

    return all_names


{'year': 2021, 'field': 'Physics', 'authors': ['Leto', 'Emily', 'William', 'Christine', 'Ke-Hsueh', 'Mario', 'Perry', 'Erik', 'Peter']}
{'year': 2021, 'field': 'Physics', 'authors': ['Leto', 'Emily', 'William', 'Christine', 'Ke-Hsueh', 'Mario', 'Perry', 'Erik', 'Peter']}
