In [1]:
from pyalex import Works, Authors, Sources, Institutions, Concepts, Publishers, Funders

In [2]:
import pyalex

# Contact email stored in pyalex's global configuration.
# NOTE(review): presumably forwarded with each OpenAlex request to identify
# the client — confirm against the pyalex documentation.
pyalex.config.email = "quentin.glorieux@lkb.upmc.fr"

In [6]:
# Run a very broad title search and dump what `.get()` returns.
title_query = Works().search_filter(title='a')
papers = title_query.get()
print(papers)

In [5]:
# Title search for one specific paper; the value returned by `.get()` is the
# cell's displayed output.
(
    Works()
    .search_filter(title="Temporally multiplexed storage of images ")
    .get()
)

[{'id': 'https://openalex.org/W3098886231',
  'doi': 'https://doi.org/10.1364/oe.20.012350',
  'title': 'Temporally multiplexed storage of images in a gradient echo memory',
  'display_name': 'Temporally multiplexed storage of images in a gradient echo memory',
  'relevance_score': 614.8044,
  'publication_year': 2012,
  'publication_date': '2012-05-16',
  'ids': {'openalex': 'https://openalex.org/W3098886231',
   'doi': 'https://doi.org/10.1364/oe.20.012350',
   'mag': '3098886231',
   'pmid': 'https://pubmed.ncbi.nlm.nih.gov/22714222'},
  'language': 'en',
  'primary_location': {'is_oa': True,
   'landing_page_url': 'https://doi.org/10.1364/oe.20.012350',
   'pdf_url': None,
   'source': {'id': 'https://openalex.org/S178776955',
    'display_name': 'Optics Express',
    'issn_l': '1094-4087',
    'issn': ['1094-4087'],
    'is_oa': True,
    'is_in_doaj': True,
    'host_organization': 'https://openalex.org/P4310315679',
    'host_organization_name': 'Optica Publishing Group',
    'h

In [22]:
def get_single_paper(doi="https://doi.org/10.1103/PhysRevA.84.053826"):
    """Fetch a single work from OpenAlex by its DOI.

    Parameters
    ----------
    doi : str, optional
        DOI URL used as the lookup key on ``Works()``. Defaults to the DOI
        that was previously hard-coded in this function, so calling it with
        no argument behaves as before.

    Returns
    -------
    The object returned by ``Works()[doi]``.
    """
    return Works()[doi]

paper = get_single_paper()

In [24]:
# Assemble a compact summary of `paper` piece by piece, then print it.
paper_source_name = paper["primary_location"]["source"]["display_name"]
paper_biblio = paper["biblio"]

# One {"name", "orcid"} entry per authorship record.
paper_authors = []
for entry in paper["authorships"]:
    author = entry["author"]
    paper_authors.append({"name": author["display_name"], "orcid": author["orcid"]})

# Citation-like label: "<journal> <volume> <issue> (<year>)."
paper_display = (
    paper_source_name
    + f" {paper_biblio['volume']} {paper_biblio['issue']} ({paper['publication_year']})."
)

paper_summary = {
    'title': paper['title'],
    'link': {
        'url': paper['doi'],
        "authors": paper_authors,
        'display': paper_display,
    },
    "doi": paper["doi"],
    "is_oa": paper["open_access"]["is_oa"],
    "oa_url": paper["open_access"]["oa_url"],
    "publication_year": paper["publication_year"],
    "journal": paper_source_name,
    "biblio": paper_biblio,
}
print(paper_summary)

{'title': 'Quantum correlations by four-wave mixing in an atomic vapor in a nonamplifying regime: Quantum beam splitter for photons', 'link': {'url': 'https://doi.org/10.1103/physreva.84.053826', 'authors': [{'name': 'Quentin Glorieux', 'orcid': 'https://orcid.org/0000-0003-0903-0233'}, {'name': 'Luca Guidoni', 'orcid': None}, {'name': 'S. Guibal', 'orcid': None}, {'name': 'Jean-Pierre Likforman', 'orcid': 'https://orcid.org/0000-0002-8879-7813'}, {'name': 'Thomas Coudreau', 'orcid': None}], 'display': 'Physical Review A 84 5 (2011).'}, 'doi': 'https://doi.org/10.1103/physreva.84.053826', 'is_oa': True, 'oa_url': 'https://arxiv.org/pdf/1111.4103', 'publication_year': 2011, 'journal': 'Physical Review A', 'biblio': {'volume': '84', 'issue': '5', 'first_page': None, 'last_page': None}}


In [8]:
import re

def get_openalex_id(input_identifier):
    """Resolve an ORCID or an author name to a short OpenAlex author ID.

    Parameters
    ----------
    input_identifier : str
        Either an ORCID (bare ``0000-0000-0000-0000`` form or embedded in an
        ``https://orcid.org/...`` URL) or an author display name.

    Returns
    -------
    str or None
        The ``A<digits>`` part of the author's OpenAlex ``id`` URL, or
        ``None`` when a name search returns no result or the ``id`` does not
        contain such a token.
    """
    authors = Authors()

    # Detect an ORCID by its four-block shape rather than the substring "0-",
    # so full ORCID URLs work and names containing "0-" are not misrouted.
    orcid_match = re.search(r'\d{4}-\d{4}-\d{4}-\d{3}[\dX]', input_identifier)
    if orcid_match:
        orcid_url = f"https://orcid.org/{orcid_match.group(0)}"
        openalex_url = authors[orcid_url]['id']
    else:
        name_result = authors.search_filter(display_name=input_identifier).get()
        if not name_result:
            # No author matched the name: return None instead of raising
            # IndexError, consistent with the "no ID found" outcome below.
            return None
        openalex_url = name_result[0]['id']

    id_match = re.search(r'A\d+', openalex_url)
    return id_match.group(0) if id_match else None


## Fetch one author's works from OpenAlex

In [111]:
import pandas as pd

# Page through every work of the given author that has a DOI and whose primary
# source has an ISSN, newest first, keeping only the fields used downstream.
pager = (
    Works()
    .filter(author={"id": "A5024990264"})
    .filter(has_doi=True, primary_location={"source": {"has_issn": True}})
    .select(
        [
            "id",
            "doi",
            "title",
            "publication_year",
            "ids",
            "type",
            "type_crossref",
            "open_access",
            "primary_location",
            "authorships",
            "biblio",
            "concepts",
        ]
    )
    .sort(publication_year="desc")
    .paginate(per_page=100)
)

# Flatten the pages into a single list of records. A dedicated name is used so
# the builtin `list` is not rebound for the rest of the kernel session, and the
# comprehension avoids re-copying the accumulated list on every page.
works_records = [work for page in pager for work in page]

df0 = pd.DataFrame(works_records)
df = df0

## Code to export the publication list

In [121]:
import json


# Define a function to extract author information
def extract_author_info(row):
    """Build the export record for one publication.

    Parameters
    ----------
    row : mapping
        One work (for example a DataFrame row) providing the keys ``title``,
        ``doi``, ``authorships``, ``primary_location``, ``biblio``,
        ``open_access`` and ``publication_year``.

    Returns
    -------
    dict
        Keys ``title``, ``authors``, ``link`` (``url`` and ``display``),
        ``orcid``, ``doi``, ``is_oa``, ``oa_url``, ``publication_year``,
        ``journal``, ``journal_abbreviation`` and ``biblio``.
    """
    doi = row["doi"]

    # First character following "physrev" in the DOI, unless it is 'l', 'r'
    # or 'x', in which case the abbreviation is left empty. A non-string DOI
    # simply yields no abbreviation instead of raising.
    match = re.search(r'physrev(.+?)(\d+)', doi) if isinstance(doi, str) else None
    journal_abbreviation = ""
    if match:
        series_letter = match.group(1)[0]
        if series_letter not in ('l', 'r', 'x'):
            journal_abbreviation = series_letter.capitalize()

    # Tolerate a missing primary location or source rather than failing on
    # ``None[...]``; the journal is then reported as None.
    location = row["primary_location"]
    source = location.get("source") if isinstance(location, dict) else None
    journal = source.get("display_name") if isinstance(source, dict) else None

    biblio = row["biblio"]
    biblio_fields = biblio if isinstance(biblio, dict) else {}

    # Only include the parts that exist so a missing volume or issue does not
    # leak the literal text "None" into the citation label.
    display_parts = [
        str(part)
        for part in (journal, biblio_fields.get("volume"), biblio_fields.get("issue"))
        if part not in (None, "")
    ]
    display_parts.append("(" + str(row["publication_year"]) + ").")
    display = " ".join(display_parts)

    authorships = row["authorships"]
    return {
        "title": row["title"],
        "authors": [
            {"name": entry["author"]["display_name"], "orcid": entry["author"]["orcid"]}
            for entry in authorships
        ],
        "link": {
            "url": doi,
            "display": display,
        },
        "orcid": [entry["author"]["orcid"] for entry in authorships],
        "doi": doi,
        "is_oa": row["open_access"]["is_oa"],
        "oa_url": row["open_access"]["oa_url"],
        "publication_year": row["publication_year"],
        "journal": journal,
        "journal_abbreviation": journal_abbreviation,
        "biblio": biblio,
    }


# Apply the function to each row and create a list of dictionaries
author_info_list = (
    df
    .apply(extract_author_info, axis=1)
    .tolist()
)

# Serialise the records as pretty-printed JSON text
json_string = json.dumps(author_info_list, indent=2)

# Write the JSON text to the site's data file
with open("../_data/openalex_list.json", mode="w") as file:
    file.write(json_string)

## Tests

In [None]:
# IDs: expand the nested `ids` dicts into one column per identifier.
# NOTE(review): `ids` contains its own 'doi' key, so the result carries a
# second 'doi' column next to the one already in `df`.
expanded_df = pd.json_normalize(df['ids'])
result_df = pd.concat([df, expanded_df], axis=1)
# errors='ignore': 'pmid' / 'pmcid' only exist when at least one work has them.
result_df = result_df.drop(['ids', 'pmid', 'pmcid'], axis=1, errors='ignore')

# Location: expand `primary_location` on top of the frame built above, so the
# ID columns from the previous step are kept instead of being discarded.
expanded_df = pd.json_normalize(df['primary_location'])
result_df = pd.concat([result_df, expanded_df], axis=1)
result_df = result_df.drop(['primary_location', 'license', 'version'], axis=1, errors='ignore')
