In [9]:
from pyalex import Works
import pandas as pd

oa_id_list = ["W4383896166"]

def extract_refs(oa_id_list):
    """
    extracts references from manuscript (first layer) and references
    of those (second layer),
    based on OpenAlex ids retrieved from manuscript
    """

    first_layer_full = []
    for ref in range(0, len(oa_id_list), 25):
        subset = "|".join(oa_id_list[ref : ref + 25])
        first_layer = Works().filter(openalex=subset).get()
        first_layer = [ref["referenced_works"] for ref in first_layer]
        first_layer = list(
            {i for lists in first_layer for i in lists}
        )  # list comprehension returning a set
        first_layer_full.append(first_layer)
        # print(len(first_layer_full))

    first_layer_full.append(oa_id_list)

    first_layer_full = list(
        {i for lists in first_layer_full for i in lists}
    )  # list comprehension returning a set

    ## second layer
    second_layer = []
    for ref in range(0, len(first_layer_full), 25):
        subset = "|".join(first_layer_full[ref : ref + 25])
        second_layer.append(Works().filter(openalex=subset).get())

    final = [entry for lists in second_layer for entry in lists]
    return final


refs = extract_refs(oa_id_list)
refs = extract_refs([ref["id"] for ref in refs]) 

In [10]:
len(refs)

1056

In [14]:
def create_ref_csv(refs):
    """
    From OpenAlex references, extracts OpenAlex ID, abstracts,
    journal ISSN-L and all authors in correct order.
    Returns a dataframe
    """

    ids = [w["id"].split("/")[-1] for w in refs]
    abstracts = [w["abstract"] for w in refs]
    year = [w["publication_year"] for w in refs]
    
    journal = []
    for w in refs:
        if w["primary_location"]:
            if w["primary_location"]["source"]:
                if w["primary_location"]["source"]["issn_l"]:
                    journal.append(w["primary_location"]["source"]["issn_l"])
        
    authorships = [w["authorships"] for w in refs]
    authors = []
    for paper in authorships:
        authors_per_paper = []
        for w in paper:
            if w["author"]:
                if w["author"]["display_name"]:
                    authors_per_paper.append(w["author"]["display_name"])
        authors.append(authors_per_paper)
      
    return pd.DataFrame(
        list(zip(ids, year, journal, authors, abstracts)),
        columns=["oa_id", "year", "journal_issnl", "authors", "abstracts"],
    )


ref_csv = create_ref_csv(refs)

In [16]:
ref_csv

Unnamed: 0,oa_id,year,journal_issnl,authors,abstracts
0,W947539452,1996,0033-2909,[Alberto Melucci],In Challenging Codes Melucci brings an origina...
1,W2032344422,1988,0022-0485,"[Janet Shibley Hyde, Marcia C. Linn]",
2,W1767995805,1973,0013-0133,[Christopher Jencks],Most Americans say they believe in equality. B...
3,W2012521813,1991,0001-8392,"[Oded Stark, J. Edward Taylor]","Journal Article Migration Incentives, Migratio..."
4,W2326941496,1973,0197-9183,[Jeffrey Pfeffer],1 The author gratefully acknowledges the assis...
...,...,...,...,...,...
784,W2014724358,2006,0022-2801,"[Afndreas Wimmer, Brian Min]",The existing quantitative literature on war ta...
785,W600699417,1979,0034-6527,"[Paul Rabinow, William Michael Sullivan]",
786,W2797626461,1978,0021-9347,[Sandra Wallman],
787,W2119461238,2005,0266-7215,[Aaron Gullickson],Skin tone variation within the United States' ...


In [17]:
ref_csv.to_csv(index=False, path_or_buf = "abstracts_from_W4383896166.csv")