# OpenAlex
The SYNERGY datasets contain IDs referring to the OpenAlex repository, from which the data can be downloaded.

## Load Datasets
Load the datasets into memory:

In [3]:
import os
import pandas as pd

data_directory_uniform = '../../../../data/02_uniform'

# os.listdir returns entries in arbitrary order and also lists directories and
# hidden entries (e.g. '.ipynb_checkpoints', '.DS_Store'), which pd.read_csv
# cannot parse. Keep regular, non-hidden files only and sort them so the
# datasets are always processed in the same order.
files = sorted(
    file for file in os.listdir(data_directory_uniform)
    if not file.startswith('.')
    and os.path.isfile(os.path.join(data_directory_uniform, file))
)

# The subject is the part of the file name in front of the '_uniform' marker.
subjects = [file.split('_uniform')[0] for file in files]

# Map every subject to the DataFrame read from its file.
uniform_datasets = {
    subject: pd.read_csv(f'{data_directory_uniform}/{file}')
    for subject, file in zip(subjects, files)
}

## Query API
For each article, retrieve its title and abstract by its OpenAlex ID:

In [6]:
import pyalex
# Import the callable progress-bar class: a plain `import tqdm` only binds the
# module, and calling the module raises a TypeError.
from tqdm.auto import tqdm

for dataset, data in tqdm(uniform_datasets.items(), desc='Downloading datasets'):

    # Collect the values row by row and attach them as whole columns afterwards.
    titles = []
    abstracts = []

    for openalex_id in tqdm(data['openalex_id'], total=len(data), desc=dataset):

        # Test the single value: calling data.isna() on the whole frame for
        # every row made this loop quadratic in the size of the dataset.
        if pd.isna(openalex_id):
            titles.append(pd.NA)
            abstracts.append(pd.NA)
            continue

        # retrieve title/abstract through the api (one request per article)
        work = pyalex.Works()[openalex_id]

        titles.append(work['title'])
        # presumably pyalex assembles the plain-text abstract for this key;
        # confirm against the pyalex documentation
        abstracts.append(work['abstract'])

    # Adds the new columns to the DataFrame held in uniform_datasets in place.
    data['title'] = titles
    data['abstract'] = abstracts

Downloading datasets:   0%|          | 0/6 [00:00<?, ?it/s]

adhd:   0%|          | 0/851 [00:00<?, ?it/s]

animal_depression:   0%|          | 0/1993 [00:00<?, ?it/s]

atypical_antipsychotics:   0%|          | 0/1120 [00:00<?, ?it/s]

calcium_channel_blockers:   0%|          | 0/1218 [00:00<?, ?it/s]

oral_hypoglycemics:   0%|          | 0/503 [00:00<?, ?it/s]

pancreatic_surgery:   0%|          | 0/34206 [00:00<?, ?it/s]

## Save locally
Save the downloaded data locally:

In [7]:
# NOTE(review): the original path climbed only three levels ('../../../data'),
# while the input data is read from four levels up ('../../../../data').
# Aligned here so the output lands next to the input folder; confirm the
# intended location.
directory_to_save = '../../../../data/02_openalex'

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(directory_to_save, exist_ok=True)

# Write one '<subject>_openalex.csv' per dataset, without the index column.
for subject, dataframe in uniform_datasets.items():
    dataframe.to_csv(f'{directory_to_save}/{subject}_openalex.csv', index=False)