# Recomendador de artigos - Open Alex e GPT

In [1]:
import requests
import pandas as pd
from datetime import datetime,timedelta,date
import os

pd.options.display.max_columns = 999

In [82]:
#Se eu escolher extrair por mês

def obter_datas_mes(mes, ano):
    primeiro_dia = datetime(ano, mes, 1).strftime('%Y-%m-%d')
    if mes == 12:
        proximo_mes = datetime(ano + 1, 1, 1)
    else:
        proximo_mes = datetime(ano, mes + 1, 1)
    ultimo_dia_mes_atual = (proximo_mes - timedelta(days=1)).strftime('%Y-%m-%d')
    
    return primeiro_dia, ultimo_dia_mes_atual

#Se eu escolher extrair por semana

def obter_data_semana():
    
    data_fim = date.today()
    data_inicio = data_fim - timedelta(days=7)
    
    data_fim_formatada = data_fim.strftime('%Y-%m-%d')
    data_inicio_formatada = data_inicio.strftime('%Y-%m-%d')
    
    return data_inicio_formatada,data_fim_formatada


In [69]:
def coletar_dados_e_salvar(data_inicial,data_fim):
    
    cursor = '*'
            
    contador = 1
    contador_erro = 0
    
    diretorio = f'datasets_{data_inicial}_to_{data_fim}'
    
    if not os.path.exists(diretorio):
        os.makedirs(diretorio)
    
    while cursor != None:
        url = f'https://api.openalex.org/works?filter=from_publication_date:{data_inicial},to_publication_date:{data_fim},type:Article&per-page=200&cursor={cursor}'

        
        try:
            requisicao = requests.get(url)
            pagina_com_resultados = requisicao.json()
            
        except (requests.exceptions.RequestException, ValueError) as e:
            contador_erro += 1 
            print(f"Erro na página {contador} (Erro {contador_erro}): {e}")
            continue 
        
        resultados = pagina_com_resultados.get('results', [])
        
        df = pd.DataFrame(resultados)
        parquet_arquivo = os.path.join(diretorio, f'registros_{data_inicial}_to_{data_fim}_{contador}.parquet')
        df.to_parquet(parquet_arquivo, index=False)
        
        cursor = pagina_com_resultados['meta'].get('next_cursor')
        
        contador += 1
    
    print(f"Total de erros: {contador_erro}")

In [89]:
def concatenar_arquivos_parquet(folder_path):

    dataframes = []

    for filename in os.listdir(folder_path):
        if filename.endswith('.parquet'):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_parquet(file_path)
            dataframes.append(df)

    df_concatenado = pd.concat(dataframes, ignore_index=True)
    
    df_concatenado.to_parquet('df_concatenado.parquet',index=False)
    

In [186]:
def extrair_concepts_scores(df):
    concept_data = []

    for concepts in df['concepts']:
        concept_scores = {}
        for concept in concepts:
            if concept['level'] < 3 and concept['score'] > 0.51:
                concept_scores[concept['display_name']] = concept['score']
        concept_data.append(concept_scores)

    df_concepts = pd.DataFrame(concept_data, index=df.index).fillna(0).round(4)

    df_final = pd.concat([df, df_concepts], axis=1)

    return df_final

In [83]:
data_inicial , data_final = obter_data_semana()

In [88]:
#coletar_dados_e_salvar(data_inicial,data_final)

Total de erros: 0


In [123]:
df = pd.read_parquet('datasets_2023-09-21_to_2023-09-28/registros_2023-09-21_to_2023-09-28_1.parquet')

df = df.loc[:,['doi','title','publication_date','created_date','primary_location','open_access','concepts','cited_by_count']]

df['created_date'] = pd.to_datetime(df['created_date'])

df_filtrado = df.query('created_date.dt.month == 9')


In [124]:
df_filtrado['source_title'] = df_filtrado.loc[:,'primary_location'].apply(lambda x: x.get('source',None).get('display_name',None))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtrado['source_title'] = df_filtrado.loc[:,'primary_location'].apply(lambda x: x.get('source',None).get('display_name',None))


In [125]:
df_filtrado['open_access_status'] = df_filtrado.loc[:,'open_access'].apply(lambda x: x.get('is_oa',None))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtrado['open_access_status'] = df_filtrado.loc[:,'open_access'].apply(lambda x: x.get('is_oa',None))


In [187]:
df_final = extrair_concepts_scores(df_filtrado)

df_final.drop(columns=['primary_location','open_access','concepts'],inplace=True)

df_final

Unnamed: 0,doi,title,publication_date,created_date,cited_by_count,source_title,open_access_status,Euclidean geometry,Algorithm,Cancer research,Medicine,Uveitis,Guideline,Electric field,Bistability,Genetics,Biology,Pathogenicity,Computational biology,Cirrhosis,Lesion,Computer science,Face (sociological concept),Neuroscience,Autism,Microbiology,Chemistry,Dash,Wrist,Prosthesis,Surgery,Climatology,Pregnancy,Insulin,Endocrinology,Internal medicine,Follicular phase,Subcommissural organ,Cell biology,Cerebrospinal fluid,Atlas (anatomy),Artificial intelligence,Brain atlas,Neuroimaging,Gravitational wave,Physics,Binary number,Immune system,Immunology,Antigen,Vaccination,Population,Fusion,Psychology,Operationalization,Conceptualization,Compassion,Interpersonal communication,Mangrove,Biodiversity,Ecosystem,Environmental resource management,Tribe,Genus,Discontinuation,Stimulation,Gastroenterology,Serviceability (structure),Cantilever,Structural engineering,Stiffness,Limit state design,Gut flora,Conformal map,Mathematics,Curvature,Constant (computer programming),Tournament,Combinatorics
48,https://doi.org/10.2140/apde.2023.16.1485,Simplices in thin subsets of Euclidean spaces,2023-09-21,2020-09-14,1,Analysis & PDE,True,0.5652,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
54,https://doi.org/10.1007/s00222-023-01220-6,On the birational section conjecture with stro...,2023-09-26,2021-09-13,1,Inventiones Mathematicae,True,0.0,0.5585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
61,https://doi.org/10.1002/hep.32781,c‐Rel–dependent Chk2 signaling regulates the D...,2023-09-27,2022-09-14,1,Hepatology,True,0.0,0.0,0.713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62,https://doi.org/10.5507/bp.2022.038,Trends in management of ocular syphilis in ter...,2023-09-21,2022-09-19,1,Biomedical Papers of the Faculty of Medicine o...,True,0.0,0.0,0.0,0.9037,0.8388,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
95,https://doi.org/10.1253/circj.cj-22-0794,JCS/JSCVS/JATS/JSVS 2020 Guideline on Diagnosi...,2023-09-25,2023-09-01,1,Circulation journal,True,0.0,0.0,0.0,0.7069,0.0,0.7331,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,https://doi.org/10.1088/1478-3975/acf8a4,Universal calcium fluctuations in hydra morpho...,2023-09-22,2023-09-12,1,Physical Biology,True,0.0,0.0,0.0,0.0,0.0,0.0,0.5553,0.5521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97,https://doi.org/10.1126/science.adg7492,Accurate proteome-wide missense variant effect...,2023-09-22,2023-09-20,1,Science,True,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5898,0.581,0.5745,0.5123,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
98,https://doi.org/10.1007/s00330-023-10226-w,Hepatobiliary phase imaging in cirrhotic patie...,2023-09-21,2023-09-21,1,European Radiology,False,0.0,0.0,0.0,0.7577,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7257,0.5283,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99,https://doi.org/10.1142/s0218213023500689,Development of Optimal Hyper-parameter Tuning-...,2023-09-21,2023-09-21,1,International Journal on Artificial Intelligen...,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8565,0.5756,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100,https://doi.org/10.7554/elife.83223,Dynamic top-down biasing implements rapid adap...,2023-09-21,2023-09-22,1,eLife,True,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5612,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [188]:
df_final.columns

Index(['doi', 'title', 'publication_date', 'created_date', 'cited_by_count',
       'source_title', 'open_access_status', 'Euclidean geometry', 'Algorithm',
       'Cancer research', 'Medicine', 'Uveitis', 'Guideline', 'Electric field',
       'Bistability', 'Genetics', 'Biology', 'Pathogenicity',
       'Computational biology', 'Cirrhosis', 'Lesion', 'Computer science',
       'Face (sociological concept)', 'Neuroscience', 'Autism', 'Microbiology',
       'Chemistry', 'Dash', 'Wrist', 'Prosthesis', 'Surgery', 'Climatology',
       'Pregnancy', 'Insulin', 'Endocrinology', 'Internal medicine',
       'Follicular phase', 'Subcommissural organ', 'Cell biology',
       'Cerebrospinal fluid', 'Atlas (anatomy)', 'Artificial intelligence',
       'Brain atlas', 'Neuroimaging', 'Gravitational wave', 'Physics',
       'Binary number', 'Immune system', 'Immunology', 'Antigen',
       'Vaccination', 'Population', 'Fusion', 'Psychology',
       'Operationalization', 'Conceptualization', 'Compassio

In [189]:
df_final.sort_values(by='Interpersonal communication',ascending=False)[['doi','title','Interpersonal communication','publication_date','created_date','cited_by_count',
                                                        'open_access_status']]

Unnamed: 0,doi,title,Interpersonal communication,publication_date,created_date,cited_by_count,open_access_status
115,https://doi.org/10.1007/s12671-023-02222-4,What Do (and Don’t) We Know About Self-Compass...,0.5306,2023-09-21,2023-09-22,1,True
48,https://doi.org/10.2140/apde.2023.16.1485,Simplices in thin subsets of Euclidean spaces,0.0,2023-09-21,2020-09-14,1,True
116,https://doi.org/10.1038/s41467-023-41333-3,Priority areas to protect mangroves and maximi...,0.0,2023-09-21,2023-09-22,1,True
110,https://doi.org/10.1103/physrevlett.131.121402,Gravitational-Wave Phasing of Quasicircular Co...,0.0,2023-09-21,2023-09-22,1,True
111,https://doi.org/10.1038/s41590-023-01613-y,Prior vaccination promotes early activation of...,0.0,2023-09-21,2023-09-22,1,False
112,https://doi.org/10.1038/s41590-023-01608-9,Multimodal single-cell datasets characterize a...,0.0,2023-09-21,2023-09-22,1,True
113,https://doi.org/10.1103/physrevlett.131.120604,Tailoring Fusion-Based Error Correction for Hi...,0.0,2023-09-21,2023-09-22,1,False
114,https://doi.org/10.1038/s41467-023-41408-1,Lola-I is a promoter pioneer factor that estab...,0.0,2023-09-21,2023-09-22,1,True
117,https://doi.org/10.1038/s41467-023-41354-y,mRNA vaccine quality analysis using RNA sequen...,0.0,2023-09-21,2023-09-22,1,True
54,https://doi.org/10.1007/s00222-023-01220-6,On the birational section conjecture with stro...,0.0,2023-09-26,2021-09-13,1,True


___

In [116]:
df_filtrado

Unnamed: 0,doi,title,publication_date,created_date,primary_location,authorships,open_access,concepts,sustainable_development_goals,cited_by_count,counts_by_year,source_title
48,https://doi.org/10.2140/apde.2023.16.1485,Simplices in thin subsets of Euclidean spaces,2023-09-21,2020-09-14,"{'is_accepted': True, 'is_oa': True, 'is_publi...","[{'author': {'display_name': 'Alex Iosevich', ...","{'any_repository_has_fulltext': False, 'is_oa'...","[{'display_name': 'Euclidean geometry', 'id': ...","[{'display_name': 'Reduced inequalities', 'id'...",1,"[{'cited_by_count': 1, 'year': 2021}]",Analysis & PDE
54,https://doi.org/10.1007/s00222-023-01220-6,On the birational section conjecture with stro...,2023-09-26,2021-09-13,"{'is_accepted': True, 'is_oa': True, 'is_publi...",[{'author': {'display_name': 'Giulio Bresciani...,"{'any_repository_has_fulltext': False, 'is_oa'...","[{'display_name': 'Algorithm', 'id': 'https://...","[{'display_name': 'Reduced inequalities', 'id'...",1,"[{'cited_by_count': 1, 'year': 2022}]",Inventiones Mathematicae
61,https://doi.org/10.1002/hep.32781,c‐Rel–dependent Chk2 signaling regulates the D...,2023-09-27,2022-09-14,"{'is_accepted': True, 'is_oa': True, 'is_publi...","[{'author': {'display_name': 'Jack Leslie', 'i...","{'any_repository_has_fulltext': True, 'is_oa':...","[{'display_name': 'DNA damage', 'id': 'https:/...",[{'display_name': 'Good health and well-being'...,1,"[{'cited_by_count': 1, 'year': 2023}]",Hepatology
62,https://doi.org/10.5507/bp.2022.038,Trends in management of ocular syphilis in ter...,2023-09-21,2022-09-19,"{'is_accepted': True, 'is_oa': True, 'is_publi...","[{'author': {'display_name': 'Aneta Klímová', ...","{'any_repository_has_fulltext': False, 'is_oa'...","[{'display_name': 'Medicine', 'id': 'https://o...","[{'display_name': 'Gender equality', 'id': 'ht...",1,"[{'cited_by_count': 1, 'year': 2023}]",Biomedical Papers of the Faculty of Medicine o...
95,https://doi.org/10.1253/circj.cj-22-0794,JCS/JSCVS/JATS/JSVS 2020 Guideline on Diagnosi...,2023-09-25,2023-09-01,"{'is_accepted': True, 'is_oa': True, 'is_publi...","[{'author': {'display_name': 'Hitoshi Ogino', ...","{'any_repository_has_fulltext': False, 'is_oa'...","[{'display_name': 'Aortic dissection', 'id': '...",[{'display_name': 'Good health and well-being'...,1,[],Circulation journal
96,https://doi.org/10.1088/1478-3975/acf8a4,Universal calcium fluctuations in hydra morpho...,2023-09-22,2023-09-12,"{'is_accepted': True, 'is_oa': True, 'is_publi...","[{'author': {'display_name': 'Oded Agam', 'id'...","{'any_repository_has_fulltext': False, 'is_oa'...","[{'display_name': 'Morphogenesis', 'id': 'http...",[],1,[],Physical Biology
97,https://doi.org/10.1126/science.adg7492,Accurate proteome-wide missense variant effect...,2023-09-22,2023-09-20,"{'is_accepted': True, 'is_oa': True, 'is_publi...","[{'author': {'display_name': 'Jianghua Cheng',...","{'any_repository_has_fulltext': False, 'is_oa'...","[{'display_name': 'Missense mutation', 'id': '...","[{'display_name': 'Life in Land', 'id': 'https...",1,[],Science
98,https://doi.org/10.1007/s00330-023-10226-w,Hepatobiliary phase imaging in cirrhotic patie...,2023-09-21,2023-09-21,"{'is_accepted': False, 'is_oa': False, 'is_pub...","[{'author': {'display_name': 'Sungjin Yoon', '...","{'any_repository_has_fulltext': False, 'is_oa'...","[{'display_name': 'Medicine', 'id': 'https://o...",[{'display_name': 'Affordable and clean energy...,1,[],European Radiology
99,https://doi.org/10.1142/s0218213023500689,Development of Optimal Hyper-parameter Tuning-...,2023-09-21,2023-09-21,"{'is_accepted': False, 'is_oa': False, 'is_pub...","[{'author': {'display_name': 'Tarun Yadav', 'i...","{'any_repository_has_fulltext': False, 'is_oa'...","[{'display_name': 'Computer science', 'id': 'h...","[{'display_name': 'Climate action', 'id': 'htt...",1,[],International Journal on Artificial Intelligen...
100,https://doi.org/10.7554/elife.83223,Dynamic top-down biasing implements rapid adap...,2023-09-21,2023-09-22,"{'is_accepted': True, 'is_oa': True, 'is_publi...","[{'author': {'display_name': 'Lucas Y Tian', '...","{'any_repository_has_fulltext': False, 'is_oa'...","[{'display_name': 'Neuroscience', 'id': 'https...","[{'display_name': 'Quality Education', 'id': '...",1,[],eLife


In [48]:
df = pd.read_parquet('datasets_2023-09-21_to_2023-09-28/registros_2023-09-21_to_2023-09-28_1.parquet',sep=';')
df

#df.loc[:,['doi','title','publication_date','authorships','cited_by_count','primary_location','concepts','counts_by_year',]]

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,type,type_crossref,open_access,authorships,countries_distinct_count,institutions_distinct_count,corresponding_author_ids,corresponding_institution_ids,apc_list,apc_paid,has_fulltext,cited_by_count,biblio,is_retracted,is_paratext,concepts,mesh,locations_count,locations,best_oa_location,sustainable_development_goals,grants,referenced_works_count,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,fulltext_origin
0,https://openalex.org/W2993340759,https://doi.org/10.32920/24201303,The Disciplinary Boundaries of Canadian Identi...,The Disciplinary Boundaries of Canadian Identi...,2023,2023-09-26,{'openalex': 'https://openalex.org/W2993340759...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,posted-content,"{'is_oa': True, 'oa_status': 'closed', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",1,1,['https://openalex.org/A5059070857'],['https://openalex.org/I530967'],,,False,28,"{'volume': None, 'issue': None, 'first_page': ...",False,False,"[{'id': 'https://openalex.org/C542530943', 'wi...",[],1,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://metadata.un.org/sdg/10', 'dis...",[],12,"['https://openalex.org/W1561274137', 'https://...","['https://openalex.org/W106442999', 'https://o...",https://api.openalex.org/works/W2993340759/ngrams,"{'&lt;p&gt;This': [0], 'article': [1, 133], 'a...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2020, 'cited_by_count': 2}, {'year':...",2023-09-27T06:03:59.356191,2019-12-13,
1,https://openalex.org/W4220965327,https://doi.org/10.1145/3527174,Explanation-driven HCI Model to Examine the Mi...,Explanation-driven HCI Model to Examine the Mi...,2023,2023-09-26,{'openalex': 'https://openalex.org/W4220965327...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",4,4,[],[],,,True,12,"{'volume': '20', 'issue': '2', 'first_page': '...",False,False,"[{'id': 'https://openalex.org/C41008148', 'wik...",[],1,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://metadata.un.org/sdg/16', 'dis...",[],36,"['https://openalex.org/W54243233', 'https://op...","['https://openalex.org/W1996541855', 'https://...",https://api.openalex.org/works/W4220965327/ngrams,"{'Directing': [0], 'research': [1, 48, 98], 'o...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2023, 'cited_by_count': 4}, {'year':...",2023-09-28T07:12:50.204065,2022-04-03,pdf
2,https://openalex.org/W2995926841,https://doi.org/10.3390/e25101367,Learning Energy-Based Models in High-Dimension...,Learning Energy-Based Models in High-Dimension...,2023,2023-09-22,{'openalex': 'https://openalex.org/W2995926841...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,"{'is_oa': True, 'oa_status': 'closed', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",1,4,['https://openalex.org/A5012657557'],"['https://openalex.org/I4210145836', 'https://...","{'value': 2000, 'currency': 'CHF', 'value_usd'...","{'value': 2000, 'currency': 'CHF', 'value_usd'...",False,7,"{'volume': '25', 'issue': '10', 'first_page': ...",False,False,"[{'id': 'https://openalex.org/C163294075', 'wi...",[],2,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://metadata.un.org/sdg/7', 'disp...",[],19,"['https://openalex.org/W602904462', 'https://o...","['https://openalex.org/W2033914206', 'https://...",https://api.openalex.org/works/W2995926841/ngrams,"{'Energy-Based': [0], 'Models': [1], '(EBMs)':...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 3}, {'year':...",2023-09-23T07:11:24.257720,2019-12-26,
3,https://openalex.org/W2289392542,https://doi.org/10.5038/druj6356,"Design, Fabrication, and Characterization of a...","Design, Fabrication, and Characterization of a...",2023,2023-09-21,{'openalex': 'https://openalex.org/W2289392542...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,proceedings-article,"{'is_oa': True, 'oa_status': 'closed', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",1,1,[],[],,,False,6,"{'volume': None, 'issue': None, 'first_page': ...",False,False,"[{'id': 'https://openalex.org/C136525101', 'wi...",[],1,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://metadata.un.org/sdg/7', 'disp...",[],4,"['https://openalex.org/W1499696721', 'https://...","['https://openalex.org/W2024534785', 'https://...",https://api.openalex.org/works/W2289392542/ngrams,"{'In': [0], 'this': [1], 'work,': [2], 'we': [...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2012, 'cited_by_count': 1}]",2023-09-27T14:53:07.062767,2016-06-24,
4,https://openalex.org/W4213435830,https://doi.org/10.1145/3519026,Designing Creative AI Partners with COFI: A Fr...,Designing Creative AI Partners with COFI: A Fr...,2023,2023-09-23,{'openalex': 'https://openalex.org/W4213435830...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",1,1,[],[],,,True,6,"{'volume': '30', 'issue': '5', 'first_page': '...",False,False,"[{'id': 'https://openalex.org/C11012388', 'wik...",[],3,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://metadata.un.org/sdg/17', 'dis...",[],94,"['https://openalex.org/W5031869', 'https://ope...","['https://openalex.org/W1479828534', 'https://...",https://api.openalex.org/works/W4213435830/ngrams,"{'Human-AI': [0], 'co-creativity': [1, 60, 82,...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2023, 'cited_by_count': 3}, {'year':...",2023-09-25T06:20:23.363760,2022-02-25,pdf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,https://openalex.org/W3180036549,https://doi.org/10.1103/physrevapplied.20.034062,Hybrid Quantum-Classical Heuristic to Solve La...,Hybrid Quantum-Classical Heuristic to Solve La...,2023,2023-09-26,{'openalex': 'https://openalex.org/W3180036549...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,journal-article,"{'is_oa': True, 'oa_status': 'closed', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",1,1,[],[],,,False,0,"{'volume': '20', 'issue': '3', 'first_page': N...",False,False,"[{'id': 'https://openalex.org/C126255220', 'wi...",[],1,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://metadata.un.org/sdg/16', 'dis...",[{'funder': 'https://openalex.org/F4320322581'...,42,"['https://openalex.org/W1828885239', 'https://...","['https://openalex.org/W1834706786', 'https://...",https://api.openalex.org/works/W3180036549/ngrams,"{'We': [0, 84, 213, 229], 'present': [1], 'a':...",https://api.openalex.org/works?filter=cites:W3...,[],2023-09-27T07:36:37.534555,2021-07-19,
196,https://openalex.org/W3180653767,https://doi.org/10.1080/23307706.2023.2255594,Deterministic differential games in infinite h...,Deterministic differential games in infinite h...,2023,2023-09-24,{'openalex': 'https://openalex.org/W3180653767...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...","[{'author_position': 'first', 'author': {'id':...",1,1,"['https://openalex.org/A5007878362', 'https://...","['https://openalex.org/I4210088687', 'https://...",,,False,0,"{'volume': None, 'issue': None, 'first_page': ...",False,False,"[{'id': 'https://openalex.org/C144237770', 'wi...",[],1,"[{'is_oa': False, 'landing_page_url': 'https:/...",,"[{'id': 'https://metadata.un.org/sdg/16', 'dis...",[{'funder': 'https://openalex.org/F4320323771'...,41,"['https://openalex.org/W1487586009', 'https://...","['https://openalex.org/W1594313668', 'https://...",https://api.openalex.org/works/W3180653767/ngrams,"{'ABSTRACTWe': [0], 'study': [1], 'a': [2, 70,...",https://api.openalex.org/works?filter=cites:W3...,[],2023-09-25T05:39:52.846705,2021-07-19,
197,https://openalex.org/W3181261771,https://doi.org/10.1007/s10589-023-00524-w,Equilibrium modeling and solution approaches i...,Equilibrium modeling and solution approaches i...,2023,2023-09-25,{'openalex': 'https://openalex.org/W3181261771...,en,"{'is_oa': False, 'landing_page_url': 'https://...",article,journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...","[{'author_position': 'first', 'author': {'id':...",1,1,['https://openalex.org/A5020564160'],['https://openalex.org/I923382979'],"{'value': 2390, 'currency': 'EUR', 'value_usd'...","{'value': 2390, 'currency': 'EUR', 'value_usd'...",False,0,"{'volume': None, 'issue': None, 'first_page': ...",False,False,"[{'id': 'https://openalex.org/C126255220', 'wi...",[],1,"[{'is_oa': False, 'landing_page_url': 'https:/...",,"[{'id': 'https://metadata.un.org/sdg/10', 'dis...",[],44,"['https://openalex.org/W54816854', 'https://op...","['https://openalex.org/W1581956489', 'https://...",https://api.openalex.org/works/W3181261771/ngrams,,https://api.openalex.org/works?filter=cites:W3...,[],2023-09-26T05:40:11.452966,2021-07-19,
198,https://openalex.org/W3181836146,https://doi.org/10.22323/1.444.0008,Highlights from the Telescope Array Experiment,Highlights from the Telescope Array Experiment,2023,2023-09-25,{'openalex': 'https://openalex.org/W3181836146...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",article,proceedings-article,"{'is_oa': True, 'oa_status': 'closed', 'oa_url...","[{'author_position': 'first', 'author': {'id':...",1,1,['https://openalex.org/A5044634236'],['https://openalex.org/I74801974'],,,False,0,"{'volume': None, 'issue': None, 'first_page': ...",False,False,"[{'id': 'https://openalex.org/C121332964', 'wi...",[],1,"[{'is_oa': True, 'landing_page_url': 'https://...","{'is_oa': True, 'landing_page_url': 'https://d...","[{'id': 'https://metadata.un.org/sdg/7', 'disp...",[],0,[],"['https://openalex.org/W1629817918', 'https://...",https://api.openalex.org/works/W3181836146/ngrams,"{'The': [0, 88, 116], 'Telescope': [1], 'Array...",https://api.openalex.org/works?filter=cites:W3...,[],2023-09-26T06:16:23.174840,2021-07-19,


In [37]:
df.shape

(25, 1764)

In [34]:
for i in df.columns:
    print(i)

id
doi
title
display_name
publication_year
publication_date
language
type
type_crossref
authorships
countries_distinct_count
institutions_distinct_count
corresponding_author_ids
corresponding_institution_ids
apc_list
apc_paid
has_fulltext
cited_by_count
is_retracted
is_paratext
concepts
mesh
locations_count
locations
sustainable_development_goals
grants
referenced_works_count
referenced_works
related_works
ngrams_url
cited_by_api_url
counts_by_year
updated_date
created_date
ids.openalex
ids.doi
ids.mag
primary_location.is_oa
primary_location.landing_page_url
primary_location.pdf_url
primary_location.source
primary_location.license
primary_location.version
primary_location.is_accepted
primary_location.is_published
open_access.is_oa
open_access.oa_status
open_access.oa_url
open_access.any_repository_has_fulltext
biblio.volume
biblio.issue
biblio.first_page
biblio.last_page
best_oa_location.is_oa
best_oa_location.landing_page_url
best_oa_location.pdf_url
best_oa_location.source
best_oa_lo