In [1]:
import pandas as pd
import numpy as np

import regex as re
import pickle

from datetime import datetime
from time import sleep

from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
# from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch

# from pybliometrics.scopus import AuthorSearch


In [2]:
# Restarted the PC, re-loading variables:

with open("author_out_list", "rb") as fp:
    author_out_list = pickle.load(fp)

with open("q1_first_author_df", "rb") as fp:
    q1_first_author_df = pickle.load(fp)

# with open("author_out_list_loop_save", "rb") as fp:
#     author_out_list = pickle.load(fp)

In [5]:
scopus_codes_dict = {'Agricultural and Biological Sciences': 'AGRI',
 'Arts and Humanities': 'ARTS',
 'Biochemistry, Genetics and Molecular Biology': 'BIOC',
 'Business, Management and Accounting': 'BUSI',
 'Chemical Engineering': 'CENG',
 'Chemistry': 'CHEM',
 'Computer Science': 'COMP',
 'Decision Sciences': 'DECI',
 'Dentistry': 'DENT',
 'Earth and Planetary Sciences': 'EART',
 'Economics, Econometrics and Finance': 'ECON',
 'Energy': 'ENER',
 'Engineering': 'ENGI',
 'Environmental Science': 'ENVI',
 'Health Professions': 'HEAL',
 'Immunology and Microbiology': 'IMMU',
 'Materials Science': 'MATE',
 'Mathematics': 'MATH',
 'Medicine': 'MEDI',
 'Neuroscience': 'NEUR',
 'Nursing': 'NURS',
 'Pharmacology, Toxicology and Pharmaceutics': 'PHAR',
 'Physics and Astronomy': 'PHYS',
 'Psychology': 'PSYC',
 'Social Sciences': 'SOCI',
 'Veterinary': 'VETE',
 'Multidisciplinary': 'MULT'}

def sbj_area_query_creator(sbj_list):
    query = " AND ".join([f"SUBJAREA({scopus_codes_dict[sbj]})" for sbj in sbj_list])
    return query

The main objective of this nb is to create a seperate author_df, to be used with Scopus API Author Search

### Part 1: Creating author_df

In [119]:
with open("q1_date_df", "rb") as fp:
    q1_date_df = pickle.load(fp)

KeyboardInterrupt: 

In [None]:
q1_date_df.shape

(156754, 20)

In [None]:
q1_date_df.author.map(type).value_counts()

<class 'list'>     156436
<class 'float'>       318
Name: author, dtype: int64

It should be noted that 249 artcls do not have any author information :(

In [None]:
q1_author_df = q1_date_df.reset_index().dropna(subset= "author").loc[:,["DOI", "JRNL_ID", "author"]].explode("author")

In [None]:
q1_author_df.reset_index(drop=True, inplace=True)

#### Expanding "author" column -> aut_df

In [None]:
aut_df = pd.DataFrame(list(q1_author_df['author']))

In [None]:
aut_df.affiliation.map(len).map(lambda x: 1 if x>0 else 0).value_counts()

0    530763
1    191411
Name: affiliation, dtype: int64

In [None]:
aut_df[aut_df.sequence == "first"].affiliation.map(len).map(lambda x: 1 if x>0 else 0).value_counts()

0    116505
1     43729
Name: affiliation, dtype: int64

In [None]:
# Combining the two datasets:
q1_author_df = pd.concat([q1_author_df, aut_df], axis=1)

In [None]:
q1_author_df.head()

Unnamed: 0,DOI,JRNL_ID,author,given,family,sequence,affiliation,ORCID,authenticated-orcid,suffix,name
0,10.1002/aic.14056,1_0,"{'given': 'Phuong-Mai', 'family': 'Nguyen', 's...",Phuong-Mai,Nguyen,first,[],,,,
1,10.1002/aic.14056,1_0,"{'given': 'Audrey', 'family': 'Goujon', 'seque...",Audrey,Goujon,additional,[{'name': 'Institut National de la Recherche A...,,,,
2,10.1002/aic.14056,1_0,"{'given': 'Patrick', 'family': 'Sauvegrain', '...",Patrick,Sauvegrain,additional,[{'name': 'Laboratoire National de métrologie ...,,,,
3,10.1002/aic.14056,1_0,"{'given': 'Olivier', 'family': 'Vitrac', 'sequ...",Olivier,Vitrac,additional,[{'name': 'Institut National de la Recherche A...,,,,
4,10.1002/aic.14601,1_1,"{'given': 'Eiji', 'family': 'Iritani', 'sequen...",Eiji,Iritani,first,[{'name': 'Dept. of Chemical Engineering; Nago...,,,,


In [None]:
q1_author_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 722174 entries, 0 to 722173
Data columns (total 11 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   DOI                  722174 non-null  object
 1   JRNL_ID              722174 non-null  object
 2   author               722174 non-null  object
 3   given                720608 non-null  object
 4   family               721590 non-null  object
 5   sequence             722174 non-null  object
 6   affiliation          722174 non-null  object
 7   ORCID                51373 non-null   object
 8   authenticated-orcid  51373 non-null   object
 9   suffix               1021 non-null    object
 10  name                 584 non-null     object
dtypes: object(11)
memory usage: 60.6+ MB


In [None]:
len(q1_author_df[(q1_author_df.ORCID.notna()) & (q1_author_df.affiliation.map(len)==0)])

28055

There are 28K authors that have ORCID info but not any affl. fields.

Can be useful, so will not drop!

In [None]:
q1_author_df.drop(["suffix", "name", "author"], axis=1, inplace=True)

The initial analysis shows that a .groupby() w/o any preprocessing decreases the total number of authors from 720K to 590K.

However, it is seen that there is a lot of errors & punct. marks in the dataset. These must be cleaned for a better groupby as well as API use.

In [None]:
# To enable easy & detailed analysis we need to remove NaN values from author_df. However, this will not be necessary for the final q1_date_df, as it should not have any empty auhtor fields.and

q1_author_df.dropna(subset=["family", "given"], inplace=True)

In [None]:
q1_author_df.drop(["New_given", "New_family",], axis=1, inplace=True)

KeyError: "['New_given', 'New_family'] not found in axis"

The standard for abbr. names will be "A. C."

In [None]:
# Need to df[0] = df[0].str.strip() first:
q1_author_df.given = q1_author_df.given.str.strip()
q1_author_df.family = q1_author_df.family.str.strip()

In [None]:
# Remove multiple spaces:
q1_author_df.given = q1_author_df.given.map(lambda x: re.sub(' +', ' ', x))
q1_author_df.family = q1_author_df.family.map(lambda x: re.sub(' +', ' ', x))

In [None]:
q1_author_df[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\.\s[A-Z]\.$", x)))]

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,New_given,New_family
121,10.1007/s10441-013-9176-6,1_62,A. S.,Jannot,additional,[],,,a. s.,jannot
125,10.1007/s10441-014-9234-8,1_61,G. P.,Samanta,first,[],,,g. p.,samanta
153,10.1007/s00701-013-1624-1,1_73,M. P.,Meier,first,[],,,m. p.,meier
156,10.1007/s00701-013-1624-1,1_73,M. I.,Ruge,additional,[],,,m. i.,ruge
314,10.1007/s10714-010-0991-7,1_114,J. T.,Firouzjaee,first,[],,,j. t.,firouzjaee
...,...,...,...,...,...,...,...,...,...,...
722047,10.1093/geronb/gbt032,1_24503,K. E.,Cichy,additional,[],,,k. e.,cichy
722048,10.1093/geronb/gbt032,1_24503,K. S.,Birditt,additional,[],,,k. s.,birditt
722057,10.1093/geronb/gbt100,1_24503,E. J.,Nicklett,first,[],,,e. j.,nicklett
722059,10.1093/geronb/gbt100,1_24503,M. S.,Spencer,additional,[],,,m. s.,spencer


In [None]:
# Case 1: A. C
q1_author_df.loc[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\.\s[A-Z]$", x))),"new_given"] = q1_author_df[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\.\s[A-Z]$", x)))].given.map(lambda x: x +".")

In [None]:
# Case 2: A.C
q1_author_df.loc[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\.[A-Z]$", x))),"new_given"] = q1_author_df[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\.[A-Z]$", x)))].given.map(lambda x: x.replace(".",". ") +".")

In [None]:
# Case 3: AC
q1_author_df.loc[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]{2}$", x))),"new_given"] = q1_author_df[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]{2}$", x)))].given.map(lambda x: x[0] + ". " + x[1] + ".")

In [None]:
# Case 4: A C
q1_author_df.loc[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\s[A-Z]$", x))),"new_given"] = q1_author_df[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\s[A-Z]$", x)))].given.map(lambda x: x.replace(" ", ". ") + ".")

In [None]:
# Case 5: A.C.
q1_author_df.loc[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\.[A-Z]\.$", x))), "new_given"] = q1_author_df[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\.[A-Z]\.$", x)))].given.map(lambda x: x.replace(".", ". ").strip())

In [None]:
# Single letter names:
q1_author_df.loc[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]$", x))),"new_given"] = q1_author_df[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]$", x)))].given.map(lambda x: x + ".")

In [None]:
# Single letter nanmes:
q1_author_df[q1_author_df.given.map(lambda x: bool(re.match("^[A-Z]\.$", x)))]

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family
118,10.1007/s10441-013-9176-6,1_62,J.,Demongeot,first,[],,,j.,demongeot
119,10.1007/s10441-013-9176-6,1_62,O.,Hansen,additional,[],,,o.,hansen
120,10.1007/s10441-013-9176-6,1_62,H.,Hessami,additional,[],,,h.,hessami
122,10.1007/s10441-013-9176-6,1_62,J.,Mintsa,additional,[],,,j.,mintsa
123,10.1007/s10441-013-9176-6,1_62,M.,Rachdi,additional,[],,,m.,rachdi
...,...,...,...,...,...,...,...,...,...,...
722041,10.1093/geronb/gbr092,1_24502,M.,Kalmijn,additional,[],,,m.,kalmijn
722043,10.1093/geronb/gbr131,1_24502,M.,Wilson-Genderson,additional,[],,,m.,wilson-genderson
722049,10.1093/geronb/gbt032,1_24503,S.,Zarit,additional,[],,,s.,zarit
722061,10.1093/geronb/gbt121,1_24503,V.,Burholt,first,[],,,v.,burholt


In [None]:
# Removing [Name], (Name),
# remove "Name" , “Name”,
# remove & % , - ' .
# simplf. ö & ü & � & i

# Main Preprocessing:
def name_simplifier(pdf_read):
    # Lowercase all:
    pdf_modified = pdf_read.lower()
    # Remove everything in brackets & paranthesis:
    pdf_modified = re.sub("[\(\[\{].*?[\)\]\}]", "", pdf_modified)
    # Remove quote names:
    pdf_modified = re.sub('["“].*?["”]', "", pdf_modified)
    # Remove numbers
    pdf_modified = re.sub(r'[0-9]', '', pdf_modified)
    # Remove weird punct.
    pdf_modified = re.sub(r'[&\?\$\+\\\*\^\|]', '', pdf_modified)
    # Simplify acct. a:
    pdf_modified = re.sub(r'[áạàảãăặằẳẵâấậầẩẫā]', 'a', pdf_modified)
    # Simplify acct. i:
    pdf_modified = re.sub(r'[íịìỉĩïǐĭīĩįɨıî]', 'i', pdf_modified)
    # Simplify acct. i:
    pdf_modified = re.sub(r'[éẹèẻẽêếệềểễ]', 'e', pdf_modified)
    # Simplify acct. o:
    pdf_modified = re.sub(r'[óòȯôöǒŏōõǫőốồøṓṑ]', 'o', pdf_modified)
    # Simplify acct. u:
    pdf_modified = re.sub(r'[úùûüǔŭūũů]', 'u', pdf_modified)
    # Remove multiple spaces:
    pdf_modified = re.sub(' +', ' ', pdf_modified)
    # Return pdf_modified
    return pdf_modified



In [None]:
# Simpli:

q1_author_df.loc[q1_author_df.new_given.isna(),"new_given"] = q1_author_df[q1_author_df.new_given.isna()].given.map(name_simplifier)
q1_author_df.loc[:,"new_family"] = q1_author_df.family.map(name_simplifier)

In [None]:
# After process stripping:

q1_author_df.given = q1_author_df.given.str.strip()
q1_author_df.family = q1_author_df.family.str.strip()

In [None]:
q1_author_df[["given","family","new_given","new_family"]].sample(20)

Unnamed: 0,given,family,new_given,new_family
662758,Juan,Du,juan,du
112153,Fabiola H.,Gerpott,fabiola h.,gerpott
378415,Nina,Hofmann,nina,hofmann
503665,Steven,Jay,steven,jay
272638,Srdjan,Pusara,srdjan,pusara
380487,A. R.,Mermut,a. r.,mermut
98232,Shiun-Yuan,Hsu,shiun-yuan,hsu
713101,Marta,Lachowska,marta,lachowska
684415,Per,Gardeström,per,gardestrom
454476,John E.,Savage,john e.,savage


In [None]:
q1_author_df.groupby(["new_given","new_family"])["DOI"].count()

new_given   new_family  
                            1
            rahmatd         1
            santiano        1
 jing       cai             1
 kim        keamy           1
                           ..
о.о.        dan’kiv         1
т.а.        denisova        1
тatiana а.  grebennikova    1
ᴌukasz      boguszewicz     1
�lvaro      petracco        1
Name: DOI, Length: 585967, dtype: int64

### Part 2: Adding Subject Areas

In [None]:
with open("q1_jrnl_df", "rb") as fp:
    q1_jrnl_df = pickle.load(fp)

In [None]:
q1_author_df = pd.merge(left=q1_author_df, right=q1_jrnl_df["SA_list"], how="left", left_on="JRNL_ID", right_index=True)

### Part 3: Scopus API Trials

In [None]:
# copied from 6a:

def Scopus_author(author_list):
    for aut in author_list:
        name = aut["given"]
        surname = aut["family"]
        s = AuthorSearch(f'AUTHLAST({surname}) and AUTHFIRST({name})')
        if s.get_results_size()>0:
            aut["country"] = s.authors[0]._asdict()["country"]
        else:
            aut["country"] = None
    return author_list

In [None]:
# copied from 6a:

# After adding the country field for every author in "author" column:
# Can use to create a seperate "author_countries" column

def country_counter(author_col):
    return Counter([aut["country"] for aut in author_col])


Useful stuff:

In [17]:
scopus_codes_dict = {'Agricultural and Biological Sciences': 'AGRI',
 'Arts and Humanities': 'ARTS',
 'Biochemistry, Genetics and Molecular Biology': 'BIOC',
 'Business, Management and Accounting': 'BUSI',
 'Chemical Engineering': 'CENG',
 'Chemistry': 'CHEM',
 'Computer Science': 'COMP',
 'Decision Sciences': 'DECI',
 'Dentistry': 'DENT',
 'Earth and Planetary Sciences': 'EART',
 'Economics, Econometrics and Finance': 'ECON',
 'Energy': 'ENER',
 'Engineering': 'ENGI',
 'Environmental Science': 'ENVI',
 'Health Professions': 'HEAL',
 'Immunology and Microbiology': 'IMMU',
 'Materials Science': 'MATE',
 'Mathematics': 'MATH',
 'Medicine': 'MEDI',
 'Neuroscience': 'NEUR',
 'Nursing': 'NURS',
 'Pharmacology, Toxicology and Pharmaceutics': 'PHAR',
 'Physics and Astronomy': 'PHYS',
 'Psychology': 'PSYC',
 'Social Sciences': 'SOCI',
 'Veterinary': 'VETE',
 'Multidisciplinary': 'MULT'}

def sbj_area_query_creator(sbj_list):
    query = " AND ".join([f"SUBJAREA({scopus_codes_dict[sbj]})" for sbj in sbj_list])
    return query

In [None]:
testo = q1_author_df.sample(1)

testo

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family,SA_list
128084,10.1021/acs.jmedchem.6b00176,1_4104,Vanessa,Rada,additional,[],,,vanessa,rada,"[Pharmacology, Toxicology and Pharmaceutics, B..."


In [None]:
Scopus_author(testo)

['United States']

In [None]:
# ScopusQueryError: Found 6574 matches.  The query fails to return more than 5000 entries.  Change your query such that it returns fewer entries.

In [129]:
# VERSION 1 - To be used w/ .apply()

def Scopus_author(row):
    name = row["new_given"]
    surname = row["new_family"]
    sa_list = sbj_area_query_creator(row.SA_list)
    try:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})')
    except:
        return None
    if s.get_results_size() > 0:
        return [s.authors[i]._asdict()["country"] for i in range(s.get_results_size())]
    else:
        return np.nan

In [None]:
# VERSION 2 - To be used w/ .samle()

def Scopus_author_sample(row):
    name = row["new_given"].item()
    surname = row["new_family"].item()
    sa_list = sbj_area_query_creator(row.SA_list.item())
    try:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})')
    except:
        return None
    if s.get_results_size() > 0:
        return [s.authors[i]._asdict()["country"] for i in range(s.get_results_size())]
    else:
        return np.nan

In [124]:
testo_df = q1_author_df.sample(6000)

In [130]:
testo_df["scopus_result"] = testo_df.apply(Scopus_author, axis=1)

In [135]:
testo_df.scopus_result.map(type).value_counts()

<class 'NoneType'>    3983
<class 'list'>        1974
<class 'float'>         43
Name: scopus_result, dtype: int64

In [143]:
with open("testo_df","wb") as p:
    pickle.dump(testo_df, p)

In [None]:
with open("q1_author_df","wb") as p:
    pickle.dump(q1_author_df, p)

In [140]:
print(s_s)

Search 'AUTHLAST(Evans) and AUTHFIRST(David G.) AND (SUBJAREA(MEDI) OR SUBJAREA(BIOC) OR SUBJAREA(NEUR))' yielded 15 authors as of 2022-03-22:
    Evans, D. Gareth R.; AUTHOR_ID:6505764222 (1,084 document(s))
    Evans, David Geraint; AUTHOR_ID:57199095509 (450 document(s))
    Evans, David G.; AUTHOR_ID:34770687200 (22 document(s))
    Evans, David G.; AUTHOR_ID:57223817001 (10 document(s))
    Evans, David G.; AUTHOR_ID:55478754500 (9 document(s))
    Evans, David G.; AUTHOR_ID:55478764200 (7 document(s))
    Evans, David G.; AUTHOR_ID:57209538123 (5 document(s))
    Evans, David G.; AUTHOR_ID:57214022971 (3 document(s))
    Evans, David G.; AUTHOR_ID:55568514534 (3 document(s))
    Evans, William David George; AUTHOR_ID:55554688500 (3 document(s))
    Evans, David G.; AUTHOR_ID:55478772600 (2 document(s))
    Evans, David Glyn; AUTHOR_ID:57214023150 (1 document(s))
    Evans, David G.; AUTHOR_ID:55478811400 (1 document(s))
    Evans, David G.; AUTHOR_ID:55478794700 (1 document(s))
 

### Part 4: Best way to use Scopus API

In [6]:
# import pybliometrics

# pybliometrics.scopus.utils.create_config()

import pandas as pd
import numpy as np
import regex as re

from pybliometrics.scopus import AuthorSearch

import pickle

In [7]:
scopus_codes_dict = {'Agricultural and Biological Sciences': 'AGRI',
 'Arts and Humanities': 'ARTS',
 'Biochemistry, Genetics and Molecular Biology': 'BIOC',
 'Business, Management and Accounting': 'BUSI',
 'Chemical Engineering': 'CENG',
 'Chemistry': 'CHEM',
 'Computer Science': 'COMP',
 'Decision Sciences': 'DECI',
 'Dentistry': 'DENT',
 'Earth and Planetary Sciences': 'EART',
 'Economics, Econometrics and Finance': 'ECON',
 'Energy': 'ENER',
 'Engineering': 'ENGI',
 'Environmental Science': 'ENVI',
 'Health Professions': 'HEAL',
 'Immunology and Microbiology': 'IMMU',
 'Materials Science': 'MATE',
 'Mathematics': 'MATH',
 'Medicine': 'MEDI',
 'Neuroscience': 'NEUR',
 'Nursing': 'NURS',
 'Pharmacology, Toxicology and Pharmaceutics': 'PHAR',
 'Physics and Astronomy': 'PHYS',
 'Psychology': 'PSYC',
 'Social Sciences': 'SOCI',
 'Veterinary': 'VETE',
 'Multidisciplinary': 'MULT'}

def sbj_area_query_creator(sbj_list):
    query = " AND ".join([f"SUBJAREA({scopus_codes_dict[sbj]})" for sbj in sbj_list])
    return query

In [8]:
with open("q1_author_df", "rb") as fp:
    q1_author_df = pickle.load(fp)

In [4]:
author_records = q1_author_df[["new_given","new_family","SA_list"]].sample(10).to_dict("records")

#### 4.1 - w/o any API Keys 

In [19]:
for author in author_records[:10]:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])

    s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})')
    author["affil"] = [s.authors[i]._asdict()["country"] for i in range(s.get_results_size())]


Scopus401Error: Invalid API Key

__FAIL__ - We can NOT get any info w/o API keys.

#### 4.2 - w/ already used(?) API key

In [5]:
for author in author_records[:10]:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])

    s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})')
    author["affil"] = [s.authors[i]._asdict()["country"] for i in range(s.get_results_size())]


Scopus429Error: 

__FAIL__ - Already used API key gives the 429 error as expected & can NOT be used until the following week!

#### 4.3 - w/o VPN connection

In [6]:
for author in author_records:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    

    s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})', download=False)
    n = s.get_results_size()

    if n > 5000:
        author["affil"] = "Too many results!"
    else:
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(s.get_results_size())]

Scopus401Error: The requestor is not authorized to access the requested view or fields of the resource

__FAIL__ - does NOT work w/o VPN connection!

#### 4.4 - w/ download = False 

Is it reasonable to use download = False to check result_size & use the same request to also get the info needed

In [5]:
for author in author_records:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    

    s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})', download=False)
    n = s.get_results_size()

    if n > 5000:
        author["affil"] = "Too many results!"
    else:
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(s.get_results_size())]

TypeError: 'NoneType' object is not subscriptable

In [6]:
s.get_key_remaining_quota()

'5000'

__FAIL__ - If download = False, can not use the query result! 

__HOWEVER__, the remaining quota is still 5000, so probab. download = False does not use quota!!

#### 4.5 - Combining all:

In [7]:
# Test 1:

for author in author_records:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    

    n = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})', download=False).get_results_size()

    if n > 5000:
        author["affil"] = "Too many results!"
    else:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})')
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(s.get_results_size())]

In [8]:
s.get_key_remaining_quota()

'4970'

In [10]:
pd.DataFrame(author_records)

Unnamed: 0,new_given,new_family,SA_list,affil
0,yichuan,xiao,"[Biochemistry, Genetics and Molecular Biology]","[China, China, China, China]"
1,ahmed ibrahim fadhil,al-adly,"[Energy, Chemistry, Chemical Engineering]",[Iraq]
2,alex,nicol-harper,[Agricultural and Biological Sciences],[United Kingdom]
3,konrad,gajewski,"[Social Sciences, Arts and Humanities, Agricul...",[Canada]
4,ajmal,ahmad,"[Pharmacology, Toxicology and Pharmaceutics]","[Saudi Arabia, Saudi Arabia]"
5,megan,stolen,"[Chemistry, Environmental Science, Medicine]",[United States]
6,dragana,životić,[Earth and Planetary Sciences],[Serbia]
7,yotam,levy,"[Neuroscience, Medicine]",[United Kingdom]
8,michael a. e.,andersen,"[Engineering, Energy]",[Denmark]
9,christopher j.,grim,"[Immunology and Microbiology, Medicine]",[United States]


In [56]:
pd.DataFrame(author_records[:10]).affil.map(len).sum()

14

In [57]:
pd.DataFrame(author_records[:10]).SA_list.map(len).sum()

21

It is WORKING!!!! However, the quota is now 4970 intead of the expected number 4990. Need to check this issue!

#### 4.6 - Analyzing API Quota Issues:

In [11]:
s.get_key_remaining_quota()

'4970'

In [12]:
# As the API quotas are limited, creating a new author_records dict that can be combined with the previous run results.

author_records2 = q1_author_df[["new_given","new_family","SA_list"]].sample(10).to_dict("records")

In [13]:
# Test 2:

for author in author_records2:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    

    n = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})', download=False).get_results_size()

    if n > 5000:
        author["affil"] = "Too many results!"
    else:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})')
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(n)]

In [14]:
s.get_key_remaining_quota()

'4940'

In [46]:
pd.DataFrame(author_records2)

Unnamed: 0,new_given,new_family,SA_list,affil
0,hainan,liu,[Medicine],"[China, China, China, China, None, None]"
1,yougen,chen,"[Physics and Astronomy, Materials Science, Mat...",[China]
2,komal,agrawal,"[Chemical Engineering, Energy]",[India]
3,liliana,letra,[Neuroscience],[Portugal]
4,i.,krossing,"[Chemical Engineering, Chemistry]",[Germany]
5,pasan,fernando,"[Chemical Engineering, Engineering, Biochemist...",[Canada]
6,hod,orkibi,[Psychology],[Israel]
7,brena p.,teodorak,[Multidisciplinary],[Brazil]
8,helena,vargas,"[Social Sciences, Arts and Humanities]",[Portugal]
9,alicia j.,spittle,"[Neuroscience, Medicine]",[Australia]


In [55]:
pd.DataFrame(author_records2).affil.map(len).sum()

15

In [19]:
s.get_key_remaining_quota()

'4940'

It appears that .get_results_size() does not use from the quota! BUT still, we used 30 to get results for 10!

In [21]:
author_records3 = q1_author_df[["new_given","new_family","SA_list"]].sample(10).to_dict("records")

In [22]:
# Test 3:

for author in author_records3:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    
    try:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})')
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(s.get_results_size())]

    except:
        author["affil"] = "Too many results!"


In [26]:
s.get_key_remaining_quota()

'4921'

In [69]:
pd.DataFrame(author_records3)

Unnamed: 0,new_given,new_family,SA_list,affil
0,anna,krook,"[Medicine, Biochemistry, Genetics and Molecula...",[Sweden]
1,g.,martinez-donato,"[Biochemistry, Genetics and Molecular Biology,...",[Cuba]
2,r.,ordoñez-fernandez,"[Agricultural and Biological Sciences, Earth a...",[Spain]
3,sulejman,alihodžić,"[Pharmacology, Toxicology and Pharmaceutics, M...",[]
4,joy c.,cohn,"[Medicine, Nursing]",[United States]
5,susan c,loeb,"[Agricultural and Biological Sciences, Environ...",[United States]
6,quentin,verolet,"[Biochemistry, Genetics and Molecular Biology,...",[Switzerland]
7,garet p.,lahvis,[Neuroscience],[United States]
8,birgit,vogel-heuser,"[Engineering, Decision Sciences, Business, Man...",[Germany]
9,mathieu,vadon,"[Physics and Astronomy, Materials Science, Eng...",[France]


In [48]:
pd.DataFrame(author_records3).SA_list.map(len).sum()

24

In [54]:
pd.DataFrame(author_records3).affil.map(len).sum()

9

This time, we were able to decrease the number to 20, (19 as there is an author with no info retrieved.), but still we're spending 2 tokens for a single author!

In [42]:
s.authors

[Author(eid='9-s2.0-57193683976', orcid=None, surname='Vadon', initials='M.', givenname='Mathieu', affiliation='Sciences et Ingénierie, Matériaux, Procédés (SIMaP)', documents=3, affiliation_id='60108264', city="Saint Martin d'Heres", country='France', areas='MATE (6); ENGI (3); PHYS (3)')]

In [39]:
# Combining results so far:

author_records = author_records + author_records2 + author_records3

In [41]:
author_records4 = q1_author_df[["new_given","new_family","SA_list"]].sample(10).to_dict("records")

In [43]:
# Test 4:

for author in author_records4:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    
    try:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND ({sa_list})')
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(len(s.authors))]

    except:
        author["affil"] = "Too many results!"


In [44]:
s.get_key_remaining_quota()

'4896'

In [45]:
4921-4896

25

In [53]:
pd.DataFrame(author_records4).affil.map(len).sum()

1132

In [70]:
pd.DataFrame(author_records4).affil.map(len)

0       1
1       1
2       6
3       1
4       1
5      13
6    1106
7       1
8       1
9       1
Name: affil, dtype: int64

Quite weirdly, the number is now 25!

I have no idea why, my only guess is that Wei Li has returned 1106 results, which might have resulted in multiple API tokens being used! 

In [75]:
author_records5 = q1_author_df[["new_given","new_family","SA_list"]].sample(10).to_dict("records")

In [77]:
# Test 5:

for author in author_records5:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    
    try:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND {sa_list}')
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(len(min(s.authors,10)))]

    except:
        author["affil"] = "Too many results!"


In [78]:
s.get_key_remaining_quota()

'4861'

It was a stupid attempt to use min() and it did not work. However, as there were 2 Chinese authors in author_records5, which resulted in 35 tokens used, this gave me an idea! 

In [80]:
author_records6 = q1_author_df[["new_given","new_family","SA_list"]].sample(10).to_dict("records")

In [82]:
# Test 6:

for author in author_records6:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    
    try:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND {sa_list}', count=10)
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(s.authors)]

    except:
        author["affil"] = "Too many results!"


In [83]:
pd.DataFrame(author_records6)

Unnamed: 0,new_given,new_family,SA_list,affil
0,katarzyna,neubauer,"[Biochemistry, Genetics and Molecular Biology,...",Too many results!
1,X. M.,song,"[Physics and Astronomy, Energy]",Too many results!
2,chantal,claud,[Earth and Planetary Sciences],Too many results!
3,kelvin h.,wan,[Medicine],Too many results!
4,gustavo b.,rossini,[Multidisciplinary],Too many results!
5,A. F.,islam,"[Health Professions, Medicine]",Too many results!
6,rahul,kakkar,[Medicine],Too many results!
7,yu,huang,"[Engineering, Earth and Planetary Sciences, Ag...",Too many results!
8,kevin s.,montes,"[Psychology, Medicine, Pharmacology, Toxicolog...",Too many results!
9,keigo,hoshikawa,[Materials Science],Too many results!


In [88]:
len(author_records)

50

In [85]:
s = AuthorSearch(f'AUTHLAST({"hoshikawa"}) AND AUTHFIRST({"keigo"})', count=10)

NameError: name 'warn' is not defined

Using count argument is not possible right now due to the argument being marked as deprecated. However, it is still available in the Scopus API website.

Therefore, the package is updated locally & will try again after restarting the kernel. 

The results so far will be updated and saved.

In [87]:
author_records = author_records + author_records4 + author_records5

In [89]:
with open("author_records","wb") as p:
    pickle.dump(author_records, p)

After restart:

In [4]:
s = AuthorSearch(f'AUTHLAST({"hoshikawa"}) AND AUTHFIRST({"keigo"})', count=10)

In [5]:
s.get_key_remaining_quota()

'4859'

In [6]:
s.authors

[Author(eid='9-s2.0-7005252927', orcid=None, surname='Hoshikawa', initials='K.', givenname='Keigo', affiliation='Shinshu University', documents=155, affiliation_id='60031955', city='Matsumoto', country='Japan', areas='PHYS (133); MATE (66); ENGI (40)')]

In [7]:
s = AuthorSearch('AUTHLAST("damasio") AND AUTHFIRST("bruno")', count=10)

In [11]:
s.get_key_remaining_quota()

'4857'

In [10]:
s.authors

[Author(eid='9-s2.0-55042129600', orcid=None, surname='Dama´sio', initials='B.F.', givenname='Bruno Figueiredo', affiliation='Universidade Federal do Rio de Janeiro', documents=49, affiliation_id='60000036', city='Rio de Janeiro', country='Brazil', areas='PSYC (42); SOCI (16); MEDI (8)'),
 Author(eid='9-s2.0-56444228600', orcid='0000-0002-2289-3087', surname='Damásio', initials='B.', givenname='Bruno', affiliation='NOVA Information Management School, Universidade Nova de Lisboa', documents=13, affiliation_id='60105899', city='Lisboa', country='Portugal', areas='SOCI (6); MULT (4); ECON (3)')]

____A BREAKTHROUGH!!!!____

https://github.com/pybliometrics-dev/pybliometrics/issues/77

As seen on the link above the doubling issue is actually on my end, but due to the pybliometrics package!

The fixed version of the package is downloaded to pybliometrics_github folder!!!

Will restart again & try this new version instead

In [1]:
# import pybliometrics

# pybliometrics.scopus.utils.create_config()

# import pandas as pd
# import numpy as np
# import regex as re

# from pybliometrics_github.pybliometrics.scopus import AuthorSearch


# import pickle

In [2]:
# Mini-test:

s = AuthorSearch('AUTHLAST("damasio") AND AUTHFIRST("bruno")', count=10)

In [3]:
s.authors

[Author(eid='9-s2.0-55042129600', orcid=None, surname='Dama´sio', initials='B.F.', givenname='Bruno Figueiredo', affiliation='Universidade Federal do Rio de Janeiro', documents=49, affiliation_id='60000036', city='Rio de Janeiro', country='Brazil', areas='PSYC (42); SOCI (16); MEDI (8)'),
 Author(eid='9-s2.0-56444228600', orcid='0000-0002-2289-3087', surname='Damásio', initials='B.', givenname='Bruno', affiliation='NOVA Information Management School, Universidade Nova de Lisboa', documents=13, affiliation_id='60105899', city='Lisboa', country='Portugal', areas='SOCI (6); MULT (4); ECON (3)')]

In [6]:
s.get_key_remaining_quota()

None


get_key_remaining_quota() is now not working!

Will try to replace the folders in anaconda/sitepackage folder instead & will restart again!

In [2]:
# Mini-test 2:

s = AuthorSearch('AUTHLAST("damasio") AND AUTHFIRST("bruno")')

In [5]:
s.get_key_remaining_quota()

It is still not working :(

The next step is to properly "pip" install the package from github using the code:

pip install git+https://github.com/pybliometrics-dev/pybliometrics.git

Which worked & built the package succesfully! Will restart & test again!

----

JUST REALIZED THAT THE REASON IT'S NOT WORKING MIGHT BE AS PROF. BRUNO IS ALREADY CACHED!

However, this is fine as the package is now install properly & I learned how to install packages from GitHub!

To understand if we succeeded or not, need to run the loop for author_records_6 & 7

In [4]:
author_records6 = q1_author_df[["new_given","new_family","SA_list"]].sample(10).to_dict("records")

In [10]:
# Test 6 - TAKE 2:

for author in author_records6:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    
    try:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND {sa_list}')
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(len(s.authors))]

    except:
        author["affil"] = "Too many results!"


In [8]:
s = AuthorSearch(f'AUTHLAST("nicolson") AND AUTHFIRST("roderick i.")')

In [9]:
s.authors

[Author(eid='9-s2.0-7005185981', orcid=None, surname='Nicolson', initials='R.I.', givenname='Roderick Ian', affiliation='Edge Hill University', documents=103, affiliation_id='60022506', city='Lancashire', country='United Kingdom', areas='PSYC (54); NEUR (42); MEDI (39)')]

In [11]:
pd.DataFrame(author_records6)

Unnamed: 0,new_given,new_family,SA_list,affil
0,david,blough,[Medicine],[United States]
1,shari,shanklin,"[Social Sciences, Arts and Humanities, Medicine]",Too many results!
2,ruolei,gu,"[Social Sciences, Psychology]",[China]
3,roderick i.,nicolson,"[Neuroscience, Psychology]",[United Kingdom]
4,p. k.,sidhu,[Veterinary],"[United States, India]"
5,asmaa n.,mohammed,"[Medicine, Veterinary]",[Egypt]
6,r.,rashedi,"[Chemical Engineering, Materials Science]","[Iran, Malaysia, Iran, Norway]"
7,i.,thorsen,"[Engineering, Decision Sciences, Social Sciences]",[Norway]
8,xin,zhang,[Earth and Planetary Sciences],"[China, China, China, China, China, China, Chi..."
9,ivo,kaunzinger,[Psychology],[Germany]


In [13]:
pd.DataFrame(author_records6).affil.map(len)

0      1
1     17
2      1
3      1
4      2
5      1
6      4
7      1
8    823
9      1
Name: affil, dtype: int64

In [12]:
s.get_key_remaining_quota()

In [14]:
# Mini-test:
q1_author_df.sample(1)

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family,SA_list
635885,10.1016/j.ccr.2010.04.025,1_42304,Francesca,Alvarez-Calderon,additional,[],,,francesca,alvarez-calderon,"[Biochemistry, Genetics and Molecular Biology,..."


In [15]:
s1 = AuthorSearch(f'AUTHLAST("Alvarez-Calderon") AND AUTHFIRST("Francesca")')

In [16]:
s1.authors

[Author(eid='9-s2.0-16315284500', orcid=None, surname='Alvarez-Calderon', initials='F.', givenname='Francesca', affiliation='University of Colorado Anschutz Medical Campus', documents=9, affiliation_id='60028392', city='Aurora', country='United States', areas='BIOC (10); MEDI (6); AGRI (1)')]

In [17]:
s1.get_key_remaining_quota()

'4830'

In [26]:
q1_author_df.sample(1)

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family,SA_list
141773,10.1016/j.bbapap.2015.04.011,1_47341,Christian,Doerig,additional,[],,,christian,doerig,"[Chemistry, Biochemistry, Genetics and Molecul..."


In [23]:
s2 = AuthorSearch(f'AUTHLAST("nouri") AND AUTHFIRST("yasir m.")')

In [24]:
s2.authors

[Author(eid='9-s2.0-45661660000', orcid=None, surname='Nouri', initials='Y.M.', givenname='Yasir M.', affiliation='University of Ulsan College of Medicine', documents=5, affiliation_id='60006240', city='Seoul', country='South Korea', areas='MEDI (9); HEAL (2)')]

In [25]:
s2.get_key_remaining_quota()

'4828'

In [27]:
s = AuthorSearch(f'AUTHLAST("doerig") AND AUTHFIRST("christian")')

In [28]:
s2.get_key_remaining_quota()

'4826'

In [7]:
q1_author_df.sample(1)

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family,SA_list
57290,10.1007/s10618-011-0221-2,1_30751,Ernest,Monga,additional,[],,,ernest,monga,[Computer Science]


In [5]:
s = AuthorSearch(f'AUTHLAST("ferretti") AND AUTHFIRST("c.")',)

In [6]:
s.get_key_remaining_quota()

'4824'

In [8]:
s = AuthorSearch(f'AUTHLAST("monga") AND AUTHFIRST("ernest")', count=10)

NameError: name 'warn' is not defined

In [11]:
s.get_key_reset_time()

'2022-08-04 07:13:31'

Unfortunately, the new version of the package does not seem to fix the double token issue. However, when the code is analyzed, there are no apperant double requests rtaking place, which might cause such an issue.

The only new idea is to implement the "count" argument & try to run it for sample(10) rather than single requests.

It is also seen that the API key remaining quota information is taken directly from the Scopus response & not calc. within pybloimetrics, meaning the information is in fact true :( 

It appears that warnings is not imported, even in the latest release of the package. The search.py file is now modified & kernel will be restarted to try again!

In [5]:
author_records7 = q1_author_df[["new_given","new_family","SA_list"]].sample(10).to_dict("records")

In [6]:
# Test 7:

for author in author_records7:
    name = author["new_given"]
    surname = author["new_family"]
    sa_list = sbj_area_query_creator(author["SA_list"])
    
    try:
        s = AuthorSearch(f'AUTHLAST({surname}) AND AUTHFIRST({name}) AND {sa_list}', count=10)
        author["affil"] = [s.authors[i]._asdict()["country"] for i in range(len(s.authors))]

    except:
        author["affil"] = "Too many results!"




In [7]:
pd.DataFrame(author_records7)

Unnamed: 0,new_given,new_family,SA_list,affil
0,david,adams,"[Environmental Science, Social Sciences]","[United States, United Kingdom, Australia, Uni..."
1,michela,guidarelli,"[Agricultural and Biological Sciences, Biochem...",[Italy]
2,mathias,peirlinck,"[Mathematics, Engineering, Computer Science]",[United States]
3,kun-jen,chung,"[Business, Management and Accounting, Economic...",[Taiwan]
4,fang,liu,"[Biochemistry, Genetics and Molecular Biology]","[China, China, China, China, China, China, Chi..."
5,ran,zhang,"[Immunology and Microbiology, Biochemistry, Ge...","[China, China, China]"
6,jai e.,jung,[Computer Science],[South Korea]
7,yoshio,kozono,"[Materials Science, Dentistry]",[Japan]
8,fletcher a,white,"[Immunology and Microbiology, Neuroscience]",[United States]
9,mark f.,randolph,"[Engineering, Earth and Planetary Sciences, Ma...",[Australia]


In [9]:
s.get_key_remaining_quota()

'4697'

In [None]:
4824-4697

In [16]:
q1_author_df.sample(1)

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family,SA_list
182487,10.1902/jop.2010.100355,1_4230,P.,Arjun Raju,additional,[],,,p.,arjun raju,[Dentistry]


In [17]:
s = AuthorSearch(f'AUTHLAST(Arjun Raju)')

In [18]:
len(s.authors)

1

In [15]:
s.get_key_remaining_quota()

'4569'

In [19]:
s.get_key_remaining_quota()

'4567'

In [20]:
s = AuthorSearch(f'AUTHLAST(kokes)')

In [21]:
len(s.authors)

44

In [22]:
s.get_key_remaining_quota()

'4565'

In [25]:
s = AuthorSearch(f'AUTHLAST(asjkhdksjahdjksah)')

In [26]:
len(s.authors)

TypeError: object of type 'NoneType' has no len()

In [27]:
s.get_key_remaining_quota()

'4564'

An "invalid" (empty) search only uses a single token! 

If there is a valid response that is len() < 200 -> +1 token

If len() > 200 -> even more!! could not work out the logic yet!

---

The problem is I don't know if every search & result retr. should use 2 tokens OR there is still something wrong w/ pyblio. causing an uncessary token use.

The only other option is to try download = False option

In [42]:
# Updating results:

with open("author_records", "rb") as fp:
    author_records = pickle.load(fp)
    

author_records = author_records + author_records6 + author_records7

with open("author_records","wb") as p:
    pickle.dump(author_records, p)

In [35]:
q1_author_df.sample(1)

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family,SA_list
574903,10.3390/biology9090287,1_62801,Tetsu,Shimizu,additional,[],,,tetsu,shimizu,"[Agricultural and Biological Sciences, Biochem..."


In [36]:
s.get_key_remaining_quota()

'4563'

In [49]:
s = AuthorSearch(f'AUTHLAST("Shimizu") AND AUTHFIRST("Tetsu")')

Scopus401Error: The requestor is not authorized to access the requested view or fields of the resource

In [34]:
type(s.authors)

NoneType

In [40]:
s.get_key_remaining_quota()

'4563'

#### 4.7 - Elsapy package testing:

Get elsapy client ready:

In [None]:
from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch

## Initialize client
client = ElsClient("6ae4b5181b374f521e60d2ef4be73ec5")
#client.inst_token = config['insttoken']

Use pybliometrics once to get up-to-date quota info:

In [9]:
s = AuthorSearch(f'AUTHLAST("Shimizu") AND AUTHFIRST("Tetsu")')

In [13]:
len(s.authors)

12

In [11]:
s.get_key_remaining_quota()

'4561'

Use elsapy to retrive the same records:

In [12]:
## Initialize author search object and execute search
auth_srch = ElsSearch('AUTHLAST(Shimizu) AND AUTHFIRST(Tetsu)','author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

auth_srch has 12 results.


In [29]:
auth_srch.results

[{'@_fa': 'true',
  'link': [{'@_fa': 'true',
    '@ref': 'self',
    '@href': 'https://api.elsevier.com/content/author/author_id/7408149816'},
   {'@_fa': 'true',
    '@ref': 'search',
    '@href': 'https://api.elsevier.com/content/search/author?query=au-id%287408149816%29'},
   {'@_fa': 'true',
    '@ref': 'scopus-citedby',
    '@href': 'https://www.scopus.com/author/citedby.uri?partnerID=HzOxMe3b&citedAuthorId=7408149816&origin=inward'},
   {'@_fa': 'true',
    '@ref': 'scopus-author',
    '@href': 'https://www.scopus.com/authid/detail.uri?partnerID=HzOxMe3b&authorId=7408149816&origin=inward'}],
  'prism:url': 'https://api.elsevier.com/content/author/author_id/7408149816',
  'dc:identifier': 'AUTHOR_ID:7408149816',
  'eid': '9-s2.0-7408149816',
  'preferred-name': {'surname': 'Shimizu',
   'given-name': 'Tetsu',
   'initials': 'T.'},
  'name-variant': [{'@_fa': 'true',
    'surname': 'Shimizu',
    'given-name': 'T.',
    'initials': 'T.'}],
  'document-count': '17',
  'subject-area

In [34]:
for auth in auth_srch.results:
    try:
        print(auth['affiliation-current']['affiliation-country'])
    except KeyError:
        pass

Japan
Japan
Japan
Japan
Japan
Japan
Japan
Japan
Japan
Japan


It appears there is no method within ElsSearch class to check API rem. quaota. 

So will run pybliometrics again!

In [23]:
q1_author_df.sample(1)

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family,SA_list
545947,10.1037/a0037703,1_59793,Jason M.,Holland,additional,[],,,jason m.,holland,"[Psychology, Arts and Humanities]"


In [24]:
s = AuthorSearch(f'AUTHLAST(Holland) AND AUTHFIRST(Jason M.)')

In [25]:
len(s.authors)

1

In [26]:
s.get_key_remaining_quota()

'4558'

FINALLY!!!!!!!!!!!!!!!!!!!!

It appears that using elsapy is the answer & uses a single token for a Seach query!!!!!!!!!

#### 4.8 - elsapy advanced trials:


- try to understand 429 error handling & change API keys
- try including sbj_area_mapper
- try count - FAIL & SKIPPED!
- try facets - FAIL & SKIPPED!
- try view & fields - FAIL & SKIPPED!

- instead, understand _upper_limit_reached() 


In [36]:
# 4.8.1 - 429 Error

client = ElsClient("fbef84d23df19f2796c259b76a273652")

auth_srch = ElsSearch('AUTHLAST(Shimizu) AND AUTHFIRST(Tetsu)','author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

HTTPError: HTTP 429 Error from https://api.elsevier.com/content/search/author?query=AUTHLAST%28Shimizu%29+AND+AUTHFIRST%28Tetsu%29
and using headers {'X-ELS-APIKey': 'fbef84d23df19f2796c259b76a273652', 'User-Agent': 'elsapy-v0.5.0', 'Accept': 'application/json'}:
{"error-response":{"error-code":"TOO_MANY_REQUESTS","error-message":"Request has been placed in time-out for exceeding quota or rate limits. Please reference HTTP header X-RateLimit-Reset for when requests can resubmitted."}}

HTTPError: HTTP 429 Error from https://api.elsevier.com/content/search/author?query=AUTHLAST%28Shimizu%29+AND+AUTHFIRST%28Tetsu%29
and using headers {'X-ELS-APIKey': 'fbef84d23df19f2796c259b76a273652', 'User-Agent': 'elsapy-v0.5.0', 'Accept': 'application/json'}:
{"error-response":{"error-code":"TOO_MANY_REQUESTS","error-message":"Request has been placed in time-out for exceeding quota or rate limits. Please reference HTTP header X-RateLimit-Reset for when requests can resubmitted."}}



In [37]:
client = ElsClient("6ae4b5181b374f521e60d2ef4be73ec5")

In [38]:
q1_author_df.loc[545947,"family"]

DOI                                     10.1037/a0037703
JRNL_ID                                          1_59793
given                                           Jason M.
family                                           Holland
sequence                                      additional
affiliation                                           []
ORCID                                                NaN
authenticated-orcid                                  NaN
new_given                                       jason m.
new_family                                       holland
SA_list                [Psychology, Arts and Humanities]
Name: 545947, dtype: object

In [39]:
# 4.8.2 - incl. sbj. area:

545947

auth_srch = ElsSearch(f'AUTHLAST({q1_author_df.loc[545947,"family"]}) AND AUTHFIRST({q1_author_df.loc[545947,"given"]}) AND {sbj_area_query_creator(q1_author_df.loc[545947,"SA_list"])}','author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

auth_srch has 1 results.


In [52]:
q1_author_df.sample(1)

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family,SA_list
339449,10.1155/2019/7276152,1_24233,Bin,Yang,first,"[{'name': 'School of Mathematical Science, Hua...",,,bin,yang,"[Computer Science, Multidisciplinary]"


In [53]:
# 4.8.3 - count argument:

auth_srch = ElsSearch(f'AUTHLAST({q1_author_df.loc[339449,"family"]}) AND AUTHFIRST({q1_author_df.loc[339449,"given"]}) AND {sbj_area_query_creator(q1_author_df.loc[339449,"SA_list"])}', 'author', count=10)
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

TypeError: __init__() got an unexpected keyword argument 'count'

It appears that additional params. are not supported by elsapy. However, in elssearch.py the execute function has a default get_all = False, meaning it does not automatically use additional tokens to get all the query results.

This means that .count() param is most. prob. not needed. However, still need to understand how to use _upper_limit_reached() param for exception handling!!!

In [57]:
# 4.8.3 - _upper_limit_reached argument:

# .loc[70] is Wei Yu, which will work nicely 

auth_srch = ElsSearch(f'AUTHLAST({q1_author_df.loc[70,"family"]}) AND AUTHFIRST({q1_author_df.loc[70,"given"]}) AND {sbj_area_query_creator(q1_author_df.loc[70,"SA_list"])}', 'author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

auth_srch has 25 results.


In [60]:
auth_srch._upper_limit_reached()

False

In [None]:
# Didn't work will try Wei Wang instead

In [73]:
# 4.8.3 - _upper_limit_reached argument - TAKE 2:



auth_srch = ElsSearch('AUTHLAST(wang) AND AUTHFIRST(wei)', 'author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

auth_srch has 25 results.


In [75]:
auth_srch.tot_num_res

17043

In [74]:
auth_srch._upper_limit_reached()

False

Thanks to Wei Wang, this debate is also settled! Because the elsapy pack. is so amazing, it will retr. the first 25 results even if the total num is over the API threshold of 5K.

Meaning, we don't have to use any kind of error handling!

This means that after a final trial we are ready to create a loop & use all the API keys available get info for first authors! 

In [63]:
with open("author_records", "rb") as fp:
    author_records = pickle.load(fp)

In [71]:
pd.DataFrame(author_records)[]pd.DataFrame(author_records)["affil"].map(lambda x: "China" in x if type(x)== list else True)

Unnamed: 0,new_given,new_family,SA_list,affil
0,yichuan,xiao,"[Biochemistry, Genetics and Molecular Biology]","[China, China, China, China]"
10,hainan,liu,[Medicine],"[China, China, China, China, None, None]"
11,yougen,chen,"[Physics and Astronomy, Materials Science, Mat...",[China]
36,wei,li,"[Energy, Engineering]","[United States, China, China, China, China, Ch..."
40,john r.,agudelo,"[Energy, Engineering]",Too many results!
41,j.,isselstein,"[Environmental Science, Agricultural and Biolo...",Too many results!
42,yanke,zhang,[Multidisciplinary],Too many results!
43,wei,wang,[Chemistry],Too many results!
44,he qing,huang,"[Earth and Planetary Sciences, Environmental S...",Too many results!
45,fatima r.,qadri,[Medicine],Too many results!


#### 4.9 - Final Checks:

In [101]:
q1_author_df.sample(1)

Unnamed: 0,DOI,JRNL_ID,given,family,sequence,affiliation,ORCID,authenticated-orcid,new_given,new_family,SA_list
216615,10.1016/j.jss.2012.05.024,1_9552,Rory V.,O’Connor,additional,[],,,rory v.,o’connor,[Computer Science]


In [79]:
n = AuthorSearch(f'AUTHLAST({q1_author_df.loc[106178,"family"]}) AND AUTHFIRST({q1_author_df.loc[106178,"family"]}) AND {sbj_area_query_creator(q1_author_df.loc[106178,"SA_list"])}')


In [81]:
len(n.authors)

25

In [82]:
n.get_key_remaining_quota()

'4552'

In [83]:
auth_srch = ElsSearch(f'AUTHLAST({q1_author_df.loc[106178,"family"]}) AND AUTHFIRST({q1_author_df.loc[106178,"family"]}) AND {sbj_area_query_creator(q1_author_df.loc[106178,"SA_list"])}', 'author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

auth_srch has 25 results.


In [84]:
auth_srch.tot_num_res

25

In [85]:
auth_srch = ElsSearch(f'AUTHLAST({q1_author_df.loc[106178,"family"]}) AND AUTHFIRST({q1_author_df.loc[106178,"family"]})', 'author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

auth_srch has 25 results.


In [87]:
n = AuthorSearch(f'AUTHLAST({q1_author_df.loc[215508,"family"]}) AND AUTHFIRST({q1_author_df.loc[215508,"family"]}) AND {sbj_area_query_creator(q1_author_df.loc[215508,"SA_list"])}')


In [88]:
n.authors

In [89]:
n.get_key_remaining_quota()

'4549'

The numbers add up again! Meaning the elsapy package uses a single token for each query.

However, as this package does not store the results, it is important to save everything retrieved to prevent repeat queries!

Now the only thing left for this nb is to create a loop for first authors!!!

### Part 5 - The ELSAPY LOOP:

#### 5.1 - Creating q1_first_author_df & Validating datasets before the loop!


In [106]:
len(q1_author_df[q1_author_df.sequence == "first"])

159711

In [107]:
# Creating a first author df
# Using .groupby() to have DOI & SA_list columns 


q1_first_author_df = q1_author_df[q1_author_df.sequence == "first"].groupby(["new_given", "new_family"])["SA_list"].apply(list)

q1_first_author_df = q1_first_author_df.to_frame().merge(q1_author_df[q1_author_df.sequence == "first"].groupby(["new_given", "new_family"])["DOI"].apply(list), how= "inner", right_index=True, left_index=True)

In [131]:
q1_first_author_df.reset_index(inplace=True)

In [137]:
q1_first_author_df[:5]

Unnamed: 0,new_given,new_family,SA_list,DOI
0,,,[[Arts and Humanities]],[10.1177/1606822x14528636]
1,jing,cai,"[[Social Sciences, Arts and Humanities]]",[10.1016/j.jeap.2016.09.002]
2,qing,hao,"[[Economics, Econometrics and Finance]]",[10.1016/j.finmar.2015.11.003]
3,yong,zhou,"[[Business, Management and Accounting]]",[10.1080/10548408.2015.1075933]
4,.,jasmin,"[[Immunology and Microbiology, Chemical Engine...","[10.1186/1477-3155-9-4, 10.1186/1477-3155-9-12]"


The first author has empty strings for name & surname. Must be dropped!

---

Will analyse the dataset to see if there are any other similar issues!

In [138]:
import regex as re

In [141]:
q1_first_author_df[q1_first_author_df.new_given.map(lambda x: bool(re.match("\W",x)))]

Unnamed: 0,new_given,new_family,SA_list,DOI
1,jing,cai,"[[Social Sciences, Arts and Humanities]]",[10.1016/j.jeap.2016.09.002]
2,qing,hao,"[[Economics, Econometrics and Finance]]",[10.1016/j.finmar.2015.11.003]
3,yong,zhou,"[[Business, Management and Accounting]]",[10.1080/10548408.2015.1075933]
4,.,jasmin,"[[Immunology and Microbiology, Chemical Engine...","[10.1186/1477-3155-9-4, 10.1186/1477-3155-9-12]"


In [144]:
q1_first_author_df[q1_first_author_df.new_family.map(lambda x: bool(re.match("\s",x)))]

Unnamed: 0,new_given,new_family,SA_list,DOI
36688,douglas,fairhurst,"[[Economics, Econometrics and Finance]]",[10.1016/j.jbankfin.2020.105907]
46722,gensheng,liu,"[[Computer Science, Engineering, Business, Man...",[10.1108/jmtm-08-2017-0162]
49031,guangling,liu,"[[Economics, Econometrics and Finance]]",[10.1108/01443581011043573]
60680,jasmine,chang,"[[Decision Sciences, Business, Management and ...",[10.1016/j.omega.2018.03.001]
63867,jie,zhang,"[[Economics, Econometrics and Finance, Environ...",[10.1016/j.infoecopol.2015.10.002]
65816,john,jiang,"[[Business, Management and Accounting, Economi...",[10.1016/j.jfineco.2010.02.007]
76930,laura,wahlen,"[[Immunology and Microbiology, Agricultural an...",[10.1080/08927014.2016.1192155]
104492,peng,chen,"[[Engineering, Decision Sciences, Social Scien...",[10.1016/j.trb.2014.11.007]
113548,ruey‐jer,jean,"[[Business, Management and Accounting]]",[10.1108/02651331011037520]
123108,soocheong,jang,"[[Business, Management and Accounting]]",[10.1108/09596111111129995]


It is seen that the after-process .strip() was executed on family & given and not the "new_" counterparts! This is a mistake on my end & will fix below and check for mistakes again!

In [148]:
q1_author_df.new_given = q1_author_df.new_given.str.strip()
q1_author_df.new_family = q1_author_df.new_family.str.strip()

q1_first_author_df.new_given = q1_first_author_df.new_given.str.strip()
q1_first_author_df.new_family = q1_first_author_df.new_family.str.strip()

In [150]:
q1_first_author_df[q1_first_author_df.new_given.map(lambda x: bool(re.match("\W",x)))]

Unnamed: 0,new_given,new_family,SA_list,DOI
4,.,jasmin,"[[Immunology and Microbiology, Chemical Engine...","[10.1186/1477-3155-9-4, 10.1186/1477-3155-9-12]"


In [151]:
q1_first_author_df[q1_first_author_df.new_family.map(lambda x: bool(re.match("\W",x)))]

Unnamed: 0,new_given,new_family,SA_list,DOI
123389,sovu,-,[[Agricultural and Biological Sciences]],[10.14214/sf.444]


In total there are 3 authors with no name & surname or both!

__The problematics DOIs are :__ [10.1177/1606822x14528636, 10.1186/1477-3155-9-4, 10.1186/1477-3155-9-12, 10.14214/sf.444]

These will be dropped from q1_first_author_df but NOT from q1_auhtor_df in this nb!



In [152]:
q1_first_author_df.drop([0,4,123389], inplace=True)

__Data Engineering:__

In [162]:
# 1- Flattening SA_list col:

def flatten(l):
    return [item for sublist in l for item in sublist]

q1_first_author_df.loc[:,"SA_list"] = q1_first_author_df.SA_list.map(flatten)

In [164]:
# 2- Creating an Author ID col:

q1_first_author_df.reset_index(drop=True, inplace=True)

q1_first_author_df.reset_index(drop=False,inplace=True)

q1_first_author_df.loc[:,"index"] = q1_first_author_df.index.map(lambda x: "aut_" + str(x))

q1_first_author_df.set_index("index", inplace=True)

In [173]:
q1_first_author_df.head()

Unnamed: 0_level_0,new_given,new_family,SA_list,DOI
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aut_0,jing,cai,"[Social Sciences, Arts and Humanities]",[10.1016/j.jeap.2016.09.002]
aut_1,qing,hao,"[Economics, Econometrics and Finance]",[10.1016/j.finmar.2015.11.003]
aut_2,yong,zhou,"[Business, Management and Accounting]",[10.1080/10548408.2015.1075933]
aut_3,A.,abba,"[Engineering, Energy]",[10.1109/tns.2010.2049658]
aut_4,A.,agodi,"[Medicine, Nursing]",[10.1038/ejcn.2011.125]


q1_first_author_df should be now ready!

#### 5.2 - Creating the loop

In [198]:
# Step 1- Create input dict:
first_author_input_dict =q1_first_author_df[["new_given","new_family","SA_list"]].to_dict("index")

In [None]:
avail_api_keys_list = []
used_api_keys_list = []

## Initialize client
client = ElsClient("6ae4b5181b374f521e60d2ef4be73ec5")
#client.inst_token = config['insttoken']

In [204]:
testo_aut_dict = q1_first_author_df[["new_given","new_family","SA_list"]].sample(10).to_dict("index")

In [205]:
# Test Loop:

author_out_list= list()

for auth_id in testo_aut_dict:
    given_in = testo_aut_dict[auth_id]["new_given"]
    family_in = testo_aut_dict[auth_id]["new_family"]
    sa_in = testo_aut_dict[auth_id]["SA_list"]

    auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
    auth_srch.execute(client)

    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




In [208]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_42341,[France],[Fabrice],[Bertile],[47]
1,aut_7026,[],[],[],[]
2,aut_75718,[United States],[Kurtis M.],[Anderson],[5]
3,aut_57843,"[Ireland, United States, United States, United...","[Justin D., Philip J., Jeffrey W., Russell J.,...","[Holmes, Holmes, Holmes, Holmes, Holmes, Holme...","[412, 254, 140, 135, 104, 94, 66, 34, 25, 18, ..."
4,aut_68033,[United States],[Joshua D.K.],[Brown],[4]
5,aut_55048,[Turkey],[Huseyin],[Simsek],[15]
6,aut_38275,[United States],[Elchanan],[Mossel],[190]
7,aut_1113,"[United States, United States]","[Daniel M., Daniel M.]","[Gatti, Gatti]","[65, 2]"
8,aut_44651,[Sweden],[Frank],[Menger],[9]
9,aut_83026,"[Australia, Australia]","[M. Ali, Ali M.]","[Darendeliler, Darendeliler]","[189, 5]"


THE TEST RESULTS ARE PERFECT!!!

In [225]:
with open("q1_first_author_df","wb") as p:
    pickle.dump(q1_first_author_df, p)

with open("q1_author_df","wb") as p:
    pickle.dump(q1_author_df, p)

#### 5.3 - Full Loop:

In [7]:
import pandas as pd
import numpy as np
import pickle

from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
# from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch

In [12]:
scopus_codes_dict = {'Agricultural and Biological Sciences': 'AGRI',
 'Arts and Humanities': 'ARTS',
 'Biochemistry, Genetics and Molecular Biology': 'BIOC',
 'Business, Management and Accounting': 'BUSI',
 'Chemical Engineering': 'CENG',
 'Chemistry': 'CHEM',
 'Computer Science': 'COMP',
 'Decision Sciences': 'DECI',
 'Dentistry': 'DENT',
 'Earth and Planetary Sciences': 'EART',
 'Economics, Econometrics and Finance': 'ECON',
 'Energy': 'ENER',
 'Engineering': 'ENGI',
 'Environmental Science': 'ENVI',
 'Health Professions': 'HEAL',
 'Immunology and Microbiology': 'IMMU',
 'Materials Science': 'MATE',
 'Mathematics': 'MATH',
 'Medicine': 'MEDI',
 'Neuroscience': 'NEUR',
 'Nursing': 'NURS',
 'Pharmacology, Toxicology and Pharmaceutics': 'PHAR',
 'Physics and Astronomy': 'PHYS',
 'Psychology': 'PSYC',
 'Social Sciences': 'SOCI',
 'Veterinary': 'VETE',
 'Multidisciplinary': 'MULT'}

def sbj_area_query_creator(sbj_list):
    query = " AND ".join([f"SUBJAREA({scopus_codes_dict[sbj]})" for sbj in sbj_list])
    return query


In [3]:
with open("q1_author_df", "rb") as fp:
    q1_author_df = pickle.load(fp)

with open("q1_first_author_df", "rb") as fp:
    q1_first_author_df = pickle.load(fp)

In [5]:
first_author_input_dict = q1_first_author_df[["new_given","new_family","SA_list"]].to_dict("index")

In [8]:
# Full Loop:

author_out_list= list()

avail_api_keys_list = ["c14e197a4982e7349247b189ff26da45","fbc5e06467522a07f93e247a24d89d3b","3bd33d7e35df95cc6dc576fd67e7356d","784ab6b22d3148f3d3f94a75596d0633","63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071","1a3f1a762b1e2abec678b08f4d7a1038",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9","92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = []

client = ElsClient("6ae4b5181b374f521e60d2ef4be73ec5")
key_count = 1

for auth_id in first_author_input_dict:
    given_in = first_author_input_dict[auth_id]["new_given"]
    family_in = first_author_input_dict[auth_id]["new_family"]
    sa_in = first_author_input_dict[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            key_count += 1
            print(f"Changed API key, now using key number {key_count}: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Changed API key, now using key number 2: c14e197a4982e7349247b189ff26da45
Changed API key, now using key number 3: fbc5e06467522a07f93e247a24d89d3b


HTTPError: HTTP 400 Error from https://api.elsevier.com/content/search/author?query=AUTHLAST%28castroagud%EF%BF%BDn%29+AND+AUTHFIRST%28V.+L.%29+AND+SUBJAREA%28AGRI%29
and using headers {'X-ELS-APIKey': 'fbc5e06467522a07f93e247a24d89d3b', 'User-Agent': 'elsapy-v0.5.0', 'Accept': 'application/json'}:
{"service-error":{"status":{"statusCode":"INVALID_INPUT","statusText":"Error translating query"}}}

In [10]:
len(author_out_list)

4915

In [None]:
first_author_input_dict = q1_first_author_df[["new_given","new_family","SA_list"]].to_dict("index")

In [11]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

In [20]:
q1_first_author_df[["new_given","new_family","SA_list"]][4914:]

Unnamed: 0_level_0,new_given,new_family,SA_list
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aut_4914,V. L.,berdichevsky,"[Physics and Astronomy, Engineering]"
aut_4915,V. L.,castroagud�n,[Agricultural and Biological Sciences]
aut_4916,V. L.,clifton,[Medicine]
aut_4917,V. L.,kozhevnikov,[Chemistry]
aut_4918,V. L.,ratia,"[Physics and Astronomy, Materials Science, Eng..."
...,...,...,...
aut_146533,žiga,šušteršic,"[Physics and Astronomy, Materials Science, Eng..."
aut_146534,živa bricman,rejc,"[Engineering, Energy, Chemical Engineering, En..."
aut_146535,а.kh.,inoyatov,"[Materials Science, Physics and Astronomy]"
aut_146536,е.,blagodatskaya,"[Agricultural and Biological Sciences, Immunol..."


#### 5.4 - Take 2:

In [21]:
first_author_input_dict_take2 = q1_first_author_df[["new_given","new_family","SA_list"]][4916:].to_dict("index")

In [23]:
# Full Loop - Take 2:

#author_out_list= list()

avail_api_keys_list = ["c14e197a4982e7349247b189ff26da45","fbc5e06467522a07f93e247a24d89d3b","3bd33d7e35df95cc6dc576fd67e7356d","784ab6b22d3148f3d3f94a75596d0633","63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071","1a3f1a762b1e2abec678b08f4d7a1038",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9","92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = []

client = ElsClient("6ae4b5181b374f521e60d2ef4be73ec5")
key_count = 1

for auth_id in first_author_input_dict_take2:
    given_in = first_author_input_dict_take2[auth_id]["new_given"]
    family_in = first_author_input_dict_take2[auth_id]["new_family"]
    sa_in = first_author_input_dict_take2[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            key_count += 1
            print(f"Changed API key, now using key number {key_count}: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Changed API key, now using key number 2: c14e197a4982e7349247b189ff26da45
Changed API key, now using key number 3: fbc5e06467522a07f93e247a24d89d3b
Changed API key, now using key number 4: 3bd33d7e35df95cc6dc576fd67e7356d


HTTPError: HTTP 400 Error from https://api.elsevier.com/content/search/author?query=AUTHLAST%28pieterse%29+AND+AUTHFIRST%28andr%EF%BF%BD+c.%29+AND+SUBJAREA%28ARTS%29
and using headers {'X-ELS-APIKey': '3bd33d7e35df95cc6dc576fd67e7356d', 'User-Agent': 'elsapy-v0.5.0', 'Accept': 'application/json'}:
{"service-error":{"status":{"statusCode":"INVALID_INPUT","statusText":"Error translating query"}}}

In [24]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
14157,aut_14158,"[Poland, Poland, Poland]","[Andrzej, Andrzej, Andrzej]","[Ziółkowski, Ziółkowski, Ziółkowski]","[33, 27, 18]"
14158,aut_14159,"[Poland, Poland]","[Andrzej, Andrzej]","[Ziȩba, Zieba]","[103, 41]"
14159,aut_14160,[Poland],[Andrzej Lech],[Dawidowicz],[203]
14160,aut_14161,[Poland],[Andrzej R.],[Reindl],[20]


In [36]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

In [40]:
q1_first_author_df[["new_given","new_family","SA_list"]][14161:]

Unnamed: 0_level_0,new_given,new_family,SA_list
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aut_14161,andrzej r.,reindl,"[Medicine, Environmental Science]"
aut_14162,andrzej s.,murawski,[Social Sciences]
aut_14163,andr� c.,pieterse,[Arts and Humanities]
aut_14164,andus wing-kuen,wong,"[Arts and Humanities, Social Sciences]"
aut_14165,andy,aschwanden,[Multidisciplinary]
...,...,...,...
aut_146533,žiga,šušteršic,"[Physics and Astronomy, Materials Science, Eng..."
aut_146534,živa bricman,rejc,"[Engineering, Energy, Chemical Engineering, En..."
aut_146535,а.kh.,inoyatov,"[Materials Science, Physics and Astronomy]"
aut_146536,е.,blagodatskaya,"[Agricultural and Biological Sciences, Immunol..."


In [33]:
q1_first_author_df[q1_first_author_df.new_given.map(lambda x: bool(re.search("�", x)))]

Unnamed: 0_level_0,new_given,new_family,SA_list,DOI
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aut_14163,andr� c.,pieterse,[Arts and Humanities],[10.4102/ve.v36i1.1420]
aut_112348,roc��o,perez-iglesias,[Medicine],[10.1176/appi.ajp.2009.09050716]


In [32]:
q1_first_author_df[q1_first_author_df.new_family.map(lambda x: bool(re.search("�", x)))]

Unnamed: 0_level_0,new_given,new_family,SA_list,DOI
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aut_4915,V. L.,castroagud�n,[Agricultural and Biological Sciences],[10.3767/003158516x692149]
aut_48200,gizem,h�l�r,[Medicine],[10.1159/000371757]
aut_124149,stephan,k�hler,"[Psychology, Medicine]",[10.1159/000369847]


It is seen that we are able to capture this character with regex!

Will drop them & create the loop once again!

#### 5.5 - Take 3:

In [43]:
first_author_input_dict_take3 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_112348", "aut_48200", "aut_124149"])[["new_given","new_family","SA_list"]][14162:].to_dict("index")

In [44]:
# Full Loop - Take 3:

#author_out_list= list()

avail_api_keys_list = ["3bd33d7e35df95cc6dc576fd67e7356d","784ab6b22d3148f3d3f94a75596d0633","63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071","1a3f1a762b1e2abec678b08f4d7a1038",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9","92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45"]

client = ElsClient("fbc5e06467522a07f93e247a24d89d3b")
key_count = 3

for auth_id in first_author_input_dict_take3:
    given_in = first_author_input_dict_take3[auth_id]["new_given"]
    family_in = first_author_input_dict_take3[auth_id]["new_family"]
    sa_in = first_author_input_dict_take3[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            key_count += 1
            print(f"Changed API key, now using key number {key_count}: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Changed API key, now using key number 4: 3bd33d7e35df95cc6dc576fd67e7356d
Changed API key, now using key number 5: 784ab6b22d3148f3d3f94a75596d0633


ConnectionError: HTTPSConnectionPool(host='api.elsevier.com', port=443): Max retries exceeded with url: /content/search/author?query=AUTHLAST%28sawle%29+AND+AUTHFIRST%28ashley+d.%29+AND+SUBJAREA%28PHAR%29 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001A76DF0A520>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))

Had a connection error - will  fix & start again! 

In [45]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
17384,aut_17386,[United States],[Peter Ashley],[Burke],[113]
17385,aut_17387,[United States],[Ashley B.],[Saunders],[90]
17386,aut_17388,[United States],[Ashley C.],[Harmon],[2]
17387,aut_17389,[United States],[Ashley C.],[Hechler],[5]


In [48]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

In [49]:
# To ease memory usage:

del first_author_input_dict_take2
del first_author_input_dict
del first_author_input_dict_take3


#### 5.6 - Take 4:

In [51]:
first_author_input_dict_take4 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_112348", "aut_48200", "aut_124149"])[["new_given","new_family","SA_list"]][17389:].to_dict("index")

In [53]:
# Full Loop - Take 4:

#author_out_list= list()

avail_api_keys_list = ["784ab6b22d3148f3d3f94a75596d0633","63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071","1a3f1a762b1e2abec678b08f4d7a1038",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9","92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b"]

client = ElsClient("3bd33d7e35df95cc6dc576fd67e7356d")
key_count = 3

for auth_id in first_author_input_dict_take4:
    given_in = first_author_input_dict_take4[auth_id]["new_given"]
    family_in = first_author_input_dict_take4[auth_id]["new_family"]
    sa_in = first_author_input_dict_take4[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            key_count += 1
            print(f"Changed API key, now using key number {key_count}: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Changed API key, now using key number 4: 784ab6b22d3148f3d3f94a75596d0633


HTTPError: HTTP 400 Error from https://api.elsevier.com/content/search/author?query=AUTHLAST%28frana%C2%A7a%29+AND+AUTHFIRST%28a%C2%81ngela%29+AND+SUBJAREA%28MATE%29+AND+SUBJAREA%28BIOC%29+AND+SUBJAREA%28ENGI%29+AND+SUBJAREA%28MEDI%29
and using headers {'X-ELS-APIKey': '784ab6b22d3148f3d3f94a75596d0633', 'User-Agent': 'elsapy-v0.5.0', 'Accept': 'application/json'}:
{"service-error":{"status":{"statusCode":"INVALID_INPUT","statusText":"Error translating query"}}}

In [54]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
18308,aut_18310,[Indonesia],[Azzara],[Nurfitri],[1]
18309,aut_18311,[Canada],[Azzedine],[Boukerche],[1041]
18310,aut_18312,[Italy],[Azzurra],[Meoli],[4]
18311,aut_18313,[Italy],[Azzurra],[Santoro],[16]


In [61]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

In [56]:
q1_first_author_df[q1_first_author_df.new_given.map(lambda x: bool(re.search("", x)))]

Unnamed: 0_level_0,new_given,new_family,SA_list,DOI
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aut_18315,angela,frana§a,"[Materials Science, Biochemistry, Genetics and...",[10.1002/smll.200901006]


In [57]:
q1_first_author_df[q1_first_author_df.new_family.map(lambda x: bool(re.search("", x)))]

Unnamed: 0_level_0,new_given,new_family,SA_list,DOI
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aut_23897,carlos,capistran,"[Business, Management and Accounting, Economic...",[10.1111/j.1538-4616.2009.00280.x]


It is seen that there is another problematic character !

Will be fixed accordingly.

#### 5.7 - Take 5:

In [63]:
first_author_input_dict_take5 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897","aut_112348", "aut_48200", "aut_124149"])[["new_given","new_family","SA_list"]][18313:].to_dict("index")

In [64]:
del first_author_input_dict_take4

In [67]:
# Full Loop - Take 5:

#author_out_list= list()

avail_api_keys_list = ["784ab6b22d3148f3d3f94a75596d0633","63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071","1a3f1a762b1e2abec678b08f4d7a1038",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9","92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b"]

client = ElsClient("3bd33d7e35df95cc6dc576fd67e7356d")
key_count = 3

for auth_id in first_author_input_dict_take5:
    given_in = first_author_input_dict_take5[auth_id]["new_given"]
    family_in = first_author_input_dict_take5[auth_id]["new_family"]
    sa_in = first_author_input_dict_take5[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            key_count += 1
            print(f"Changed API key, now using key number {key_count}: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Changed API key, now using key number 4: 784ab6b22d3148f3d3f94a75596d0633
Changed API key, now using key number 5: 63db00d1e8f7e5a5ef223abad3858dfe
Changed API key, now using key number 6: 3f453389d0cada327703d787cd41b4ec
Changed API key, now using key number 7: e989f6eb6ea2a19b104eed28efe11071
Changed API key, now using key number 8: 1a3f1a762b1e2abec678b08f4d7a1038
Changed API key, now using key number 9: e481b6eef24800668a6ee5ea576c1f97


HTTPError: HTTP 500 Error from https://api.elsevier.com/content/search/author?query=AUTHLAST%28fisher%29+AND+AUTHFIRST%28david%29+AND+SUBJAREA%28MEDI%29+AND+SUBJAREA%28DENT%29+AND+SUBJAREA%28MEDI%29+AND+SUBJAREA%28DENT%29
and using headers {'X-ELS-APIKey': 'e481b6eef24800668a6ee5ea576c1f97', 'User-Agent': 'elsapy-v0.5.0', 'Accept': 'application/json'}:
{"service-error":{"status":{"statusCode":"GENERAL_SYSTEM_ERROR","statusText":"Unable to authenticate."}}}

The internet briefly got disconnected, causing a retry!!! twice!

__API keys 3bd33d7e35df95cc6dc576fd67e7356d & 784ab6b22d3148f3d3f94a75596d0633 are not finised!!!__

In [68]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
33027,aut_33031,[Netherlands],[David],[Fernández-Rivas],[60]
33028,aut_33032,[Spain],[David],[Fernández-Blanco],[15]
33029,aut_33033,"[United Kingdom, Portugal, Brazil, Brazil]","[David, David, David Figueiredo, David L.C.]","[Ferreira, Ferreira, Ferreira Filho, Ferreira]","[61, 27, 1, 1]"
33030,aut_33034,[United States],[David A.],[Ferrucci],[19]


1a3f1a762b1e2abec678b08f4d7a1038 & e481b6eef24800668a6ee5ea576c1f97 are also most probably not finished!

In [69]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

In [70]:
with open("q1_first_author_df","wb") as p:
    pickle.dump(q1_first_author_df, p)

#### 5.8 - Take 6:

In [3]:
# Restarted the PC, re-loading variables:

with open("author_out_list", "rb") as fp:
    author_out_list = pickle.load(fp)

with open("q1_first_author_df", "rb") as fp:
    q1_first_author_df = pickle.load(fp)


In [10]:
first_author_input_dict_take6 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897","aut_112348", "aut_48200", "aut_124149"])[["new_given","new_family","SA_list"]][33032:].to_dict("index")

In [13]:
# Full Loop - Take 6:
# Fixed API keys lists

#author_out_list= list()

avail_api_keys_list = ["784ab6b22d3148f3d3f94a75596d0633","1a3f1a762b1e2abec678b08f4d7a1038",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9","92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b", "63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071"]

client = ElsClient("3bd33d7e35df95cc6dc576fd67e7356d")
key_count = 3

for auth_id in first_author_input_dict_take6:
    given_in = first_author_input_dict_take6[auth_id]["new_given"]
    family_in = first_author_input_dict_take6[auth_id]["new_family"]
    sa_in = first_author_input_dict_take6[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            key_count += 1
            print(f"Changed API key, now using key number {key_count}: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Changed API key, now using key number 4: 784ab6b22d3148f3d3f94a75596d0633
Changed API key, now using key number 5: 1a3f1a762b1e2abec678b08f4d7a1038
Changed API key, now using key number 6: e481b6eef24800668a6ee5ea576c1f97
Changed API key, now using key number 7: 563df86d8ae1a6e62e0f0e4278a45cb9
Changed API key, now using key number 8: 92975ea2d964ac570219d520aaad1d41
Changed API key, now using key number 9: c8eab163448aa6bd54fa97af700a939b
Changed API key, now using key number 10: 53acda4267b6a5a9a6a87b8767aa8cc6
Changed API key, now using key number 11: 9289c787b0d98e1a5a7a7fb58e102e1a
Changed API key, now using key number 12: 1bf711a2bc396e7e99340bdaf2ba29d0
Changed API key, now using key number 13: 396d5385a697326c03800cf37ca4f1a2
Changed API key, now using key number 14: 8a9a5beb728f47b401fd145e1bb035ba
Changed API key, now using key number 15: cfbe1762b57a3310906c53bbbb3a68e5


HTTPError: HTTP 401 Error from https://api.elsevier.com/content/search/author?query=AUTHLAST%28krummenacher%29+AND+AUTHFIRST%28isabelle%29+AND+SUBJAREA%28HEAL%29
and using headers {'X-ELS-APIKey': 'cfbe1762b57a3310906c53bbbb3a68e5', 'User-Agent': 'elsapy-v0.5.0', 'Accept': 'application/json'}:
{"service-error":{"status":{"statusCode":"AUTHORIZATION_ERROR","statusText":"The requestor is not authorized to access the requested view or fields of the resource"}}}

In [14]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
57075,aut_57080,[France],[Isabelle],[Fort],[14]
57076,aut_57081,[France],[Isabelle],[Gallagher],[80]
57077,aut_57082,[Switzerland],[Isabelle],[Herter-Aeberli],[40]
57078,aut_57083,[France],[Isabelle],[Hippolyte],[12]


In [15]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

In [17]:
with open("author_out_list_copy","wb") as p:
    pickle.dump(author_out_list, p)

e481b6eef24800668a6ee5ea576c1f97 is not finished!

Till 53acda4267b6a5a9a6a87b8767aa8cc6 not finished! 

A lot of internet issues!

#### Take 7:

In [3]:
# Restarted the PC, re-loading variables:

with open("author_out_list", "rb") as fp:
    author_out_list = pickle.load(fp)

with open("q1_first_author_df", "rb") as fp:
    q1_first_author_df = pickle.load(fp)


In [9]:
first_author_input_dict_take7 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897","aut_112348", "aut_48200", "aut_124149"])[["new_given","new_family","SA_list"]][57085:].to_dict("index")

In [11]:
# Full Loop - Take 7:
# Fixed API keys lists

#author_out_list= list()

avail_api_keys_list = ["563df86d8ae1a6e62e0f0e4278a45cb9","92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b", "63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071",
"784ab6b22d3148f3d3f94a75596d0633","1a3f1a762b1e2abec678b08f4d7a1038","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","3bd33d7e35df95cc6dc576fd67e7356d"]

client = ElsClient("e481b6eef24800668a6ee5ea576c1f97")

for auth_id in first_author_input_dict_take7:
    given_in = first_author_input_dict_take7[auth_id]["new_given"]
    family_in = first_author_input_dict_take7[auth_id]["new_family"]
    sa_in = first_author_input_dict_take7[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            with open("author_out_list_loop_save","wb") as p:
                pickle.dump(author_out_list, p)
            print("Loop save complete!")
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            print(f"Changed API key, now using key: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Loop save complete!
Changed API key, now using key: 563df86d8ae1a6e62e0f0e4278a45cb9
Loop save complete!
Changed API key, now using key: 92975ea2d964ac570219d520aaad1d41
Loop save complete!
Changed API key, now using key: c8eab163448aa6bd54fa97af700a939b


ConnectionError: HTTPSConnectionPool(host='api.elsevier.com', port=443): Max retries exceeded with url: /content/search/author?query=AUTHLAST%28bai%29+AND+AUTHFIRST%28jiaru%29+AND+SUBJAREA%28DECI%29+AND+SUBJAREA%28BUSI%29 (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001A928B3C550>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond'))

In [12]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
63754,aut_63764,"[China, China, China]","[Jiaquan, Jiaquan, Jiaquan]","[Zhang, Zhang, Zhang]","[165, 76, 4]"
63755,aut_63765,[China],[Jiaqun],[Wei],[70]
63756,aut_63766,"[China, United Kingdom, China, Japan, China, C...","[Jiarong, Jiarong, Jiarong, Jiarong, Jiarong, ...","[Chen, Chen, Chen, Chen, Chen, Chen, Chen, Che...","[174, 22, 12, 7, 6, 5, 3, 1, 1, 1]"
63757,aut_63767,"[China, China, China, China, China]","[Jiayi, Jiarong, Jiarong, Jiarong, Jiarong]","[Li, Li, Li, Li, Li]","[217, 25, 18, 11, 1]"


In [13]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

#### Take 8: - Missing btw. aut_57084-aut_57090

In [21]:
# Proof of missing:

pd.DataFrame(author_out_list)[57079:57089]

Unnamed: 0,author_id,country,given,family,doc_count
57079,aut_57084,[Canada],[Isabelle],[Jean],[9]
57080,aut_57090,[France],[Isabelle],[Mirouze],[15]
57081,aut_57091,[France],[Isabelle],[Munoz],[3]
57082,aut_57092,[United States],[Isabelle M.],[Nilsson],[25]
57083,aut_57093,[Canada],[Isabelle],[Pagé],[40]
57084,aut_57094,[Canada],[Isabelle],[Pitrou],[31]
57085,aut_57095,[France],[Isabelle],[Polaert],[46]
57086,aut_57096,[France],[Isabelle],[Récoché],[4]
57087,aut_57097,[Belgium],[Isabelle],[Simoes Loureiro],[17]
57088,aut_57098,[Belgium],[Isabelle A.],[Sioen],[157]


In [27]:
first_author_input_dict_take8 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897","aut_112348", "aut_48200", "aut_124149"])[["new_given","new_family","SA_list"]][57080:57085].to_dict("index")

In [28]:
# Full Loop - Take 8:
# Fixed API keys lists

#author_out_list= list()

avail_api_keys_list = ["92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b", "63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071",
"784ab6b22d3148f3d3f94a75596d0633","1a3f1a762b1e2abec678b08f4d7a1038","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","3bd33d7e35df95cc6dc576fd67e7356d",
"e481b6eef24800668a6ee5ea576c1f97"]

client = ElsClient("563df86d8ae1a6e62e0f0e4278a45cb9")

for auth_id in first_author_input_dict_take8:
    given_in = first_author_input_dict_take8[auth_id]["new_given"]
    family_in = first_author_input_dict_take8[auth_id]["new_family"]
    sa_in = first_author_input_dict_take8[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            with open("author_out_list_loop_save","wb") as p:
                pickle.dump(author_out_list, p)
            print("Loop save complete!")
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            print(f"Changed API key, now using key: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Loop save complete!
Changed API key, now using key: 92975ea2d964ac570219d520aaad1d41


In [31]:
pd.DataFrame(author_out_list).tail(10)

Unnamed: 0,author_id,country,given,family,doc_count
63754,aut_63764,"[China, China, China]","[Jiaquan, Jiaquan, Jiaquan]","[Zhang, Zhang, Zhang]","[165, 76, 4]"
63755,aut_63765,[China],[Jiaqun],[Wei],[70]
63756,aut_63766,"[China, United Kingdom, China, Japan, China, C...","[Jiarong, Jiarong, Jiarong, Jiarong, Jiarong, ...","[Chen, Chen, Chen, Chen, Chen, Chen, Chen, Che...","[174, 22, 12, 7, 6, 5, 3, 1, 1, 1]"
63757,aut_63767,"[China, China, China, China, China]","[Jiayi, Jiarong, Jiarong, Jiarong, Jiarong]","[Li, Li, Li, Li, Li]","[217, 25, 18, 11, 1]"
63758,aut_63768,[China],[Jiarong],[Liu],[20]
63759,aut_57085,[Switzerland],[Isabelle],[Krummenacher],[6]
63760,aut_57086,[Canada],[Isabelle],[Laforest-Lapointe],[23]
63761,aut_57087,[Canada],[Isabelle],[Masseau],[39]
63762,aut_57088,"[France, Canada, Brazil]","[Isabelle, Isabelle, Isabelle Maria Jaqueline]","[Meunier, Meunier, Meunier]","[108, 12, 2]"
63763,aut_57089,[Canada],[Isabelle],[Michaud-Létourneau],[9]


In [38]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

#### Take 9:

In [37]:
first_author_input_dict_take9 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897", "aut_48200", "aut_112348", "aut_124149"])[["new_given","new_family","SA_list"]][63764:].to_dict("index")

In [39]:
# Full Loop - Take 9:
# Fixed API keys lists

#author_out_list= list()

avail_api_keys_list = ["92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b", "63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071",
"784ab6b22d3148f3d3f94a75596d0633","1a3f1a762b1e2abec678b08f4d7a1038","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","3bd33d7e35df95cc6dc576fd67e7356d",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9"]

client = ElsClient("92975ea2d964ac570219d520aaad1d41")

for auth_id in first_author_input_dict_take9:
    given_in = first_author_input_dict_take9[auth_id]["new_given"]
    family_in = first_author_input_dict_take9[auth_id]["new_family"]
    sa_in = first_author_input_dict_take9[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            with open("author_out_list_loop_save","wb") as p:
                pickle.dump(author_out_list, p)
            print("Loop save complete!")
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            print(f"Changed API key, now using key: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Loop save complete!
Changed API key, now using key: 92975ea2d964ac570219d520aaad1d41


Had kernel issues, the run was wasted...

Will restart PC & start a new loop

#### Take 10:

In [3]:
# Restarted the PC, re-loading variables:

with open("author_out_list", "rb") as fp:
    author_out_list = pickle.load(fp)

with open("q1_first_author_df", "rb") as fp:
    q1_first_author_df = pickle.load(fp)

with open("author_out_list_loop_save", "rb") as fp:
    author_out_list_loop_save = pickle.load(fp)


In [10]:
first_author_input_dict_take10 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897", "aut_48200", "aut_112348", "aut_124149"])[["new_given","new_family","SA_list"]][63764:].to_dict("index")

In [11]:
# Full Loop - Take 10:
# Fixed API keys lists

#author_out_list= list()

avail_api_keys_list = ["92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba","cfbe1762b57a3310906c53bbbb3a68e5",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6","a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b", "63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071",
"784ab6b22d3148f3d3f94a75596d0633","1a3f1a762b1e2abec678b08f4d7a1038","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","3bd33d7e35df95cc6dc576fd67e7356d",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9"]

client = ElsClient("92975ea2d964ac570219d520aaad1d41")

for auth_id in first_author_input_dict_take10:
    given_in = first_author_input_dict_take10[auth_id]["new_given"]
    family_in = first_author_input_dict_take10[auth_id]["new_family"]
    sa_in = first_author_input_dict_take10[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            with open("author_out_list_loop_save","wb") as p:
                pickle.dump(author_out_list, p)
            print("Loop save complete!")
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)

            print(f"Changed API key, now using key: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Loop save complete!
Changed API key, now using key: 92975ea2d964ac570219d520aaad1d41
Loop save complete!
Changed API key, now using key: c8eab163448aa6bd54fa97af700a939b
Loop save complete!
Changed API key, now using key: 396d5385a697326c03800cf37ca4f1a2
Loop save complete!
Changed API key, now using key: 8a9a5beb728f47b401fd145e1bb035ba
Loop save complete!
Changed API key, now using key: cfbe1762b57a3310906c53bbbb3a68e5
Loop save complete!
Changed API key, now using key: 7b6016e18d538a15806f172d0ac7a0cd


HTTPError: HTTP 400 Error from https://api.elsevier.com/content/search/author?query=AUTHLAST%28mutu+%28ng%C4%81ti+kahu%2C+te+rarawa+and+ng%C4%81%29+AND+AUTHFIRST%28margaret%29+AND+SUBJAREA%28MULT%29
and using headers {'X-ELS-APIKey': '7b6016e18d538a15806f172d0ac7a0cd', 'User-Agent': 'elsapy-v0.5.0', 'Accept': 'application/json'}:
{"service-error":{"status":{"statusCode":"INVALID_INPUT","statusText":"Error translating query"}}}

In [12]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
85760,aut_85765,[Switzerland],[Margaret M.],[Kehoe],[34]
85761,aut_85766,[Australia],[Margaret Anne],[Kelaher],[169]
85762,aut_85767,[United States],[Margaret T.],[Lehman-Blake],[7]
85763,aut_85768,[],[],[],[]


In [6]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

In [16]:
q1_first_author_df.loc["aut_85770",:]

new_given                                margaret
new_family    mutu (ngāti kahu, te rarawa and ngā
SA_list                       [Multidisciplinary]
DOI               [10.1080/03036758.2019.1669670]
Name: aut_85770, dtype: object

The record has "(" but not ")" !
Will be fixed manually below:

In [20]:
q1_first_author_df[q1_first_author_df.new_family.map(lambda x: bool(re.search("[\(\)]",x)))]

Unnamed: 0_level_0,new_given,new_family,SA_list,DOI
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aut_85770,margaret,"mutu (ngāti kahu, te rarawa and ngā",[Multidisciplinary],[10.1080/03036758.2019.1669670]


In [21]:
q1_first_author_df[q1_first_author_df.new_given.map(lambda x: bool(re.search("[\(\)]",x)))]

Unnamed: 0_level_0,new_given,new_family,SA_list,DOI
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [23]:
q1_first_author_df.loc["aut_85770","new_family"] = "mutu"

#### Take 11:

From now on, except for nightly updates, will just use this loop in case of connection errors

In [47]:
first_author_input_dict_take11 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897", "aut_48200", "aut_112348", "aut_124149"])[["new_given","new_family","SA_list"]][86571:].to_dict("index")

In [48]:
# Full Loop - Take 11:
# Fixed API keys lists

#author_out_list= list()

avail_api_keys_list = ["7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6",
"a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","f36f0ccc1554186bb009e9ca7b3c14c0","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747",
"2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b", "63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071",
"784ab6b22d3148f3d3f94a75596d0633","1a3f1a762b1e2abec678b08f4d7a1038","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","3bd33d7e35df95cc6dc576fd67e7356d",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9","92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba"]

client = ElsClient("cfbe1762b57a3310906c53bbbb3a68e5")
print(f'Start Time = {datetime.now().strftime("%H:%M:%S")}')

for auth_id in first_author_input_dict_take11:
    given_in = first_author_input_dict_take11[auth_id]["new_given"]
    family_in = first_author_input_dict_take11[auth_id]["new_family"]
    sa_in = first_author_input_dict_take11[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            with open("author_out_list_loop_save","wb") as p:
                pickle.dump(author_out_list, p)
            print("Loop save complete!")
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)
            
            print(f'Time = {datetime.now().strftime("%H:%M:%S")}')
            
            print(f"Changed API key, now using key: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Start Time = 10:39:16
Loop save complete!
Time = 11:14:56
Changed API key, now using key: 7b6016e18d538a15806f172d0ac7a0cd
Loop save complete!
Time = 13:54:27
Changed API key, now using key: 5d30dea104b6b7c33f6a120619fdbc6e
Loop save complete!
Time = 13:55:42
Changed API key, now using key: e6cc81dc4b558be9892fa4e0715822a1
Loop save complete!
Time = 14:59:54
Changed API key, now using key: da2094a93eb167105699cfb6c47466b6
Loop save complete!
Time = 18:27:33
Changed API key, now using key: a323957ad88e129d8bae2c7b2fd0e63a
Loop save complete!
Time = 20:07:07
Changed API key, now using key: a809d51349b6a614581cf57493dc0c94
Loop save complete!
Time = 20:11:28
Changed API key, now using key: f36f0ccc1554186bb009e9ca7b3c14c0
Loop save complete!
Time = 20:21:51
Changed API key, now using key: a1d21854963d1cca62742c56b5f7633d


ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

In [57]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
101436,aut_101441,[South Africa],[O. T.],[Laseinde],[45]
101437,aut_101442,[Canada],[Ophélie],[Ronce],[51]
101438,aut_101443,[United Kingdom],[Ophelie],[Torres],[7]
101439,aut_101444,[Canada],[Opher],[Baron],[41]


In [50]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

#### Take 12:

In [60]:
q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897", "aut_48200", "aut_112348", "aut_124149"])[101440:]

Unnamed: 0_level_0,new_given,new_family,SA_list,DOI
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aut_101445,ophir,handzel,"[Medicine, Health Professions]",[10.1159/000349913]
aut_101446,or,ettlinger,[Arts and Humanities],[10.1162/leon_a_00808]
aut_101447,ora,peleg,[Social Sciences],[10.1080/01639625.2012.726176]
aut_101448,oral,sokucu,"[Medicine, Dentistry]",[10.2319/120309-694.1]
aut_101449,oral,yagci,[Engineering],[10.1016/j.apor.2017.08.014]
...,...,...,...,...
aut_146533,žiga,šušteršic,"[Physics and Astronomy, Materials Science, Eng...",[10.1016/j.polymdegradstab.2009.10.013]
aut_146534,živa bricman,rejc,"[Engineering, Energy, Chemical Engineering, En...","[10.1016/j.ijepes.2014.05.019, 10.1016/j.jlp.2..."
aut_146535,а.kh.,inoyatov,"[Materials Science, Physics and Astronomy]",[10.1016/j.elspec.2015.02.015]
aut_146536,е.,blagodatskaya,"[Agricultural and Biological Sciences, Immunol...",[10.1007/s00374-014-0899-6]


In [64]:
q1_first_author_df.loc["aut_101446","new_given"] = "o."

Forgot to query or etlinger

In [65]:
first_author_input_dict_take12 = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897", "aut_48200", "aut_112348", "aut_124149"])[["new_given","new_family","SA_list"]][101442:].to_dict("index")

In [66]:
# Full Loop - Take 12:
# Fixed API keys lists

#author_out_list= list()

avail_api_keys_list = ["a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e","8707db153e4b9672fa6df25b03a5f747",
"2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = ["6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b", "63db00d1e8f7e5a5ef223abad3858dfe","3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071",
"784ab6b22d3148f3d3f94a75596d0633","1a3f1a762b1e2abec678b08f4d7a1038","53acda4267b6a5a9a6a87b8767aa8cc6","9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","3bd33d7e35df95cc6dc576fd67e7356d",
"e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9","92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6",
"a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94"]

client = ElsClient("f36f0ccc1554186bb009e9ca7b3c14c0")
print(f'Start Time = {datetime.now().strftime("%H:%M:%S")}')

for auth_id in first_author_input_dict_take12:
    given_in = first_author_input_dict_take12[auth_id]["new_given"]
    family_in = first_author_input_dict_take12[auth_id]["new_family"]
    sa_in = first_author_input_dict_take12[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
    except:
        if len(avail_api_keys_list) >0:
            with open("author_out_list_loop_save","wb") as p:
                pickle.dump(author_out_list, p)
            print("Loop save complete!")
            new_key = avail_api_keys_list.pop(0)            
            client = ElsClient(new_key)
            
            print(f'Time = {datetime.now().strftime("%H:%M:%S")}')
            
            print(f"Changed API key, now using key: {new_key}")
            used_api_keys_list.append(new_key)

            auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
            auth_srch.execute(client)

        else:
            print("No available API keys to use!")
            break


    country_out = list()
    given_out = list()
    family_out = list()
    doc_count_out = list()

    for auth in auth_srch.results:
        # Country:
        try:
            country_out.append(auth['affiliation-current']['affiliation-country'])
        except:
            pass

        # Given:
        try:
            given_out.append(auth['preferred-name']['given-name'])
        except:
            pass

        # Family:
        try:
            family_out.append(auth['preferred-name']['surname'])
        except:
            pass

        # Doc Count:
        try:
            doc_count_out.append(auth['document-count'])
        except:
            pass

    
    author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)

    author_out_list.append(author_out_dict)




Start Time = 21:32:05
Loop save complete!
Time = 00:09:23
Changed API key, now using key: a1d21854963d1cca62742c56b5f7633d
Loop save complete!
Time = 00:46:33
Changed API key, now using key: f3bdebaa3f41b7443d8d9d8983c00479
Loop save complete!
Time = 01:17:38
Changed API key, now using key: bfaac6eb78a88e97874fa1d0bfd8c63e
Loop save complete!
Time = 01:23:27
Changed API key, now using key: 546932c3fcd8e21fbc6eab08010b9f44
Loop save complete!
Time = 01:38:54
Changed API key, now using key: 0599d76680c29c71e028fc9b77b0c063
Loop save complete!
Time = 03:10:29
Changed API key, now using key: baaa9e51f8a49461352af25710719157
Loop save complete!
Time = 03:50:00
Changed API key, now using key: 30195f0b2192052a36bcab9ce3c4064f


HTTPError: HTTP 429 Error from https://api.elsevier.com/content/search/author?query=AUTHLAST%28chawla%29+AND+AUTHFIRST%28reena%29+AND+SUBJAREA%28PHAR%29+AND+SUBJAREA%28BIOC%29
and using headers {'X-ELS-APIKey': '30195f0b2192052a36bcab9ce3c4064f', 'User-Agent': 'elsapy-v0.5.0', 'Accept': 'application/json'}:
{"error-response":{"error-code":"TOO_MANY_REQUESTS","error-message":"Request has been placed in time-out for exceeding quota or rate limits. Please reference HTTP header X-RateLimit-Reset for when requests can resubmitted."}}

In [77]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
110108,aut_110114,[United Kingdom],[Reem],[Al-Jawahiri],[6]
110109,aut_110115,[Egypt],[Reem],[El-Gendy],[22]
110110,aut_110116,[United Arab Emirates],[Reem],[Khalil],[10]
110111,aut_110117,[Germany],[Reema],[Anouz],[3]


#### Take 13: Improvements on loop

In [15]:
q1_first_author_df.loc["aut_101446","new_given"] = "o."

In [21]:
# Adding "or "o." etlinger":
first_author_input_dict_take13 = {**q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897", "aut_48200", "aut_112348", "aut_124149"])[["new_given","new_family","SA_list"]][101441:101442].to_dict("index"), **q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897", "aut_48200", "aut_112348", "aut_124149"])[["new_given","new_family","SA_list"]][110322:].to_dict("index")}

In [22]:
# Full Loop - Take 13:
# Fixed API keys lists

#author_out_list= list()

avail_api_keys_list = ["f36f0ccc1554186bb009e9ca7b3c14c0","6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b", 
"3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071", "784ab6b22d3148f3d3f94a75596d0633","1a3f1a762b1e2abec678b08f4d7a1038","53acda4267b6a5a9a6a87b8767aa8cc6",
"9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","3bd33d7e35df95cc6dc576fd67e7356d", "e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9",
"92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba", "63db00d1e8f7e5a5ef223abad3858dfe",
"7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6",
"a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
"546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e",
"8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

used_api_keys_list = list()

current_key = avail_api_keys_list.pop(0)
client = ElsClient(current_key)

print(f'Start Time = {datetime.now().strftime("%H:%M:%S")}')

for auth_id in first_author_input_dict_take13:
    given_in = first_author_input_dict_take13[auth_id]["new_given"]
    family_in = first_author_input_dict_take13[auth_id]["new_family"]
    sa_in = first_author_input_dict_take13[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
        cnt_check = True

    except Exception as e:
        err_no = re.search("\d+",str(e))[0]

        print(f"Encountered Error: {err_no}!")
        print(f'Time = {datetime.now().strftime("%H:%M:%S")}')

        with open("author_out_list_loop_save","wb") as p:
                pickle.dump(author_out_list, p)
        print("Loop save complete!")

        if "400" in str(e):
            # query error, need to skip
            print(f"Skipping author: {given_in} {family_in} - {auth_id}")
            cnt_check = False

        elif "401" in str(e):
            # VPN error, sleep try again
            sleep(60)
            print("Slept for 60 & trying again!")
            try:
                auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
                auth_srch.execute(client)
                cnt_check = True
            except:
                print("Got an error again & breaking the loop ! :(")
                break
            
        elif "429" in str(e):
            # change API key 
            while len(avail_api_keys_list) >0:
                used_api_keys_list.append(current_key)
                current_key = avail_api_keys_list.pop(0)
                client = ElsClient(current_key)
                print(f"Changed API key, now using key: {current_key}")
                try:
                    auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
                    auth_srch.execute(client)
                    cnt_check = True
                    print("New key is OK!")
                    break
                except Exception as e:
                    err_no = re.search("\d+",str(e))[0]
                    if err_no == "429":
                        print("New key is also finished!")
                    else:
                        print(f"Skipping author: {given_in} {family_in} - {auth_id}")
                        cnt_check = False
                        break

            if len(avail_api_keys_list) == 0:
                print("No available API keys left & breaking the loop! :(")
                break

        elif "500" in str(e):
            sleep(30)
            print("Slept for 30 & trying again!")
            try:
                auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
                auth_srch.execute(client)
                cnt_check = True
            except:
                cnt_check = False
            pass
        else:
            print("Unknown error & breaking the loop! :(")
            break



    if cnt_check:
        country_out = list()
        given_out = list()
        family_out = list()
        doc_count_out = list()

        for auth in auth_srch.results:
            # Country:
            try:
                country_out.append(auth['affiliation-current']['affiliation-country'])
            except:
                pass

            # Given:
            try:
                given_out.append(auth['preferred-name']['given-name'])
            except:
                pass

            # Family:
            try:
                family_out.append(auth['preferred-name']['surname'])
            except:
                pass

            # Doc Count:
            try:
                doc_count_out.append(auth['document-count'])
            except:
                pass

        author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)
        author_out_list.append(author_out_dict)




Start Time = 13:58:30
Encountered Error: 429!
Time = 13:58:32
Loop save complete!
Changed API key, now using key: 6ae4b5181b374f521e60d2ef4be73ec5
New key is OK!
Encountered Error: 10054!
Time = 16:46:38
Loop save complete!
Unknown error & breaking the loop! :(


In [48]:
with open("author_out_list","wb") as p:
    pickle.dump(author_out_list, p)

In [47]:
pd.DataFrame(author_out_list)

Unnamed: 0,author_id,country,given,family,doc_count
0,aut_0,"[China, China, China, Hong Kong]","[Jing, Jingheng, Jing, (Luna) Jing]","[Cai, Cai, Cai, Cai]","[23, 22, 1, 1]"
1,aut_1,"[United States, United States, United States, ...","[Qing, Qing, (Grace) Qing, Grace Qing]","[Hao, Hao, Hao, Hao]","[86, 11, 2, 1]"
2,aut_2,"[China, China, China, China, China, China, Chi...","[Yongquan, Yonghong, Yong, Dayong, Yongwu, Yon...","[Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zhou, Zho...","[286, 220, 193, 170, 164, 151, 100, 92, 74, 64..."
3,aut_3,"[Italy, Italy, France, Nigeria, Malaysia, Saud...","[Andrea, Alessandro, Ado Adamou Abba, Hafizull...","[Abba, Abbà, Ari, Ahmed, Haruna, Abba, Abba, A...","[75, 65, 44, 19, 17, 15, 8, 3]"
4,aut_4,[Italy],[Antonella],[Agodi],[234]
...,...,...,...,...,...
115302,aut_115308,"[United States, United States]","[Shelia R., Shelia R.]","[Cotten, Cotten]","[87, 20]"
115303,aut_115309,"[Canada, United States, India, India, United S...","[Sunil Ranjan, Samir Ranjan, Swapan Kumar R., ...","[Das, Das, Das, Das, Das]","[183, 179, 164, 62, 26]"
115304,aut_115310,[United States],[Steven R.],[Fassnacht],[104]
115305,aut_115311,[United States],[Seth Robert],[Irish],[81]


In [49]:
q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897", "aut_48200", "aut_112348", "aut_124149"])[["new_given","new_family","SA_list"]][115307:]

Unnamed: 0_level_0,new_given,new_family,SA_list
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aut_115313,s. r.,sahoo,[Engineering]
aut_115314,s. r.,shukla,[Agricultural and Biological Sciences]
aut_115315,s. r.,werre,"[Agricultural and Biological Sciences, Veterin..."
aut_115316,s. r. k.,murthy,"[Biochemistry, Genetics and Molecular Biology]"
aut_115317,s. rae,wannier,"[Medicine, Immunology and Microbiology]"
...,...,...,...
aut_146533,žiga,šušteršic,"[Physics and Astronomy, Materials Science, Eng..."
aut_146534,živa bricman,rejc,"[Engineering, Energy, Chemical Engineering, En..."
aut_146535,а.kh.,inoyatov,"[Materials Science, Physics and Astronomy]"
aut_146536,е.,blagodatskaya,"[Agricultural and Biological Sciences, Immunol..."


In [50]:
first_author_input_dict_take_infinite = q1_first_author_df.drop(["aut_4915", "aut_14163", "aut_18315", "aut_23897", "aut_48200", "aut_112348", "aut_124149"])[["new_given","new_family","SA_list"]][115307:].to_dict("index")

In [51]:
# Full Loop - Take 13:
# Fixed API keys lists

#author_out_list= list()

# avail_api_keys_list = ["f36f0ccc1554186bb009e9ca7b3c14c0","6ae4b5181b374f521e60d2ef4be73ec5", "c14e197a4982e7349247b189ff26da45", "fbc5e06467522a07f93e247a24d89d3b", 
# "3f453389d0cada327703d787cd41b4ec","e989f6eb6ea2a19b104eed28efe11071", "784ab6b22d3148f3d3f94a75596d0633","1a3f1a762b1e2abec678b08f4d7a1038","53acda4267b6a5a9a6a87b8767aa8cc6",
# "9289c787b0d98e1a5a7a7fb58e102e1a","1bf711a2bc396e7e99340bdaf2ba29d0","3bd33d7e35df95cc6dc576fd67e7356d", "e481b6eef24800668a6ee5ea576c1f97","563df86d8ae1a6e62e0f0e4278a45cb9",
# "92975ea2d964ac570219d520aaad1d41","c8eab163448aa6bd54fa97af700a939b","396d5385a697326c03800cf37ca4f1a2","8a9a5beb728f47b401fd145e1bb035ba", "63db00d1e8f7e5a5ef223abad3858dfe",
# "7b6016e18d538a15806f172d0ac7a0cd","5d30dea104b6b7c33f6a120619fdbc6e","e6cc81dc4b558be9892fa4e0715822a1","da2094a93eb167105699cfb6c47466b6",
# "a323957ad88e129d8bae2c7b2fd0e63a","a809d51349b6a614581cf57493dc0c94","a1d21854963d1cca62742c56b5f7633d","f3bdebaa3f41b7443d8d9d8983c00479","bfaac6eb78a88e97874fa1d0bfd8c63e",
# "546932c3fcd8e21fbc6eab08010b9f44","0599d76680c29c71e028fc9b77b0c063","baaa9e51f8a49461352af25710719157","30195f0b2192052a36bcab9ce3c4064f","b7e95f5ea731eb5c9e84e7a1d499f50e",
# "8707db153e4b9672fa6df25b03a5f747","2dab4694b4347fa574d159bb97484fc4","6af3d2c09eb08ec6e12f0cead9a1f5bb","3872e798bf48fa28af583b9ebef5deb6","1473c31dbcdb425f9cdaf75c673279d3"]

# used_api_keys_list = list()

# current_key = avail_api_keys_list.pop(0)
# client = ElsClient(current_key)

print(f'Start Time = {datetime.now().strftime("%H:%M:%S")}')

for auth_id in first_author_input_dict_take_infinite:
    given_in = first_author_input_dict_take_infinite[auth_id]["new_given"]
    family_in = first_author_input_dict_take_infinite[auth_id]["new_family"]
    sa_in = first_author_input_dict_take_infinite[auth_id]["SA_list"]

    try:
        auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
        auth_srch.execute(client)
        cnt_check = True

    except Exception as e:
        err_no = re.search("\d+",str(e))[0]

        print(f"Encountered Error: {err_no}!")
        print(f'Time = {datetime.now().strftime("%H:%M:%S")}')

        with open("author_out_list_loop_save","wb") as p:
                pickle.dump(author_out_list, p)
        print("Loop save complete!")

        if "400" in str(e):
            # query error, need to skip
            print(f"Skipping author: {given_in} {family_in} - {auth_id}")
            cnt_check = False

        elif "401" in str(e):
            # VPN error, sleep try again
            sleep(60)
            print("Slept for 60 & trying again!")
            try:
                auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
                auth_srch.execute(client)
                cnt_check = True
            except:
                print("Got an error again & breaking the loop ! :(")
                break
            
        elif "429" in str(e):
            # change API key 
            while len(avail_api_keys_list) >0:
                used_api_keys_list.append(current_key)
                current_key = avail_api_keys_list.pop(0)
                client = ElsClient(current_key)
                print(f"Changed API key, now using key: {current_key}")
                try:
                    auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
                    auth_srch.execute(client)
                    cnt_check = True
                    print("New key is OK!")
                    break
                except Exception as e:
                    err_no = re.search("\d+",str(e))[0]
                    if err_no == "429":
                        print("New key is also finished!")
                    else:
                        print(f"Skipping author: {given_in} {family_in} - {auth_id}")
                        cnt_check = False
                        break

            if len(avail_api_keys_list) == 0:
                print("No available API keys left & breaking the loop! :(")
                break

        elif "500" in str(e):
            sleep(30)
            print("Slept for 30 & trying again!")
            try:
                auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
                auth_srch.execute(client)
                cnt_check = True
            except:
                cnt_check = False
            pass

        elif "10060" in str(e) or "10054" in str(e):
            cnt_check = False
            for i in range(10):
                sleep(30)
                print("Slept for 30 & trying again!")
                print(f"Trial number: {i+1}")
                try:
                    auth_srch = ElsSearch(f'AUTHLAST({family_in}) AND AUTHFIRST({given_in}) AND {sbj_area_query_creator(sa_in)}','author')
                    auth_srch.execute(client)
                    cnt_check = True
                    print("It is now OK!")
                    break
                except Exception as e:
                    err_no = re.search("\d+",str(e))[0]
                    if err_no == "10060":
                        print(f"It is still {err_no}!")
                    else:
                        print(f"Another error: {err_no}")
            
            if not cnt_check:
                print(f"Couldn't fix {err_no} & breaking the loop! :(")
                break

        else:
            print("Unknown error & breaking the loop! :(")
            break



    if cnt_check:
        country_out = list()
        given_out = list()
        family_out = list()
        doc_count_out = list()

        for auth in auth_srch.results:
            # Country:
            try:
                country_out.append(auth['affiliation-current']['affiliation-country'])
            except:
                pass

            # Given:
            try:
                given_out.append(auth['preferred-name']['given-name'])
            except:
                pass

            # Family:
            try:
                family_out.append(auth['preferred-name']['surname'])
            except:
                pass

            # Doc Count:
            try:
                doc_count_out.append(auth['document-count'])
            except:
                pass

        author_out_dict = dict(author_id=auth_id, country=country_out, given=given_out, family=family_out, doc_count=doc_count_out)
        author_out_list.append(author_out_dict)




Start Time = 18:38:09
Encountered Error: 10060!
Time = 18:43:33
Loop save complete!
Slept for 30 & trying again!
Trial number: 1
It is now OK!
Encountered Error: 10060!
Time = 18:54:23
Loop save complete!
Slept for 30 & trying again!
Trial number: 1
It is now OK!
Encountered Error: 10060!
Time = 19:19:01
Loop save complete!
Slept for 30 & trying again!
Trial number: 1
Another error: 443
Slept for 30 & trying again!
Trial number: 2
It is now OK!
Encountered Error: 10060!
Time = 20:21:09
Loop save complete!
Slept for 30 & trying again!
Trial number: 1
It is now OK!
Encountered Error: 10054!
Time = 21:50:45
Loop save complete!
Slept for 30 & trying again!
Trial number: 1
It is now OK!
Encountered Error: 429!
Time = 22:24:10
Loop save complete!
Changed API key, now using key: fbc5e06467522a07f93e247a24d89d3b
New key is OK!
Encountered Error: 10060!
Time = 22:28:01
Loop save complete!
Slept for 30 & trying again!
Trial number: 1
It is now OK!
Encountered Error: 10060!
Time = 22:31:25
Loop s