### Test database

Let's figure out how the database works and what sort of data and analysis is availabe 

documentation: https://pypi.org/project/pybliometrics/
and: https://pybliometrics.readthedocs.io/en/stable/

In [41]:
pip install pandas

Collecting pandas
  Obtaining dependency information for pandas from https://files.pythonhosted.org/packages/e5/cd/c941b51e95992968e3e8abc7180f33b952478abd6943062051517a808db7/pandas-2.1.0-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Downloading pandas-2.1.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (18 kB)
Collecting numpy>=1.23.2 (from pandas)
  Obtaining dependency information for numpy>=1.23.2 from https://files.pythonhosted.org/packages/86/a1/b8ef999c32f26a97b5f714887e21f96c12ae99a38583a0a96e65283ac0a1/numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Downloading numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (5.6 kB)
Collecting pytz>=2020.1 (from pandas)
  Obtaining dependency information for pytz>=2020.1 from https://files.pythonhosted.org/packages/32/4d/aaf7eff5deb402fd9a24a1449a8119f00d74ae9c2efa79f8ef9994261fc2/pytz-2023.3.post1-py2.py3-none-any.whl.metadata
  Downloading pytz-2023.3.post1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.1 (from

In [22]:
pip install pybliometrics

Note: you may need to restart the kernel to use updated packages.


In [23]:
from pybliometrics.scopus.utils import config
print(config['Authentication']['APIKey'])  # Show keys
print(config['Authentication']['InstToken'])

ac5f4d2994dbec80ddc553e23e50f88b
f0d5da76cc2dd4ca6e74e91512e21c73


In [24]:
# Document-specific information
from pybliometrics.scopus import AbstractRetrieval
ab = AbstractRetrieval("10.1016/j.softx.2019.100263")
ab.title

'pybliometrics: Scriptable bibliometrics using a Python interface to Scopus'

In [25]:
ab.publicationName

'SoftwareX'

In [26]:
ab.authors

[Author(auid=57209617104, indexed_name='Rose M.E.', surname='Rose', given_name='Michael E.', affiliation='60105007'),
 Author(auid=7004212771, indexed_name='Kitchin J.R.', surname='Kitchin', given_name='John R.', affiliation='60027950')]

In [27]:
from pybliometrics.scopus import AuthorRetrieval
au2 = AuthorRetrieval(ab.authors[1].auid)
au2.h_index

39

In [28]:
au1 = AuthorRetrieval(ab.authors[0].auid)
au1.affiliation_current

[Affiliation(id=60105007, parent=None, type='parent', relationship='author', afdispname=None, preferred_name='Max Planck Institute for Innovation and Competition', parent_preferred_name=None, country_code='deu', country='Germany', address_part='Marstallplatz 1', city='Munich', state='Bayern', postal_code='80539', org_domain='ip.mpg.de', org_URL='https://www.ip.mpg.de/en.html')]

In [29]:
# Affiliation information
from pybliometrics.scopus import AffiliationRetrieval
aff1 = AffiliationRetrieval(au1.affiliation_current[0].id)
aff1.author_count

116

## How to search

Search keywords: https://www.scopus.com/search/form.uri?display=advanced

In [32]:
from collections import namedtuple

from pybliometrics.scopus import Search
from pybliometrics.scopus.utils import listify, check_integrity, \
    check_field_consistency

In [33]:
from pybliometrics.scopus import AffiliationSearch
query = "AFFIL(Niels Bohr Institute)"
s = AffiliationSearch(query)
print(s) 

Search 'AFFIL(Niels Bohr Institute)' yielded 5 affiliations as of 2023-09-15:
    Niels Bohr Institutet
    Dark Cosmology Centre
    Niels Bohr Archive
    Niels Bohr International Academy
    Niels Bohr Archive


In [42]:
import pandas as pd
pd.DataFrame(s.affiliations)

Unnamed: 0,eid,name,variant,documents,city,country,parent
0,10-s2.0-60017041,Niels Bohr Institutet,Niels Bohr Institute,18314,Copenhagen,Denmark,0
1,10-s2.0-60095454,Dark Cosmology Centre,,1685,Copenhagen,Denmark,0
2,10-s2.0-112667331,Niels Bohr Archive,,8,Copenhagen,Denmark,0
3,10-s2.0-108739293,Niels Bohr International Academy,Niels Bohr Institute,1,Stockholm,Sweden,0
4,10-s2.0-100499050,Niels Bohr Archive,,1,Kobenhavn,Denmark,0


Search keywords:

ALL("Cognitive architectures") AND AUTHOR-NAME(smith)
TITLE-ABS-KEY(*somatic complaint wom?n) AND PUBYEAR AFT 1993
SRCTITLE(*field ornith*) AND VOLUME(75) AND ISSUE(1) AND PAGES(53-66) 

In [None]:
from pybliometrics.scopus import ScopusSearch

In [73]:
#setting download false first allows us to see the number of results before continuing to downloading documents 
s = ScopusSearch('AFFIL ( Niels Bohr Institutet	) AND KEY(oscillator)', download=False)
s.get_results_size()

1

In [68]:
#once downloading we can go on to extract all the information on the articles 
s = ScopusSearch('AFFIL ( Niels Bohr Institutet	) AND KEY(oscillator)', download=True)

In [69]:
print(s)

Search 'AFFIL ( Niels Bohr Institutet	) AND KEY(oscillator)' yielded 1 document as of 2023-09-15:
    2-s2.0-84869173087


In [70]:
import pandas as pd
df = pd.DataFrame(pd.DataFrame(s.results))
df.columns

Index(['eid', 'doi', 'pii', 'pubmed_id', 'title', 'subtype',
       'subtypeDescription', 'creator', 'afid', 'affilname',
       'affiliation_city', 'affiliation_country', 'author_count',
       'author_names', 'author_ids', 'author_afids', 'coverDate',
       'coverDisplayDate', 'publicationName', 'issn', 'source_id', 'eIssn',
       'aggregationType', 'volume', 'issueIdentifier', 'article_number',
       'pageRange', 'description', 'authkeywords', 'citedby_count',
       'openaccess', 'freetoread', 'freetoreadLabel', 'fund_acr', 'fund_no',
       'fund_sponsor'],
      dtype='object')

In [71]:
df

Unnamed: 0,eid,doi,pii,pubmed_id,title,subtype,subtypeDescription,creator,afid,affilname,affiliation_city,affiliation_country,author_count,author_names,author_ids,author_afids,coverDate,coverDisplayDate,publicationName,issn,source_id,eIssn,aggregationType,volume,issueIdentifier,article_number,pageRange,description,authkeywords,citedby_count,openaccess,freetoread,freetoreadLabel,fund_acr,fund_no,fund_sponsor
0,2-s2.0-84869173087,10.1051/0004-6361/201219335,,,Solar Fe abundance and magnetic fields: Toward...,ar,Article,Fabbian D.,60030840;60017041;60011125;60003044,Københavns Universitet;Niels Bohr Institutet;I...,Copenhagen;Copenhagen;San Cristobal de La Lagu...,Denmark;Denmark;Spain;Spain,4,"Fabbian, D.;Moreno-Insertis, F.;Khomenko, E.;N...",8377211100;6602462779;6701573550;7005099620,60011125-60003044;60011125-60003044;60011125-6...,2012-11-22,2012,Astronomy and Astrophysics,46361,26750,14320746,Journal,548,,A35,,Aims. We investigate the impact on Fe abundanc...,Line: formation | Magnetohydrodynamics (MHD) |...,37,1,repositoryam,Green,"FNU, DFF",MTRN-CT-2006-035484,"Natur og Univers, Det Frie Forskningsråd"


## Retrieval

In [135]:
eid=df.iloc[0,0]
eid

'2-s2.0-84869173087'

In [136]:
ab = AbstractRetrieval(f"{eid}", view='FULL')

In [137]:
print(ab)

D. Fabbian, F. Moreno-Insertis, E. Khomenko and Å Nordlund: "Solar Fe abundance and magnetic fields: Towards a consistent reference metallicity", Astronomy and Astrophysics, 548, (no pages found)(2012). https://doi.org/10.1051/0004-6361/201219335.
37 citation(s) as of 2023-09-15
  Affiliation(s):
   Københavns Universitet
   Niels Bohr Institutet
   Instituto Astrofisico de Canarias
   Universidad de la Laguna


In [149]:
ab.authors

[Author(auid=8377211100, indexed_name='Fabbian D.', surname='Fabbian', given_name='D.', affiliation='60011125;60003044'),
 Author(auid=6602462779, indexed_name='Moreno-Insertis F.', surname='Moreno-Insertis', given_name='F.', affiliation='60011125;60003044'),
 Author(auid=6701573550, indexed_name='Khomenko E.', surname='Khomenko', given_name='E.', affiliation='60011125;60003044'),
 Author(auid=7005099620, indexed_name='Nordlund A.', surname='Nordlund', given_name='Å', affiliation='60017041;60030840')]

In [150]:
import numpy as np
authors=pd.DataFrame(ab.authors)
authors_id=authors.iloc[:,0].astype(str).values.tolist()
authors_id

['8377211100', '6602462779', '6701573550', '7005099620']

['Fabbian', 'Moreno-Insertis', 'Khomenko', 'Nordlund']

In [158]:
author_surnames=authors['surname'].values.tolist()
author_surnames

['Fabbian', 'Moreno-Insertis', 'Khomenko', 'Nordlund']

## Metadata

In [141]:
#only works with manually approved APIkey by Elsevier 
from pybliometrics.scopus import CitationOverview
identifier = ["85068268027", "84930616647"]
co = CitationOverview(identifier, start=2019, end=2021)

Scopus403Error: Requestor configuration settings insufficient for access to this resource.

In [None]:
from pybliometrics.scopus import SubjectClassifications
sub = SubjectClassifications({'description': 'Physics'})

In [None]:
print(sub)

In [142]:
sub.results

[Subject(code='3100', description='Physics and Astronomy', detail='Physics and Astronomy (all)', abbrev='PHYS'),
 Subject(code='3101', description='Physics and Astronomy', detail='Physics and Astronomy (miscellaneous)', abbrev='PHYS'),
 Subject(code='3102', description='Physics and Astronomy', detail='Acoustics and Ultrasonics', abbrev='PHYS'),
 Subject(code='3103', description='Physics and Astronomy', detail='Astronomy and Astrophysics', abbrev='PHYS'),
 Subject(code='3104', description='Physics and Astronomy', detail='Condensed Matter Physics', abbrev='PHYS'),
 Subject(code='3105', description='Physics and Astronomy', detail='Instrumentation', abbrev='PHYS'),
 Subject(code='3106', description='Physics and Astronomy', detail='Nuclear and High Energy Physics', abbrev='PHYS'),
 Subject(code='3107', description='Physics and Astronomy', detail='Atomic and Molecular Physics, and Optics', abbrev='PHYS'),
 Subject(code='3108', description='Physics and Astronomy', detail='Radiation', abbrev='

## Gender identification

1) https://gender-api.com/en/
2) https://namsor.app/ with https://github.com/namsor/namsor-python-sdk2

In [143]:
import json

from urllib.request import urlopen

In [189]:
myKey = "Sv2m4CYx7pVPAGgNhWm9lXzZEHfQkRUzSA69"
url = "https://gender-api.com/get?key=" + myKey + f"&name={author_surnames[0]}"
url

'https://gender-api.com/get?key=Sv2m4CYx7pVPAGgNhWm9lXzZEHfQkRUzSA69&name=Fabbian'

In [196]:
response = urlopen(url)
decoded = response.read().decode('utf-8')
data = json.loads(decoded)
print( "Gender: " + data["gender"] +', '+ "Accurency: " + f"{data['accuracy']}" + '%') 

Gender: male, Accurency: 82%


In [198]:
data

{'name': 'fabbian',
 'name_sanitized': 'Fabbian',
 'country': '',
 'gender': 'male',
 'samples': 11,
 'accuracy': 82,
 'duration': '12ms',
 'credits_used': 1}

In [197]:
#loop over author names
#save results with name, gender, country and accuracy in dataframe 

In [199]:
pip install git+https://github.com/namsor/namsor-python-sdk2.git

Collecting git+https://github.com/namsor/namsor-python-sdk2.git
  Cloning https://github.com/namsor/namsor-python-sdk2.git to /private/var/folders/23/_5nwl30d12q_2dv4hhpqjfl00000gn/T/pip-req-build-y_i4fx41
  Running command git clone --filter=blob:none --quiet https://github.com/namsor/namsor-python-sdk2.git /private/var/folders/23/_5nwl30d12q_2dv4hhpqjfl00000gn/T/pip-req-build-y_i4fx41
  Resolved https://github.com/namsor/namsor-python-sdk2.git to commit 5a48bd6c1b22efb7b2f5cfde258eefdb94f48150
  Preparing metadata (setup.py) ... [?25ldone
Collecting frozendict~=2.3.4 (from openapi-client==2.0.27)
  Downloading frozendict-2.3.8-py311-none-any.whl (14 kB)
Collecting python-dateutil~=2.7.0 (from openapi-client==2.0.27)
  Downloading python_dateutil-2.7.5-py2.py3-none-any.whl (225 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.7/225.7 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting typing_extensions~=4.3.0 (from openapi-client==2.0.27)


In [200]:
import openapi_client 


In [206]:
import time
import openapi_client
from pprint import pprint
from openapi_client.apis.tags import admin_api
from openapi_client.model.api_classifier_taxonomy_out import APIClassifierTaxonomyOut
from openapi_client.model.api_classifiers_status_out import APIClassifiersStatusOut
from openapi_client.model.api_key_out import APIKeyOut
from openapi_client.model.api_period_usage_out import APIPeriodUsageOut
from openapi_client.model.api_services_out import APIServicesOut
from openapi_client.model.api_usage_aggregated_out import APIUsageAggregatedOut
from openapi_client.model.api_usage_history_out import APIUsageHistoryOut
from openapi_client.model.region_out import RegionOut
from openapi_client.model.software_version_out import SoftwareVersionOut
# Defining the host is optional and defaults to https://v2.namsor.com/NamSorAPIv2
# See configuration.py for a list of all supported configuration parameters.
configuration = openapi_client.Configuration(
    host = "https://v2.namsor.com/NamSorAPIv2"
)

# The client must configure the authentication and authorization parameters
# in accordance with the API server security policy.
# Examples for each auth method are provided below, use the example that
# satisfies your auth use case.

# Configure API key authorization: api_key
configuration.api_key['api_key'] = 'a3ed1bd679dae042f880bee01ca93e64'



In [207]:
try:
    # Infer the likely gender of a name, given a local context (ISO2 country code).
    api_response = api_instance.gender_geo(first_name, last_name, country_iso2)
    pprint(api_response)
except ApiException as e:
    print("Exception when calling PersonalApi->gender_geo: %s\n" % e)

AttributeError: 'AdminApi' object has no attribute 'gender_geo'