# Retrieve Data by Affiliation

In [34]:
from pathlib import Path
import pandas as pd
import pybliometrics

In [35]:
# pybliometrics.scopus.utils.constants.CONFIG_FILE

Try one affiliation search  

In [36]:
# AffiliationSearch is the Scopus affiliation-search class used by the query cell below.
from pybliometrics.scopus import AffiliationSearch
# NOTE(review): init() presumably loads the pybliometrics configuration (API key,
# cache locations) and has to run before any Scopus class is used — confirm
# against the pybliometrics documentation.
pybliometrics.scopus.init()


Searching by Affiliation Name:

+ Search for "Dartmouth" returns 772 different affiliations (although many are repeats)

    + e.g., Dartmouth College, Dartmouth-Hitchcock Medical Center, Geisel School of Medicine, Dartmouth Cancer Center, Dartmouth Health, etc.

+ Search for "Dartmouth-Hitchcock" returns 72 affiliations

In [37]:
# Search Scopus for affiliation profiles whose name matches the phrase.
# Alternative query kept for reference: AFFIL('Dartmouth-Hitchcock')
query = "AFFIL('Federal Reserve')"  # plain string: the f-prefix had no placeholders
s = AffiliationSearch(query)

# `affiliations` is None (not an empty list) when the search has no hits, so
# normalise it to a list before calling len() or iterating over it.
returned_affils = s.affiliations or []
print(len(returned_affils))

# Display only the first few hits instead of dumping the whole result list.
returned_affils[:10]



135


[Affiliation(eid='10-s2.0-60023612', name='Federal Reserve System', variant='', documents=3543, city='Washington, D.C.', country='United States'),
 Affiliation(eid='10-s2.0-60019743', name='Federal Reserve Bank of New York', variant='', documents=1347, city='New York', country='United States'),
 Affiliation(eid='10-s2.0-60104179', name='Federal Reserve Bank of St. Louis', variant='', documents=1155, city='St. Louis', country='United States'),
 Affiliation(eid='10-s2.0-60013830', name='Federal Reserve Bank of Chicago', variant='', documents=895, city='Chicago', country='United States'),
 Affiliation(eid='10-s2.0-60091322', name='Federal Reserve Bank of Minneapolis', variant='', documents=793, city='Minneapolis', country='United States'),
 Affiliation(eid='10-s2.0-60112672', name='Federal Reserve Bank of Philadelphia', variant='', documents=733, city='Philadelphia', country='United States'),
 Affiliation(eid='10-s2.0-60031864', name='Federal Reserve Bank of San Francisco', variant='', do

In [38]:
# Inspect a single search hit: an `Affiliation` record with the fields
# eid, name, variant, documents, city and country.
returned_affils[0]

Affiliation(eid='10-s2.0-60023612', name='Federal Reserve System', variant='', documents=3543, city='Washington, D.C.', country='United States')

In [39]:
# Build one row per search hit, keyed by its Scopus EID (the first field of
# each `Affiliation` record), keeping only the descriptive fields listed here.
# NOTE(review): an earlier positional variant also read a seventh field as
# "parent"; it is not part of the records used here.
_affil_fields = ("name", "variant", "documents", "city", "country")

affil_dict = {}
for affil in returned_affils:
    affil_dict[affil.eid] = {field: getattr(affil, field) for field in _affil_fields}

# The outer keys (EIDs) become the row index; the inner keys become the columns.
affil_df = pd.DataFrame.from_dict(affil_dict, orient="index")

In [40]:
# `Index.rename` returns a new Index and leaves the frame untouched, so the
# bare call threw its result away and the index stayed unnamed.
# `rename_axis` returns a frame whose index is actually labelled "eid";
# re-running the cell is harmless.
affil_df = affil_df.rename_axis("eid")
affil_df.head()

Unnamed: 0,name,variant,documents,city,country
10-s2.0-60023612,Federal Reserve System,,3543,"Washington, D.C.",United States
10-s2.0-60019743,Federal Reserve Bank of New York,,1347,New York,United States
10-s2.0-60104179,Federal Reserve Bank of St. Louis,,1155,St. Louis,United States
10-s2.0-60013830,Federal Reserve Bank of Chicago,,895,Chicago,United States
10-s2.0-60091322,Federal Reserve Bank of Minneapolis,,793,Minneapolis,United States


In [41]:
# Persist the full search result; the index column is written under the header "eid".
search_csv = Path("../data/affiliations_fed-reserve-search.csv")
affil_df.to_csv(search_csv, index_label="eid", encoding="utf-8")

In [42]:
# Hand-picked Scopus EIDs to keep from the search result. Names are given for
# the entries that appear in the displayed search output above.
affils_to_keep = [
    "10-s2.0-60023612",  # Federal Reserve System
    "10-s2.0-60019743",  # Federal Reserve Bank of New York
    "10-s2.0-60104179",  # Federal Reserve Bank of St. Louis
    "10-s2.0-60013830",  # Federal Reserve Bank of Chicago
    "10-s2.0-60091322",  # Federal Reserve Bank of Minneapolis
    "10-s2.0-60112672",  # Federal Reserve Bank of Philadelphia
    "10-s2.0-60031864",  # Federal Reserve Bank of San Francisco
    "10-s2.0-60010152",
    "10-s2.0-60008845",
    "10-s2.0-60018149",
    "10-s2.0-60112497",
    "10-s2.0-60030591",
    "10-s2.0-60112315",
    "10-s2.0-60003623",
]


*Note: `affil_df` contains duplicate entries for many of these branches, which indicates that the documents of some of them have not been properly parsed.*

In [43]:
# Restrict the search result to the hand-picked EIDs (all columns kept);
# label-based selection raises KeyError if any EID is absent from the index.
fed_affils = affil_df.loc[affils_to_keep, :]
fed_affils.shape

(14, 5)

In [44]:
# s.get_key_remaining_quota()

In [45]:
# s.get_key_reset_time()

In [46]:
from pybliometrics.scopus import AffiliationRetrieval

# Retrieve the profile of every selected affiliation and keep a few summary
# fields per profile. `aff` deliberately stays bound after the loop: later
# cells inspect the last profile that was retrieved.
fed_list = []
for affil_eid in affils_to_keep:
    aff = AffiliationRetrieval(affil_eid)
    fed_dict = dict(
        eid=aff.eid,
        affiliation_name=aff.affiliation_name,
        author_count=aff.author_count,
        doc_count=aff.document_count,
        # name_variants=aff.name_variants,
        scopus_link=aff.scopus_affiliation_link,
        status=aff.status,
    )
    fed_list.append(fed_dict)


In [47]:
# One row per retrieved profile, indexed by EID so it lines up with `affil_df`.
affil_ret_df = pd.DataFrame(fed_list).set_index("eid")
affil_ret_df.head()

Unnamed: 0_level_0,affiliation_name,author_count,doc_count,scopus_link,status
eid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10-s2.0-60023612,Federal Reserve System,763,0,https://www.scopus.com/affil/profile.uri?afid=...,update
10-s2.0-60019743,Federal Reserve Bank of New York,238,0,https://www.scopus.com/affil/profile.uri?afid=...,update
10-s2.0-60104179,Federal Reserve Bank of St. Louis,123,0,https://www.scopus.com/affil/profile.uri?afid=...,update
10-s2.0-60013830,Federal Reserve Bank of Chicago,117,0,https://www.scopus.com/affil/profile.uri?afid=...,update
10-s2.0-60091322,Federal Reserve Bank of Minneapolis,67,0,https://www.scopus.com/affil/profile.uri?afid=...,update


In [48]:
# Index-on-index inner join: keep only EIDs present in both the retrieval
# table and the search table, with the retrieval columns first.
affil_ret_df2 = affil_ret_df.join(other=affil_df, how="inner")
affil_ret_df2.head(n=5)

Unnamed: 0,affiliation_name,author_count,doc_count,scopus_link,status,name,variant,documents,city,country
10-s2.0-60023612,Federal Reserve System,763,0,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve System,,3543,"Washington, D.C.",United States
10-s2.0-60019743,Federal Reserve Bank of New York,238,0,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve Bank of New York,,1347,New York,United States
10-s2.0-60104179,Federal Reserve Bank of St. Louis,123,0,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve Bank of St. Louis,,1155,St. Louis,United States
10-s2.0-60013830,Federal Reserve Bank of Chicago,117,0,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve Bank of Chicago,,895,Chicago,United States
10-s2.0-60091322,Federal Reserve Bank of Minneapolis,67,0,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve Bank of Minneapolis,,793,Minneapolis,United States


In [49]:
# Retrieval columns that add nothing next to the search columns:
# `affiliation_name` repeats `name`, and `doc_count` came back as 0 for every
# row shown above while `documents` carries the actual counts.
drop_cols = ["affiliation_name", "doc_count"]

# Reassign instead of mutating in place, and tolerate columns that are already
# gone, so re-running this cell does not raise KeyError. `columns=` alone
# selects the axis; the extra `axis=1` was redundant.
affil_ret_df2 = affil_ret_df2.drop(columns=drop_cols, errors="ignore")
affil_ret_df2.head()

Unnamed: 0,author_count,scopus_link,status,name,variant,documents,city,country
10-s2.0-60023612,763,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve System,,3543,"Washington, D.C.",United States
10-s2.0-60019743,238,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve Bank of New York,,1347,New York,United States
10-s2.0-60104179,123,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve Bank of St. Louis,,1155,St. Louis,United States
10-s2.0-60013830,117,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve Bank of Chicago,,895,Chicago,United States
10-s2.0-60091322,67,https://www.scopus.com/affil/profile.uri?afid=...,update,Federal Reserve Bank of Minneapolis,,793,Minneapolis,United States


In [50]:
# Persist the merged affiliation table; the index column is written under the header "eid".
fed_affils_csv = Path("../data/fed-reserve-affils.csv")
affil_ret_df2.to_csv(fed_affils_csv, index_label="eid", encoding="utf-8")

Use `AffiliationRetrieval` to get program names from the `name_variants` attribute

In [51]:
# `aff` is left over from the retrieval loop, i.e. it is the profile of the
# last EID in `affils_to_keep`; this cell depends on that loop having run.
aff.document_count

0

In [52]:
# Name variants of the last profile retrieved by the loop over `affils_to_keep`
# (`aff` is the leftover loop variable, not a freshly retrieved profile).
aff.name_variants

[Variant(name='Federal Reserve Board Division of International Finance', doc_count=None)]

In [53]:
#query = f"SUBJAREA(2732)"
#s = AffiliationSearch(query)
#returned_affils = s.affiliations
#print(len(returned_affils))
#returned_affils