In [None]:
#Importing Libraries
import requests
from dotenv import load_dotenv
import os
import pandas as pd
import json
import random
import math
import time

In [None]:
#Loading Environment Variables
#Called with no arguments, so python-dotenv's default .env lookup applies.
#Presumably this is where the API key read from the environment further down comes from - verify the .env file defines it.
load_dotenv()

In [None]:
#Reading the Web of Science API Key from the Environment
#The lookup yields None (no exception) when the variable is not set
wos_key = os.environ.get('wos_api_key')

In [None]:
#Reading the Table of DOIs
#The "doi" column of this frame drives the lookup loop further down
dois_df = pd.read_csv(filepath_or_buffer='data/merged_df.csv')

In [None]:
#Creating the Collector Lists (One per Column of the WOS Id Data Frame)
#The generator yields a fresh list each time, so the four names never share one list object
uid_art, doi_art, author_art, journal_art = ([] for _ in range(4))

In [None]:
#Retrieving UIDs, Author and Journal Names
WOS_SEARCH_URL = "https://wos-api.clarivate.com/api/wos"
#Seconds to wait for a response, so that one stalled request cannot hang the whole run
WOS_SEARCH_TIMEOUT = 60

#Start from empty collectors so that re-running this cell does not append every row a second time
for collector in (uid_art, doi_art, author_art, journal_art):
    collector.clear()

#The header is identical for every request, so it is built once
header = {
    "X-ApiKey": wos_key
}

for row, raw_doi in enumerate(dois_df["doi"]):
    #A blank DOI cell is read by pandas as NaN and cannot be turned into a query
    if pd.isna(raw_doi):
        print("Skipping missing DOI at row:", row)
        continue
    #NOTE(review): the value logged in doi_art keeps the "DO=" query prefix - confirm downstream steps expect that
    doi = "DO=" + str(raw_doi)
    #Setting Parameters to Retrieve UID (only the first matching record is requested)
    params = {
        "databaseId": "WOK",
        "usrQuery": doi,
        "count": 1,
        "firstRecord": 1
    }
    #Making API Call
    r = requests.get(WOS_SEARCH_URL,
                     params=params,
                     headers=header,
                     timeout=WOS_SEARCH_TIMEOUT)
    #Fail on an HTTP error here instead of on a confusing key lookup further down
    r.raise_for_status()
    doi_response = r.json()
    #Logging Response
    if doi_response["QueryResult"]["RecordsFound"] == 0:
        #0 is the placeholder written to every column when no record was found
        doi_art.append(doi)
        uid_art.append(0)
        author_art.append(0)
        journal_art.append(0)
    else:
        record = doi_response["Data"]["Records"]["records"]["REC"][0]
        summary = record["static_data"]["summary"]
        names = summary["names"]["name"]
        #With a single author "name" is one object rather than a list (TODO confirm against the API schema);
        #checking the actual shape avoids relying on the separate "count" field
        if isinstance(names, dict):
            names = [names]
        #First title entry, logged as the journal name
        journal = summary["titles"]["title"][0]["content"]
        #Read every value before appending so a malformed record cannot leave the four lists with different lengths
        author_names = [name["full_name"] for name in names]
        #One output row per author of the article
        doi_art.extend([doi] * len(author_names))
        uid_art.extend([record["UID"]] * len(author_names))
        author_art.extend(author_names)
        journal_art.extend([journal] * len(author_names))
    #Pause 6-8 seconds between requests
    time.sleep(3 + random.randint(3, 5))

In [None]:
#Assembling the Collected Lists into One Data Frame (One Column per List)
part_a_column_names = ['doi', 'wos-uid', 'author', 'journal']
part_a_column_values = [doi_art, uid_art, author_art, journal_art]
df_part_A = pd.DataFrame(data=dict(zip(part_a_column_names, part_a_column_values)))

In [None]:
#Inspecting Data Frame
#Bare expression: as the last statement of the cell, the notebook renders df_part_A as the cell output.
df_part_A

In [None]:
#Writing the WOS Id Data Frame to Disk (Row Index Left Out of the File)
df_part_A.to_csv(path_or_buf="data/wos_id_data.csv", index=False)

In [None]:
#Dropping Missing Values
#Rows whose 'wos-uid' is the 0 placeholder (written when no record was found) are removed.
#The re-indexed frame is assigned back; calling reset_index() without keeping its result leaves filtered_df unchanged.
#drop=True discards the old row labels instead of adding them as an extra "index" column.
filtered_df = df_part_A[df_part_A['wos-uid'] != 0].reset_index(drop=True)
#Displayed as the cell output
filtered_df

In [None]:
#Pulling Out the WOS UID Column (One Entry per Remaining Row)
uids = filtered_df.loc[:, 'wos-uid']

In [None]:
#Extracting Unique Uids
#Built straight from filtered_df so the cell gives the same result however often it is re-run;
#re-applying reset_index() to its own output keeps adding index columns and eventually raises.
#reset_index() (without drop) turns the Series into a DataFrame with an "index" and a "wos-uid" column,
#which is the shape the reference retrieval loop indexes into.
uids = filtered_df['wos-uid'].drop_duplicates().reset_index()

In [None]:
#Creating the Collectors for the Reference Retrieval
#total_refs receives one reference count per UID; references receives (cited author, UID) pairs
total_refs, references = [], []

In [None]:
#Retrieving References
WOS_REFERENCES_URL = "https://wos-api.clarivate.com/api/wos/references"
#Number of references requested per call; also the step between successive "firstRecord" values
REFS_PAGE_SIZE = 100
#Seconds to wait for a response, so that one stalled request cannot hang the whole run
REFS_TIMEOUT = 60


def _fetch_reference_page(uid, first_record):
    """Request one page of cited references for `uid` and return the decoded JSON.

    Parameters:
        uid: the WOS UID whose references are requested.
        first_record: 1-based position of the first reference on the page.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    #Setting Query Parameters and Header for References Retrieval
    params = {
        "databaseId": "WOK",
        "uniqueId": uid,
        "count": REFS_PAGE_SIZE,
        "firstRecord": first_record
    }
    header = {
        "X-ApiKey": wos_key
    }
    #Sending Query
    r = requests.get(WOS_REFERENCES_URL,
                     params=params,
                     headers=header,
                     timeout=REFS_TIMEOUT)
    #Fail on an HTTP error here instead of on a confusing key lookup further down
    r.raise_for_status()
    return r.json()


#Start from empty collectors so that re-running this cell does not append every row a second time
total_refs.clear()
references.clear()

#Creating Outer Loop (one iteration per unique UID)
for uid in uids['wos-uid']:
    ref_response = _fetch_reference_page(uid, 1)
    #Pause 2-3 seconds after every request (not after every reference)
    time.sleep(1 + random.randint(1, 2))
    #Collecting Total References
    num_ref = ref_response["QueryResult"]["RecordsFound"]
    total_refs.append(num_ref)
    #A paper without references is still logged, with "NA" as the cited author
    if num_ref == 0:
        references.append(("NA", uid))
        continue
    #Retrieving All References, one page at a time; the first page is the response already in hand
    for first_record in range(1, num_ref + 1, REFS_PAGE_SIZE):
        if first_record > 1:
            ref_response = _fetch_reference_page(uid, first_record)
            time.sleep(1 + random.randint(1, 2))
        #Iterating over the records themselves covers every entry of the page, including the last one
        for ref in ref_response.get("Data") or []:
            try:
                references.append((ref['CitedAuthor'], uid))
            except (KeyError, TypeError) as e:
                #Best effort: a reference without a usable 'CitedAuthor' is reported and skipped
                print("An error occurred:", e)
                print("At this uid:", uid)

In [None]:
#Turning the (Cited Author, UID) Pairs into a Two-Column Data Frame
part_b_column_names = ['cited_author', 'wos-uid']
df_part_B = pd.DataFrame(data=references, columns=part_b_column_names)

In [None]:
#Joining Cited Authors (df_part_B) with Citing Article Data (df_part_A) on the WOS UID
#Left join: every row of df_part_B is kept, paired with each df_part_A row that has the same 'wos-uid'
merged_df = df_part_B.merge(df_part_A, how='left', on='wos-uid')

In [None]:
#Writing the Merged Citation Data to Disk (Row Index Left Out of the File)
merged_df.to_csv(path_or_buf="data/citation_data.csv", index=False)