# API Testing

## Python Setup

In [1]:
import requests
import time
import pandas as pd

## Connecting to API

### Login Information

Fetch my password from a separate .txt file:

In [2]:
# Read the password from a separate text file; surrounding whitespace
# (e.g. a trailing newline) is stripped.
with open('../data/password.txt', mode='r') as password_file:
    password = password_file.read().strip()

In [3]:
# Credentials payload posted to the auth endpoint by execute_query().
login = dict(username='nj995@nyu.edu', password=password)

### API Query Helper

In [4]:
def execute_query(string):
    """Run a Dimensions DSL query and return the decoded JSON response.

    Every call first posts the module-level ``login`` credentials to the auth
    endpoint to obtain a token, then posts the query to the DSL endpoint with
    that token in the ``Authorization`` header.

    Parameters
    ----------
    string : str
        The DSL query text. A ``bytes`` body is passed through unchanged.

    Returns
    -------
    The JSON-decoded body of the DSL response.

    Raises
    ------
    requests.HTTPError
        If either the login request or the DSL request comes back with an
        HTTP error status.
    """
    # Send credentials to login url to retrieve token.
    auth_resp = requests.post('https://app.dimensions.ai/api/auth.json', json=login)
    auth_resp.raise_for_status()

    # Create http header using the generated token.
    headers = {'Authorization': "JWT " + auth_resp.json()['token']}

    # Encode explicitly as UTF-8 so queries containing non-ASCII characters
    # (e.g. accented names) are transmitted correctly.
    body = string.encode('utf-8') if isinstance(string, str) else string

    # Execute DSL query.
    query_resp = requests.post('https://app.dimensions.ai/api/dsl.json', data=body, headers=headers)
    # Fail here on an HTTP error rather than later, when the caller indexes
    # into a payload that does not have the expected keys.
    query_resp.raise_for_status()
    return query_resp.json()

## Pulling Comparison Group

### Method 1: Pulling all Researchers who Collaborated with INCA-funded Researchers

#### All Grants awarded to INCA researchers

In [5]:
# Load the researcher table and keep the distinct, non-null ids as a
# Series with a fresh 0..n-1 index.
rsr_info = pd.read_csv('../data/researchers.csv')
rsrs = (
    rsr_info['id']
    .dropna()
    .drop_duplicates()
    .reset_index(drop=True)
)

In [6]:
# Build two parallel lists holding one (researcher id, grant id) pair for
# every grant the API returns for each researcher in `rsrs`.
ids_t = []
grants_t = []
for n_done, rsr_id in enumerate(rsrs, start=1):
    # '\r' keeps the progress counter on a single, overwritten line.
    print('Querying: {}/{} researchers'.format(n_done, len(rsrs)), end = '\r')
    # Without an explicit limit the DSL applies its default page size (20 per
    # the Dimensions documentation), which would silently drop grants for
    # researchers with more than that. Request the maximum page size instead.
    string = '''
    search grants 
    where researchers.id = "{}"
    return grants limit 1000
    '''.format(rsr_id)
    resp = execute_query(string)
    for grant in resp['grants']:
        ids_t.append(rsr_id)
        grants_t.append(grant['id'])

Querying: 965/965 researchers

In [7]:
# One row per (INCA researcher, grant) pair collected by the query loop.
inca_rsrs_grants = pd.DataFrame(dict(rsr_id=ids_t, grant=grants_t))

#### All researchers who collaborated on those grants

In [8]:
# Distinct grant ids across all INCA researchers, re-indexed from 0.
grants = (
    inca_rsrs_grants['grant']
    .drop_duplicates()
    .reset_index(drop=True)
)

In [9]:
# Build two parallel lists holding one (grant id, researcher id) pair for
# every researcher the API returns for each grant in `grants`.
grants_t = []
ids_t = []
for n_done, grant_id in enumerate(grants, start=1):
    # '\r' keeps the progress counter on a single, overwritten line.
    print('Querying: {}/{} grants'.format(n_done, len(grants)), end = '\r')
    # Without an explicit limit the DSL applies its default page size (20 per
    # the Dimensions documentation), which would silently drop researchers on
    # large grants. Request the maximum page size instead.
    string = '''
    search grants 
    where id = "{}"
    return researchers limit 1000
    '''.format(grant_id)
    resp = execute_query(string)
    for researcher in resp['researchers']:
        grants_t.append(grant_id)
        ids_t.append(researcher['id'])

Querying: 328/328 grants

In [10]:
# One row per (grant, researcher) pair collected by the query loop.
collaborating_rsrs = pd.DataFrame(dict(grant=grants_t, rsr_id=ids_t))

#### Analysis of Results

In [11]:
# All INCA-funded researchers that have a Dimensions id.
rsrs_inca = rsrs

# INCA-funded researchers for whom the API returned at least one grant.
rsrs_inca_t = (
    inca_rsrs_grants['rsr_id']
    .drop_duplicates()
    .reset_index(drop=True)
)

# Every researcher listed on a grant that has at least one INCA-funded researcher.
rsrs_all = (
    collaborating_rsrs['rsr_id']
    .drop_duplicates()
    .reset_index(drop=True)
)

# Report the size of each group, in the order defined above.
summary_lines = [
    ("{} INCA-funded researchers (with Dimensions IDs)", rsrs_inca),
    ("{} INCA-funded researchers appear to have grants with the Dimensions API", rsrs_inca_t),
    ("{} researchers participated in grants with at least one INCA-funded researcher", rsrs_all),
]
for template, group in summary_lines:
    print(template.format(len(group)))

965 INCA-funded researchers (with Dimensions IDs)
207 INCA-funded researchers appear to have grants with the Dimensions API
577 researchers participated in grants with at least one INCA-funded researcher


In [12]:
# Spot check 1: every INCA-funded researcher that has grants must also
# appear among the researchers collected from those grants.
# Series.isin is vectorised; the previous `x in list(...)` rebuilt a full
# list for every element, which made the check quadratic.
print(rsrs_inca_t.isin(rsrs_all).all())

# Spot check 2: the number of collaborators outside the "INCA with grants"
# set must equal the number outside the full INCA set, i.e. no INCA
# researcher reaches rsrs_all without also being in rsrs_inca_t.
print((~rsrs_all.isin(rsrs_inca_t)).sum()
      == (~rsrs_all.isin(rsrs_inca)).sum())

True
True


In [15]:
# Comparison group: collaborators on INCA-linked grants who are not
# themselves INCA-funded. Series.isin gives a boolean mask in one vectorised
# pass instead of rebuilding a list for every element.
rsrs_comp_1 = rsrs_all[~rsrs_all.isin(rsrs_inca)].reset_index(drop = True)

# Number of comparison rsrs identified:
print("Size of comparison group identified: {} researchers".format(len(rsrs_comp_1)))

Size of comparison group identified: 370 researchers


### Method 2: Pulling all Researchers from Similar Topics

## Sandbox

In [17]:
# Sandbox: look up the grants of a single researcher, selected by researcher id.
string = '''
search grants
where researchers.id = "ur.01265037264.14"
return grants
'''
resp = execute_query(string)

In [21]:
# List the id of every grant in the most recent response.
for grant in resp['grants']:
    print(grant['id'])

grant.4731058
grant.3800931
grant.4525380
grant.4524698


In [59]:
# Sandbox: look up grants by researcher last name instead of researcher id.
string = '''
search grants
where researchers.last_name = "Amigorena"
return grants
'''
resp = execute_query(string)

In [60]:
# List the id of every grant in the most recent response.
for grant in resp['grants']:
    print(grant['id'])

grant.4731058
grant.3800931
grant.4525380
grant.3780243
grant.4524698
