In [167]:
# import, configuration, functions, etc.
# Run this every time you use the script

import json
import requests
from time import sleep
import csv
import datetime
from fuzzywuzzy import fuzz # fuzzy logic matching
import pandas as pd

# VanderBot common code; defines vbc.Query class
import vb_common_code as vbc

# seconds to wait between API calls
api_throttle_time = 0.25

def generate_header_dictionary(accept_media_type):
    user_agent_header = 'ctgBot/0.1 (https://github.com/HeardLibrary/linked-data/tree/master/clinical_trials; mailto:steve.baskauf@vanderbilt.edu)'
    request_header_dictionary = {
        'Accept' : accept_media_type,
        'User-Agent': user_agent_header
    }
    return request_header_dictionary

def get_ctg(parameter, result_start, result_end):
    endpoint_url = 'https://clinicaltrials.gov/api/query/full_studies'
    accept_media_type = 'application/json'
    
    parameters = {
        'expr': parameter,
        'min_rnk': result_start,
        'max_rnk': result_end,
        'fmt': 'json'
    }
    
    r = requests.get(endpoint_url, params = parameters, headers = generate_header_dictionary(accept_media_type))
    data = r.json()
    return data

print('done')

done


In [13]:
# API home https://clinicaltrials.gov/api/gui/home
# For search expression syntax, see https://clinicaltrials.gov/api/gui/ref/syntax 
# For field name crosswalks, see https://clinicaltrials.gov/api/gui/ref/crosswalks

# For details on the query URL structure, see https://clinicaltrials.gov/api/gui/ref/api_urls#queryURLs
# min_rnk (parameter for results start) defaults to 1
# max_rnk (parameter for results end) defaults to min_rnk plus maximum number of records allowed (100 for full_studies)
search_parameter = 'SEARCH[Study]AREA[ResponsiblePartyInvestigatorAffiliation]COVERAGE[Contains] Vanderbilt'

# Send initial query to determine the number of results available
result_start = 1
result_end = 1
data = get_ctg(search_parameter, result_start, result_end)
print(json.dumps(data, indent=2))
number_results = data['FullStudiesResponse']['NStudiesFound']
print(number_results, 'studies found')


{
  "FullStudiesResponse": {
    "APIVrs": "1.01.02",
    "DataVrs": "2020:07:11 22:15:10.853",
    "Expression": "SEARCH[Study]AREA[ResponsiblePartyInvestigatorAffiliation]COVERAGE[Contains] Vanderbilt",
    "NStudiesAvail": 345428,
    "NStudiesFound": 1085,
    "MinRank": 1,
    "MaxRank": 1,
    "NStudiesReturned": 1,
    "FullStudies": [
      {
        "Rank": 1,
        "Study": {
          "ProtocolSection": {
            "IdentificationModule": {
              "NCTId": "NCT04464070",
              "OrgStudyIdInfo": {
                "OrgStudyId": "150895"
              },
              "Organization": {
                "OrgFullName": "Vanderbilt University",
                "OrgClass": "OTHER"
              },
              "BriefTitle": "Pathways of Eicosanoid Metabolism",
              "OfficialTitle": "Novel Pathways of Eicosanoid Metabolism"
            },
            "StatusModule": {
              "StatusVerifiedDate": "July 2020",
              "OverallStatus": "Enrolli

In [17]:
# Request results 100 at a time. 
# The API is fine with overshooting when the last batch requests a result_end number higher than the last record
study_list = []
for request_index in range(int(number_results/100) + 1):
    print(request_index)
    result_start = request_index * 100 +1
    result_end = request_index * 100 + 100
    data = get_ctg(search_parameter, result_start, result_end)
    # print(json.dumps(data, indent=2))
    list_of_records = data['FullStudiesResponse']['FullStudies']
    for study in list_of_records:
        study_dict = study['Study']
        #print(json.dumps(study_dict['ProtocolSection'], indent=2))
        if 'OfficialTitle' in study_dict['ProtocolSection']['IdentificationModule']:
            study_name = study_dict['ProtocolSection']['IdentificationModule']['OfficialTitle']
        elif 'BriefTitle' in study_dict['ProtocolSection']['IdentificationModule']:
            study_name = study_dict['ProtocolSection']['IdentificationModule']['BriefTitle']
        else:
            study_name = ''
        #print(study_name)
        NCTId = study_dict['ProtocolSection']['IdentificationModule']['NCTId']
        pi_name = study_dict['ProtocolSection']['SponsorCollaboratorsModule']['ResponsibleParty']['ResponsiblePartyInvestigatorFullName']
        pi_title = study_dict['ProtocolSection']['SponsorCollaboratorsModule']['ResponsibleParty']['ResponsiblePartyInvestigatorTitle']

        #print(study_dict['ProtocolSection'])
        
        # Not every study has a ContactsLocationsModule
        if 'ContactsLocationsModule' in study_dict['ProtocolSection']:
            contacts = study_dict['ProtocolSection']['ContactsLocationsModule']
        else:
            contacts = ''
        if 'OverallOfficialList' in contacts:
            officials = contacts['OverallOfficialList']['OverallOfficial']
        else:
            officials = []
        study_summary_dict = {'study_name': study_name, 'NCTId': NCTId, 'pi_name': pi_name, 'pi_title': pi_title, 'contacts': contacts, 'officials': officials}
        # print(study_summary_dict)
        study_list.append(study_summary_dict)
    sleep(api_throttle_time)


0
1
2
3
4
5
6
7
8
9
10
[
  {
    "study_name": "Novel Pathways of Eicosanoid Metabolism",
    "NCTId": "NCT04464070",
    "pi_name": "Claus Schneider",
    "pi_title": "Associate Professor of Pharmacology",
    "contacts": {
      "OverallOfficialList": {
        "OverallOfficial": [
          {
            "OverallOfficialName": "Claus M Schneider, PhD",
            "OverallOfficialAffiliation": "Vanderbilt University",
            "OverallOfficialRole": "Principal Investigator"
          }
        ]
      },
      "LocationList": {
        "Location": [
          {
            "LocationFacility": "Vanderbilt University",
            "LocationCity": "Nashville",
            "LocationState": "Tennessee",
            "LocationZip": "37232",
            "LocationCountry": "United States"
          }
        ]
      }
    },
    "officials": [
      {
        "OverallOfficialName": "Claus M Schneider, PhD",
        "OverallOfficialAffiliation": "Vanderbilt University",
        "OverallOff

In [41]:
print(json.dumps(study_list[0:100], indent=2))

[
  {
    "study_name": "Novel Pathways of Eicosanoid Metabolism",
    "NCTId": "NCT04464070",
    "pi_name": "Claus Schneider",
    "pi_title": "Associate Professor of Pharmacology",
    "contacts": {
      "OverallOfficialList": {
        "OverallOfficial": [
          {
            "OverallOfficialName": "Claus M Schneider, PhD",
            "OverallOfficialAffiliation": "Vanderbilt University",
            "OverallOfficialRole": "Principal Investigator"
          }
        ]
      },
      "LocationList": {
        "Location": [
          {
            "LocationFacility": "Vanderbilt University",
            "LocationCity": "Nashville",
            "LocationState": "Tennessee",
            "LocationZip": "37232",
            "LocationCountry": "United States"
          }
        ]
      }
    },
    "officials": [
      {
        "OverallOfficialName": "Claus M Schneider, PhD",
        "OverallOfficialAffiliation": "Vanderbilt University",
        "OverallOfficialRole": "Principal 

In [87]:
pis = []
count = 0
for study in study_list:
#for study in study_list[0:200]:
    count +=1
    has_pi = False
    pi_strings = []
    pi_affiliations = []
    pi_titles = []
    # Don't check the officials list if it's empty
    if len(study['officials']) > 0:
        # Check through all officials to see if any are PIs
        for official in study['officials']:
            if official['OverallOfficialRole'] == 'Principal Investigator':
                has_pi = True
                pi_strings.append(official['OverallOfficialName'])
                if 'OverallOfficialAffiliation' in official:
                    pi_affiliations.append(official['OverallOfficialAffiliation'])
                else:
                    pi_affiliations.append('')
                pi_titles.append('')
    #print(pi_strings)
    if not has_pi:
        if 'contacts' in study:
            if 'OverallOfficialList' in study['contacts']:
                if 'OverallOfficial' in study['contacts']['OverallOfficialList']:
                    if len(study['contacts']['OverallOfficialList']['OverallOfficial']) > 0:
                        for official in study['contacts']['OverallOfficialList']['OverallOfficial']:
                            if official['OverallOfficialRole'] == 'Principal Investigator':
                                has_pi = True
                                pi_strings.append(official['OverallOfficialName'])
                                if 'OverallOfficialAffiliation' in official:
                                    pi_affiliations.append(official['OverallOfficialAffiliation'])
                                else:
                                    pi_affiliations.append('')
                                pi_titles.append('')

    # The "pi_title" field sometimes contains rank or department info that might help with disambiguation
    if 'pi_name' in study:
        # If no name has been found yet, this will be the only one
        if not has_pi:
            pi_strings.append(study['pi_name'])
            pi_affiliations.append('')
            pi_titles.append(study['pi_title'])
        # If the PI has already been found, match the PI name and add its info
        else:
            for name_index in range(len(pi_strings)):
                #ratio = fuzz.ratio(study['pi_name'],name)
                #partial_ratio = fuzz.partial_ratio(study['pi_name'],name)
                #sort_ratio = fuzz.token_sort_ratio(study['pi_name'],name)
                set_ratio = fuzz.token_set_ratio(study['pi_name'],pi_strings[name_index])
                if set_ratio > 75:
                    pi_titles[name_index] = study['pi_title']
                    pi_strings[name_index] += ' | ' + study['pi_name']
                elif set_ratio > 60:
                    print(count)
                    print(set_ratio, study['pi_name'], pi_strings[name_index])
                    response = input('press Enter to accept, or anything else to reject')
                    if response == '':
                        pi_titles[name_index] = study['pi_title']
                        pi_strings[name_index] += ' | ' + study['pi_name']
                else:
                    pi_titles[name_index] = ''
                

    #print(pi_strings)
    #print(pi_titles)
    #print(pi_affiliations)
    #print()
    pis.append({'NCTId': study['NCTId'], 'pi_strings': pi_strings, 'pi_titles': pi_titles, 'pi_affiliations': pi_affiliations})
print('done')

274
61 C. Michael Stein Charles M Stein, MD
press Enter to accept, or anything else to reject
276
61 C. Michael Stein Charles M Stein, MD
press Enter to accept, or anything else to reject
287
69 Steve Davis Stephen N. Davis, MD
press Enter to accept, or anything else to reject
292
61 C. Michael Stein Charles M. Stein, MD
press Enter to accept, or anything else to reject
303
69 Steve Davis Stephen N Davis, MD
press Enter to accept, or anything else to reject
304
69 Steve Davis Stephen N Davis, MD
press Enter to accept, or anything else to reject
312
74 John Allan Barwis John A Barwise, M.D.
press Enter to accept, or anything else to reject
315
74 John Allan Barwis John A Barwise, M.D.
press Enter to accept, or anything else to reject
331
75 Dan France Daniel J France, PhD, MPH
press Enter to accept, or anything else to reject
336
71 Laura Cutting Laurie E. Cutting, Ph.D.
press Enter to accept, or anything else to reject
340
69 Steve Davis Stephen N. Davis, MD
press Enter to accept, or a

Turn the list of dictionaries into a Pandas DataFrame

In [173]:
studies_df = pd.DataFrame(pis)
studies_df.head()

Unnamed: 0,NCTId,pi_strings,pi_titles,pi_affiliations
0,NCT04464070,"[Claus M Schneider, PhD | Claus Schneider | Cl...",[Associate Professor of Pharmacology],[Vanderbilt University]
1,NCT04320251,"[Jack Noble, PhD | Jack Noble | Jack Noble, PhD]","[Assistant Professor, Dept. Electrical Enginee...",[Vanderbilt University]
2,NCT04254718,"[Terrah Akard, PhD | Terrah Akard | Terrah Aka...",[Associate Professor],[Vanderbilt University]
3,NCT04198038,"[Terrah Akard, PhD, RN | Terrah Akard | Terrah...",[Associate Professor],[Vanderbilt University]
4,NCT04149769,"[Michael Goldfarb, PhD | Michael Goldfarb | Mi...",[Professor Of Physical Medicine],[Vanderbilt University]


In [155]:
employerQId = 'Q29052' # Vanderbilt University
sparqlSleep = 0.25 # delay to wait between calls to SPARQL endpoint

# P1416 is affiliation. P749 is parent organization (one or more edges). Q29052 is Vanderbilt University
query_pattern = '''?id wdt:P1416 ?unit.
?unit wdt:P749+ wd:Q29052.'''

# Retrieve a list of Q IDs and English labels
label_data = vbc.Query(labelscreen=query_pattern, sleep=sparqlSleep).labels_descriptions([])

# Retrieve the names of the department affiliations
#department_data = vbc.Query(pid='P1416', sleep=sparqlSleep).search_statement(qIds, [])
acceptMediaType = 'application/json'
wikidataEndpointUrl = 'https://query.wikidata.org/sparql'
requestHeaderDictionary = vbc.generateHeaderDictionary(acceptMediaType)

query = '''
select distinct ?item ?label where {
?item wdt:P1416 ?dept.
?dept wdt:P749+ wd:Q29052.
?dept rdfs:label ?label.
FILTER(lang(?label)='en')
}
'''
#print(query)
results = []
r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
try:
    data = r.json()
    statements = data['results']['bindings']
    for statement in statements:
        wikidataIri = statement['item']['value']
        if 'label' in statement:
            name = statement['label']['value']
        else:
            name = ''
        qNumber = vbc.extract_qnumber(wikidataIri)
        results.append({'qid': qNumber, 'name': name})
except:
    results = [{'error': r.text}]
department_data = results
print('done')

done


Match the labels to the dept affiliations, then convert to a Pandas DataFrame

In [171]:
vu_wikidata_people = []
for person in label_data:
    found = False
    for dept_affiliation in department_data:
        if person['qid'] == dept_affiliation['qid']:
            found = True
            vu_wikidata_people.append({'qid': person['qid'], 'person_name': person['string'],  'dept_name': dept_affiliation['name']})
            break
    if not found:
            vu_wikidata_people.append({'qid': person['qid'], 'person_name': person['string'],  'dept_name': ''})        
vu_wikidata_people_df = pd.DataFrame(vu_wikidata_people)
vu_wikidata_people_df.head()

Unnamed: 0,qid,person_name,dept_name
0,Q97398089,Shannon Cole,Vanderbilt University School of Nursing
1,Q97398104,Travis Dunlap,Vanderbilt University School of Nursing
2,Q97398119,Jennifer Hicks,Vanderbilt University School of Nursing
3,Q97398092,Courtney Cook,Vanderbilt University School of Nursing
4,Q97398073,Debra Arnow,Vanderbilt University School of Nursing


In [230]:
links = []
non_vanderbilt = []
match_fail = []
score_cutoff = 75
auto_cutoff = 85

# Step through each study and try to disambiguate each PI
for study_index,study_row in studies_df.iterrows():
    print(study_index, 'Study:', 'https://clinicaltrials.gov/ct2/show/' + study_row['NCTId'])
    # Step through each PI in the study
    for pi_number in range(len(study_row['pi_strings'])):
        # First eliminate any PIs that aren't from Vanderbilt
        if (study_row['pi_affiliations'][pi_number] != '') and (not('Vanderbilt' in study_row['pi_affiliations'][pi_number])):
            #print('Not from Vanderbilt!')
            #print(study_row['pi_strings'][pi_number])
            #print(study_row['pi_titles'][pi_number])
            #print(study_row['pi_affiliations'][pi_number])
            non_vanderbilt.append({'NCTId': study_row['NCTId'], 'pi_strings': study_row['pi_strings'][pi_number], 'pi_titles': study_row['pi_titles'][pi_number], 'pi_affiliations': study_row['pi_affiliations'][pi_number]})
        else:
            # Compare each PI string against the Vanderbilt affiliates strings downloaded from Wikidata
            possible_matches = []
            for person_index,person_row in vu_wikidata_people_df.iterrows():
                set_ratio = fuzz.token_set_ratio(study_row['pi_strings'][pi_number], person_row['person_name'])
                if set_ratio >= score_cutoff:
                    possible_matches.append({'person_index': person_index, 'score': set_ratio})
                    #print(set_ratio, person_row['person_name'], study_row['pi_strings'][pi_number])
                    
            if len(possible_matches) == 0:
                match_fail.append({'NCTId': study_row['NCTId'], 'pi_strings': study_row['pi_strings'][pi_number], 'pi_titles': study_row['pi_titles'][pi_number], 'pi_affiliations': study_row['pi_affiliations'][pi_number]})
            else:
                # Create DataFrame of matches sorted by descending score
                possible_matches_df  = pd.DataFrame(possible_matches)
                possible_matches_df.sort_values(by='score', inplace=True, ascending=False)
                possible_matches_df.reset_index(drop=True, inplace=True)

                # Display all hits that have the same top score
                top_hit = possible_matches_df.score.iloc[0]
                count_matches = 0
                for hit,match_row in possible_matches_df.iterrows():
                    if match_row['score'] == top_hit:
                        count_matches += 1
                        if match_row['score'] < auto_cutoff:
                            print(hit, match_row['score'], vu_wikidata_people_df.person_name.iloc[match_row['person_index']], ':', vu_wikidata_people_df.dept_name.iloc[match_row['person_index']])
                            print(' ', '   ', study_row['pi_strings'][pi_number], ':', study_row['pi_titles'][pi_number])
                        else:
                            print(hit, match_row['score'], vu_wikidata_people_df.person_name.iloc[match_row['person_index']], '/', study_row['pi_strings'][pi_number])
                        
                # Don't require human input if above auto cutoff and only one result
                if (count_matches == 1) and (top_hit >= auto_cutoff):
                    links.append({'qid': vu_wikidata_people_df.qid.iloc[possible_matches_df.person_index.iloc[0]], 'NCTId': study_row['NCTId']})
                else:
                    if count_matches == 1:
                        response = input('Press Enter to accept, anything else to reject')
                        if response == '':
                            links.append({'qid': vu_wikidata_people_df.qid.iloc[possible_matches_df.person_index.iloc[0]], 'NCTId': study_row['NCTId']})
                        else:
                            match_fail.append({'NCTId': study_row['NCTId'], 'pi_strings': study_row['pi_strings'][pi_number], 'pi_titles': study_row['pi_titles'][pi_number], 'pi_affiliations': study_row['pi_affiliations'][pi_number]})
                    else:
                        response = input('Enter number of match to accept, or press Enter to reject all')
                        if response == '':
                            match_fail.append({'NCTId': study_row['NCTId'], 'pi_strings': study_row['pi_strings'][pi_number], 'pi_titles': study_row['pi_titles'][pi_number], 'pi_affiliations': study_row['pi_affiliations'][pi_number]})
                        else:
                            response_number = int(response)
                            links.append({'qid': vu_wikidata_people_df.qid.iloc[possible_matches_df.person_index.iloc[response_number]], 'NCTId': study_row['NCTId']})
    print()
print('done')


0 Study: https://clinicaltrials.gov/ct2/show/NCT04464070
0 100 Claus Schneider / Claus M Schneider, PhD | Claus Schneider | Claus M Schneider, PhD

1 Study: https://clinicaltrials.gov/ct2/show/NCT04320251
0 100 Jack Noble / Jack Noble, PhD | Jack Noble | Jack Noble, PhD

2 Study: https://clinicaltrials.gov/ct2/show/NCT04254718
0 86 Terrah Foster Akard / Terrah Akard, PhD | Terrah Akard | Terrah Akard, PhD

3 Study: https://clinicaltrials.gov/ct2/show/NCT04198038
0 77 Terrah Foster Akard : Vanderbilt University School of Nursing
      Terrah Akard, PhD, RN | Terrah Akard | Terrah Akard, PhD, RN : Associate Professor
Press Enter to accept, anything else to reject

4 Study: https://clinicaltrials.gov/ct2/show/NCT04149769
0 100 Michael Goldfarb / Michael Goldfarb, PhD | Michael Goldfarb | Michael Goldfarb, PhD

5 Study: https://clinicaltrials.gov/ct2/show/NCT04144426
0 100 Terry L. Page / Terry L Page, PhD | Terry Page | Terry L Page, PhD

6 Study: https://clinicaltrials.gov/ct2/show/NCT04

0 94 Italo O. Biaggioni / Italo Biaggioni, MD | Italo Biaggioni | Italo Biaggioni, MD

58 Study: https://clinicaltrials.gov/ct2/show/NCT02639637
0 78 Nancy J. Cox : Vanderbilt Department of Medicine
      Nancy J Brown, MD | Nancy J. Brown, MD | Nancy J Brown, MD : Professor
Press Enter to accept, anything else to reject0

59 Study: https://clinicaltrials.gov/ct2/show/NCT02631421
0 93 Evan L. Brittain / Evan Brittain, MD, MSCI | Evan Brittain | Evan Brittain, MD, MSCI

60 Study: https://clinicaltrials.gov/ct2/show/NCT02625181
0 83 Jonathan Porter Wanderer : Vanderbilt Department of Anesthesiology
      Jonathan P Wanderer, MD, MPhil | Jonathan Wanderer | Jonathan P Wanderer, MD, MPhil : Medical Director of Procedure Preparation Center
Press Enter to accept, anything else to reject

61 Study: https://clinicaltrials.gov/ct2/show/NCT02614040
0 92 Wesley H. Self / Wesley Self, MD MPH | Wesley Self | Wesley Self, MD MPH

62 Study: https://clinicaltrials.gov/ct2/show/NCT02611557
0 100 Manus 

Press Enter to accept, anything else to reject

117 Study: https://clinicaltrials.gov/ct2/show/NCT02159079
0 100 Matthew W. Semler / Matthew W. Semler, M.D. | Matthew Semler

118 Study: https://clinicaltrials.gov/ct2/show/NCT02158390
0 100 Ann Kaiser / Ann P Kaiser, PhD | Ann Kaiser

119 Study: https://clinicaltrials.gov/ct2/show/NCT02149901
0 100 Emily M. Garland / Emily M Garland, PhD, MSCI | Emily M. Garland

120 Study: https://clinicaltrials.gov/ct2/show/NCT02131012
0 100 Stephen J. Kim / Stephen J. Kim, MD

121 Study: https://clinicaltrials.gov/ct2/show/NCT02130687
0 78 Nancy J. Cox : Vanderbilt Department of Medicine
      Nancy J Brown, M.D. | Nancy J. Brown, MD : Hugh Jackson Morgan Professor
Press Enter to accept, anything else to reject0

122 Study: https://clinicaltrials.gov/ct2/show/NCT02129712
0 100 Bruce Compas / Bruce E Compas, PhD | Bruce Compas

123 Study: https://clinicaltrials.gov/ct2/show/NCT02122380

124 Study: https://clinicaltrials.gov/ct2/show/NCT02088320
0 94 K

0 100 Emily M. Garland / Emily M Garland, PhD, MSCI | Emily M. Garland

175 Study: https://clinicaltrials.gov/ct2/show/NCT01556919
0 100 Michael F. Vaezi / Michael Vaezi

176 Study: https://clinicaltrials.gov/ct2/show/NCT01550315

177 Study: https://clinicaltrials.gov/ct2/show/NCT01547117

178 Study: https://clinicaltrials.gov/ct2/show/NCT01533896
0 100 Seth J. Scholer / Seth Scholer

179 Study: https://clinicaltrials.gov/ct2/show/NCT01525004
0 93 Natasha B. Halasa / Natasha Halasa, MD | Natasha Halasa

180 Study: https://clinicaltrials.gov/ct2/show/NCT01496794
0 100 Stephen J. Kim / Stephen J Kim, MD | Stephen J. Kim, MD

181 Study: https://clinicaltrials.gov/ct2/show/NCT01490320
0 100 Seth J. Scholer / Seth J Scholer, MD, MPH | Seth Scholer

182 Study: https://clinicaltrials.gov/ct2/show/NCT01488292

183 Study: https://clinicaltrials.gov/ct2/show/NCT01474863
0 90 Todd W. Rice / Todd Rice, MD | Todd Rice

184 Study: https://clinicaltrials.gov/ct2/show/NCT01474785
0 92 Naji N. Abumrad 

0 100 Kelly A. Birdwell / Kelly A Birdwell, MD | Kelly Birdwell

243 Study: https://clinicaltrials.gov/ct2/show/NCT00993694
0 86 Adam John Esbenshade / Adam J. Esbenshade, MD | Adam Esbenshade

244 Study: https://clinicaltrials.gov/ct2/show/NCT00993460
0 81 Charles Robert Flynn : Vanderbilt Department of Surgery
      Charles R Flynn, PhD | Charles R. Flynn : Assistant Professor
Press Enter to accept, anything else to reject

245 Study: https://clinicaltrials.gov/ct2/show/NCT00993291
0 100 Fenna T. Phibbs / Fenna T Phibbs, MD | Fenna Phibbs

246 Study: https://clinicaltrials.gov/ct2/show/NCT00983463
0 81 Charles Robert Flynn : Vanderbilt Department of Surgery
      Charles R Flynn, PhD | Charles R. Flynn : Assistant Professor
Press Enter to accept, anything else to reject

247 Study: https://clinicaltrials.gov/ct2/show/NCT00978289
0 92 Naji N. Abumrad / Naji Abumrad, MD | Naji Abumrad

248 Study: https://clinicaltrials.gov/ct2/show/NCT00975923

249 Study: https://clinicaltrials.gov/ct2


304 Study: https://clinicaltrials.gov/ct2/show/NCT00732160
0 100 James M. Luther / James M Luther, MD | James Matt Luther

305 Study: https://clinicaltrials.gov/ct2/show/NCT00732069
0 78 Nancy J. Cox : Vanderbilt Department of Medicine
      Nancy J Brown, MD | Nancy J. Brown : Professor
Press Enter to accept, anything else to reject0

306 Study: https://clinicaltrials.gov/ct2/show/NCT00731302
0 100 C. Michael Stein / C M Stein, M.D. | C. Michael Stein

307 Study: https://clinicaltrials.gov/ct2/show/NCT00730223

308 Study: https://clinicaltrials.gov/ct2/show/NCT00729924
0 100 David W. Haas / David W Haas, MD | David Haas

309 Study: https://clinicaltrials.gov/ct2/show/NCT00715585
0 94 Russell L. Rothman / Russell Rothman, MD MPP | Russell Rothman

310 Study: https://clinicaltrials.gov/ct2/show/NCT00715494

311 Study: https://clinicaltrials.gov/ct2/show/NCT00715338
0 100 John Allan Barwise / John A Barwise, M.D. | John Allan Barwis

312 Study: https://clinicaltrials.gov/ct2/show/NCT007

0 76 Benjamin Legg : Vanderbilt Department of Spanish and Portuguese
      Benjamin S Heavrin, MD | Ben Heavrin : Assistant Professor of Emergency Medicine
1 76 Benjamin Eden : Vanderbilt Department of Economics
      Benjamin S Heavrin, MD | Ben Heavrin : Assistant Professor of Emergency Medicine
Enter number of match to accept, or press Enter to reject all

371 Study: https://clinicaltrials.gov/ct2/show/NCT00552084
0 100 C. Michael Stein / Charles M. Stein | C. Michael Stein

372 Study: https://clinicaltrials.gov/ct2/show/NCT00525109

373 Study: https://clinicaltrials.gov/ct2/show/NCT00517582
0 78 Nancy J. Cox : Vanderbilt Department of Medicine
      Nancy J. Brown, MD | Nancy J. Brown : Professor
Press Enter to accept, anything else to reject0

374 Study: https://clinicaltrials.gov/ct2/show/NCT00515216
0 83 Laura Ann Williams Goff : Vanderbilt Department of Medicine
      Laura Goff, M.D. : 
Press Enter to accept, anything else to reject

375 Study: https://clinicaltrials.gov/ct2/s

0 94 James R. Goldenring / James Goldenring, MD | James Goldenring

435 Study: https://clinicaltrials.gov/ct2/show/NCT04354701
0 93 Jeremy L. Warner / Jeremy Warner, MD | Jeremy Warner

436 Study: https://clinicaltrials.gov/ct2/show/NCT04327999
0 100 Rachel Sobel / Rachel Sobel, MD | Rachel Sobel

437 Study: https://clinicaltrials.gov/ct2/show/NCT04310345

438 Study: https://clinicaltrials.gov/ct2/show/NCT04299425
0 100 Carmen C. Solorzano / Carmen C Solorzano, MD, FACS | Carmen Solorzano

439 Study: https://clinicaltrials.gov/ct2/show/NCT04281875
0 100 Carmen C. Solorzano / Carmen C Solorzano, MD, FACS | Carmen Solorzano

440 Study: https://clinicaltrials.gov/ct2/show/NCT04250194
0 100 Fabien Maldonado / Fabien Maldonado, MD | Fabien Maldonado

441 Study: https://clinicaltrials.gov/ct2/show/NCT04146779
0 93 Barbara A. Murphy / Barbara Murphy, MD | Barbara Murphy, MD

442 Study: https://clinicaltrials.gov/ct2/show/NCT04128267

443 Study: https://clinicaltrials.gov/ct2/show/NCT04073017



507 Study: https://clinicaltrials.gov/ct2/show/NCT01951664
0 100 Sheila Ridner / Sheila H Ridner, PhD, RN | Sheila Ridner
0 93 Barbara A. Murphy / Barbara Murphy, MD

508 Study: https://clinicaltrials.gov/ct2/show/NCT01938651

509 Study: https://clinicaltrials.gov/ct2/show/NCT01928160
0 100 Leora Horn / Leora Horn | Leora Horn, MD

510 Study: https://clinicaltrials.gov/ct2/show/NCT01918306
0 100 Vandana G. Abramson / Vandana G. Abramson, MD | Vandana Abramson

511 Study: https://clinicaltrials.gov/ct2/show/NCT01901367
0 91 Adam John Esbenshade / Adam Esbenshade, MD | Adam Esbenshade

512 Study: https://clinicaltrials.gov/ct2/show/NCT01891747
0 90 Stephen Wesley Clark / Stephen Clark, MD | Stephen Clark, MD

513 Study: https://clinicaltrials.gov/ct2/show/NCT01882231
0 92 Vicki L. Keedy / Vicki Keedy, MD | Vicki Keedy, MD

514 Study: https://clinicaltrials.gov/ct2/show/NCT01850108
0 100 Adetola Kassim / Adetola A Kassim, MD | Adetola A. Kassim

515 Study: https://clinicaltrials.gov/ct2/

0 100 Madan Jagasia / Madan Jagasia, MD | Madan Jagasia, MD

573 Study: https://clinicaltrials.gov/ct2/show/NCT00712556
0 100 Adetola Kassim / Adetola A. Kassim, MD | Adetola A. Kassim

574 Study: https://clinicaltrials.gov/ct2/show/NCT00680758
0 92 Ingrid A. Mayer / Ingrid Mayer, MD

575 Study: https://clinicaltrials.gov/ct2/show/NCT00666211

576 Study: https://clinicaltrials.gov/ct2/show/NCT00664105
0 92 Vicki L. Keedy / Vicki Keedy, MD

577 Study: https://clinicaltrials.gov/ct2/show/NCT00656604

578 Study: https://clinicaltrials.gov/ct2/show/NCT00655005
0 93 Barbara A. Murphy / Barbara Murphy, MD | Barbara Murphy, MD

579 Study: https://clinicaltrials.gov/ct2/show/NCT00653328
0 90 Marta Ann Crispens / Marta Crispens, MD | Marta Crispens, MD

580 Study: https://clinicaltrials.gov/ct2/show/NCT00653250
0 92 Vicki L. Keedy / Vicki Keedy, MD | Vicki Keedy, MD

581 Study: https://clinicaltrials.gov/ct2/show/NCT00651976
0 97 Ingrid M. Meszoely / Ingrid Meszoely, MD | Ingrid Meszoely, MD

5

0 100 Jane F. Ferguson / Jane Ferguson

634 Study: https://clinicaltrials.gov/ct2/show/NCT04406792
0 100 Rolanda Lamora Lister / Rolanda Lister | Rolanda Lister

635 Study: https://clinicaltrials.gov/ct2/show/NCT04391166
0 100 Sapna S. Gangaputra / Sapna Gangaputra

636 Study: https://clinicaltrials.gov/ct2/show/NCT04388371
0 87 Timothy Scott Blackwell / Timothy S Blackwell, MD | Timothy Blackwell

637 Study: https://clinicaltrials.gov/ct2/show/NCT04383756
0 100 Clayne Benson / Clayne Benson, MD | Clayne Benson

638 Study: https://clinicaltrials.gov/ct2/show/NCT04372628
0 90 Todd W. Rice / Todd Rice, MD | Todd Rice

639 Study: https://clinicaltrials.gov/ct2/show/NCT04362176
0 90 Todd W. Rice / Todd Rice, MD | Todd Rice

640 Study: https://clinicaltrials.gov/ct2/show/NCT04359797
0 100 Todd W. Rice / Todd W Rice, MD, MSc | Todd Rice

641 Study: https://clinicaltrials.gov/ct2/show/NCT04349644
0 85 Blythe Anne Corbett / Blythe A Corbett, Ph.D. | Blythe Corbett

642 Study: https://clinicalt

0 100 Brian C Drolet / Brian Drolet

698 Study: https://clinicaltrials.gov/ct2/show/NCT04008303
0 100 Brian C Drolet / Brian Drolet

699 Study: https://clinicaltrials.gov/ct2/show/NCT04000971
0 93 Kenneth J. Gaines / Kenneth Gaines, MD | Kenneth Gaines
0 78 Bil Jackson : Blair School of Music
      Barry Jackson : 
Press Enter to accept, anything else to reject0

700 Study: https://clinicaltrials.gov/ct2/show/NCT03997409

701 Study: https://clinicaltrials.gov/ct2/show/NCT03991299
0 100 Naji N. Abumrad / Naji Abumrad

702 Study: https://clinicaltrials.gov/ct2/show/NCT03979677
0 75 Richard A. Pride : Vanderbilt Department of Political Science
      Richard A. Roberts, Ph.D. : Assistant Professor
1 75 Richard A. Stein : Vanderbilt Department of Molecular Physiology and Biophysics
      Richard A. Roberts, Ph.D. : Assistant Professor
Enter number of match to accept, or press Enter to reject all

703 Study: https://clinicaltrials.gov/ct2/show/NCT03973801
0 78 Bil Jackson : Blair School of M

0 90 Jenna Leigh Walters / Jenna Walters, MD | Jenna Walters

760 Study: https://clinicaltrials.gov/ct2/show/NCT03678870
0 94 Sarah S. Osmundson / Sarah Osmundson, MD | Sarah Osmundson

761 Study: https://clinicaltrials.gov/ct2/show/NCT03666520
0 83 Robert Reder : Vanderbilt University Law School
      Robert Turer : Clinical Informatics Fellow
Press Enter to accept, anything else to reject0

762 Study: https://clinicaltrials.gov/ct2/show/NCT03666429

763 Study: https://clinicaltrials.gov/ct2/show/NCT03654066
0 93 Dhyanesh A. Patel / Dhyanesh Patel, MD | Dhyanesh Patel

764 Study: https://clinicaltrials.gov/ct2/show/NCT03649178
0 100 C. Michael Stein / Charles M Stein, MBChB | C. Michael Stein

765 Study: https://clinicaltrials.gov/ct2/show/NCT03644693

766 Study: https://clinicaltrials.gov/ct2/show/NCT03634488
0 100 Michael Rutledge DeBaun / Michael DeBaun | Michael DeBaun

767 Study: https://clinicaltrials.gov/ct2/show/NCT03634462
0 100 Manus J. Donahue / Manus J Donahue, PhD | Manus

0 84 Brian Richard Lindman : Vanderbilt Department of Medicine
      Brian R Lindman, MD | Brian Lindman : Associate Professor
Press Enter to accept, anything else to reject

829 Study: https://clinicaltrials.gov/ct2/show/NCT03268122
0 100 Todd W. Rice / Todd W Rice, MD, MSc | Todd Rice

830 Study: https://clinicaltrials.gov/ct2/show/NCT03266471
0 92 Elizabeth Ann Scoville / Elizabeth Scoville, MD | Elizabeth Scoville

831 Study: https://clinicaltrials.gov/ct2/show/NCT03265483
0 100 Martha J. Shrubsole / Martha Shrubsole

832 Study: https://clinicaltrials.gov/ct2/show/NCT03263819
0 100 Cyndya A. Shibao / cyndya shibao | Cyndya Shibao

833 Study: https://clinicaltrials.gov/ct2/show/NCT03251586

834 Study: https://clinicaltrials.gov/ct2/show/NCT03250988
0 87 Charles David Weaver / David Charles, M.D. | David Charles
1 87 Philip David Charles / David Charles, M.D. | David Charles
Enter number of match to accept, or press Enter to reject all1

835 Study: https://clinicaltrials.gov/ct2/show

0 100 Harvey J. Murff / Harvey J Murff, MD, MPH | Harvey Murff

888 Study: https://clinicaltrials.gov/ct2/show/NCT03076359
0 100 Carolyn Audet / Carolyn Audet, PhD | Carolyn Audet

889 Study: https://clinicaltrials.gov/ct2/show/NCT03069716
0 93 Evan L. Brittain / Evan Brittain, MD, MSc | Evan Brittain

890 Study: https://clinicaltrials.gov/ct2/show/NCT03067649
0 83 Judy Garber : Vanderbilt Peabody Department of Psychology and Human Development
      Judith Garber : Professor
Press Enter to accept, anything else to reject

891 Study: https://clinicaltrials.gov/ct2/show/NCT03063476
0 100 Jane F. Ferguson / Jane F Ferguson, PhD | Jane Ferguson

892 Study: https://clinicaltrials.gov/ct2/show/NCT03063463
0 93 Michael F. Vaezi / Michael Vaezi, MD,PhD | Michael Vaezi

893 Study: https://clinicaltrials.gov/ct2/show/NCT03058861
0 92 Seth J. Scholer / Seth Scholer, MD,MPH | Seth Scholer

894 Study: https://clinicaltrials.gov/ct2/show/NCT03057678
0 100 Robert F. Labadie / Robert F. Labadie, MD, P

0 100 Naji N. Abumrad / Naji N Abumrad, MD | Naji Abumrad

948 Study: https://clinicaltrials.gov/ct2/show/NCT02526485
0 100 Elizabeth J. Phillips / Elizabeth Phillips Phillips, MD | Elizabeth J Phillips

949 Study: https://clinicaltrials.gov/ct2/show/NCT02481817
0 94 Alexander H. Gelbard / Alexander Gelbard, MD | Alexander Gelbard, MD

950 Study: https://clinicaltrials.gov/ct2/show/NCT02481596
0 78 Lindsay Satterwhite Mayberry : Vanderbilt Department of Medicine
      Lindsay S Mayberry, MS, PhD | Lindsay Mayberry : Assistant Professor
Press Enter to accept, anything else to reject

951 Study: https://clinicaltrials.gov/ct2/show/NCT02469077
0 94 Stephen P. Bruehl / Stephen Bruehl, PhD | Stephen Bruehl, PhD

952 Study: https://clinicaltrials.gov/ct2/show/NCT02463409
0 100 Ashley H. Shoemaker / Ashley H Shoemaker, MD, MSCI | Ashley Shoemaker

953 Study: https://clinicaltrials.gov/ct2/show/NCT02458482

954 Study: https://clinicaltrials.gov/ct2/show/NCT02436356
0 100 Jeffry S. Nyman / Jeff

0 100 Alfredo Gamboa / Alfredo Gamboa

1005 Study: https://clinicaltrials.gov/ct2/show/NCT01228955
0 100 Debra L. Friedman / Debra L Friedman, MD, MS | Debra Friedman

1006 Study: https://clinicaltrials.gov/ct2/show/NCT01225718
0 100 Adam John Esbenshade / Adam Esbenshade

1007 Study: https://clinicaltrials.gov/ct2/show/NCT01211522

1008 Study: https://clinicaltrials.gov/ct2/show/NCT01205529

1009 Study: https://clinicaltrials.gov/ct2/show/NCT01196377
0 100 Donald H. Arnold / Donald H Arnold, MD, MPH | Donald H Arnold

1010 Study: https://clinicaltrials.gov/ct2/show/NCT01194323
0 93 Michael F. Vaezi / Michael Vaezi, MD, PhD | Michael Vaezi

1011 Study: https://clinicaltrials.gov/ct2/show/NCT01162343
0 100 Jin H Han / Jin H Han, MD, MSc | Jin H. Han

1012 Study: https://clinicaltrials.gov/ct2/show/NCT01145365
0 85 David Allen Schwartz / David A Schwartz, MD | David Schwartz

1013 Study: https://clinicaltrials.gov/ct2/show/NCT01139294

1014 Study: https://clinicaltrials.gov/ct2/show/NCT0

Press Enter to accept, anything else to reject

1074 Study: https://clinicaltrials.gov/ct2/show/NCT00338065
0 91 Karen M. Joos / Karen Joos, MD,PhD | Karen Joos

1075 Study: https://clinicaltrials.gov/ct2/show/NCT00310401
0 100 Lorraine B. Ware / Lorraine B Ware, M.D. | Lorraine B. Ware

1076 Study: https://clinicaltrials.gov/ct2/show/NCT00282152
0 84 Charles David Weaver : Vanderbilt Department of Chemistry
      P. David Charles, MD | David Charles : Associate Professor of Neurology
1 84 Philip David Charles : Vanderbilt Department of Neurology
      P. David Charles, MD | David Charles : Associate Professor of Neurology
Enter number of match to accept, or press Enter to reject all1

1077 Study: https://clinicaltrials.gov/ct2/show/NCT00241800
0 100 Wayne A. Ray / Wayne Ray | Wayne Ray

1078 Study: https://clinicaltrials.gov/ct2/show/NCT00223678
0 94 Anthony J. Langone / Anthony Langone, M.D. | Anthony Langone

1079 Study: https://clinicaltrials.gov/ct2/show/NCT00212160
0 100 Naji N. 

In [231]:
links_df  = pd.DataFrame(links)
links_df.to_csv('links.csv', index=False)
match_fail_df  = pd.DataFrame(match_fail)
match_fail_df.to_csv('match_fail.csv', index=False)
non_vanderbilt_df  = pd.DataFrame(non_vanderbilt)
non_vanderbilt_df.to_csv('non_vanderbilt.csv', index=False)
print('done')


done


Look up all of the clinical trial Wikidata Q IDs

In [243]:
NCTId_qids = []
links_df = pd.read_csv('links_edited.csv')
for index,row in links_df.iterrows():
    if index%10 == 0:
        print(index)
    query = '''select distinct ?entity where {
  ?entity wdt:P3098 "''' + row['NCTId'] + '''".
  }'''
    #print(query)
    study_iri_list = vbc.Query(sleep=sparqlSleep).generic_query(query)
    if len(study_iri_list) ==1:
        NCTId_qids.append(vbc.extract_qnumber(study_iri_list[0]))
    else:
        NCTId_qids.append('')
print(NCTId_qids)

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
930
['', 'Q89154959', 'Q86261310', 'Q79109559', 'Q74273301', 'Q74271356', 'Q76983568', 'Q66063706', 'Q66064036', 'Q64121354', 'Q63572694', 'Q64791948', 'Q63594914', 'Q62041507', 'Q62819272', 'Q63811440', 'Q64819220', 'Q64836594', 'Q61909775', 'Q66044941', 'Q66044941', 'Q66046881', 'Q66072670', 'Q66072783', 'Q63813135', 'Q66074198', 'Q63574102', 'Q66077715', 'Q66391436', 'Q66078254', 'Q66079194', 'Q66079863', 'Q66079712', 'Q66081582', 'Q66394421', 'Q66082297', 'Q66083753', 'Q66083925', 'Q66402861', 'Q66402924', 'Q66405205', 'Q66405215', 'Q66404697', 'Q66405552', 'Q66546810', 'Q66547150', 'Q66408639', 'Q65393262', 'Q65400993', 'Q6540

insert the Wikidata IDs (should have done in last step)

In [250]:
links_df = pd.read_csv('links_edited.csv').fillna('')
for study_index in range(len(NCTId_qids)):
    links_df.iat[study_index, 1] = NCTId_qids[study_index]
links_df.to_csv('links_edited.csv', index=False)
links_df

Unnamed: 0,NCTId,wikidataId,pi_uuid,pi,pi_reference_hash,pi_reference_url,pi_retrieved
0,NCT04464070,,,Q88144488,,https://clinicaltrials.gov/ct2/show/NCT04464070,2020-07-15T00:00:00Z
1,NCT04320251,Q89154959,,Q86851856,,https://clinicaltrials.gov/ct2/show/NCT04320251,2020-07-15T00:00:00Z
2,NCT04254718,Q86261310,,Q97398064,,https://clinicaltrials.gov/ct2/show/NCT04254718,2020-07-15T00:00:00Z
3,NCT04198038,Q79109559,,Q97398064,,https://clinicaltrials.gov/ct2/show/NCT04198038,2020-07-15T00:00:00Z
4,NCT04149769,Q74273301,,Q86852074,,https://clinicaltrials.gov/ct2/show/NCT04149769,2020-07-15T00:00:00Z
...,...,...,...,...,...,...,...
933,NCT00608348,Q64724717,,Q93222253,,https://clinicaltrials.gov/ct2/show/NCT00608348,2020-07-15T00:00:00Z
934,NCT00608816,Q64724641,,Q93222253,,https://clinicaltrials.gov/ct2/show/NCT00608816,2020-07-15T00:00:00Z
935,NCT00780195,Q64710801,,Q93222253,,https://clinicaltrials.gov/ct2/show/NCT00780195,2020-07-15T00:00:00Z
936,NCT00574912,Q64725697,,Q93222253,,https://clinicaltrials.gov/ct2/show/NCT00574912,2020-07-15T00:00:00Z


Had to cull out rows that didn't have Q IDs

Verify that the links to be written haven't already been asserted. Hack from vb4_download_wikidata.py

In [252]:
links_df = pd.read_csv('links_edited.csv').fillna('')
qids = []
for study_index,study_row in links_df.iterrows():
    qids.append(study_row['wikidataId'])
qids

['Q58845895',
 'Q59772547',
 'Q61707464',
 'Q61865581',
 'Q61894357',
 'Q61894379',
 'Q61894455',
 'Q61894456',
 'Q61894524',
 'Q61894687',
 'Q61904756',
 'Q61904778',
 'Q61907710',
 'Q61908755',
 'Q61908764',
 'Q61908839',
 'Q61909468',
 'Q61909775',
 'Q61909920',
 'Q61914512',
 'Q61914995',
 'Q61917479',
 'Q61921488',
 'Q61921989',
 'Q61924781',
 'Q61932564',
 'Q61932664',
 'Q61935815',
 'Q61936298',
 'Q61936559',
 'Q61936562',
 'Q61936562',
 'Q61937070',
 'Q61938338',
 'Q61939443',
 'Q61957344',
 'Q61965780',
 'Q61969449',
 'Q61969900',
 'Q61969994',
 'Q61974922',
 'Q61975723',
 'Q61978917',
 'Q61979562',
 'Q61980113',
 'Q61980136',
 'Q61980227',
 'Q61980466',
 'Q61981320',
 'Q62025113',
 'Q62025275',
 'Q62025527',
 'Q62031252',
 'Q62034653',
 'Q62034885',
 'Q62035236',
 'Q62035258',
 'Q62041507',
 'Q62042202',
 'Q62055113',
 'Q62111106',
 'Q62809344',
 'Q62809344',
 'Q62811814',
 'Q62812840',
 'Q62813345',
 'Q62813365',
 'Q62819272',
 'Q62821700',
 'Q62823054',
 'Q63010790',
 'Q630

In [254]:
# ------------------------------------------------------
# get data already in Wikidata about principal investigators
#prop = 'P8329' # principal investigator
#refProps = ['P854', 'P813'] # source URL, retrieved

field_name = 'employer'
discovery_allowed = False
statement_uuid_fieldname = 'employerStatementUuid'
reference_hash_fieldname = 'employerReferenceHash' # set to empty if references aren't tracked for this statement
ref_source_url_fieldname = 'employerReferenceSourceUrl' # set to empty if source URL isn't being tracked for this statement
ref_retrieved_fieldname = 'employerReferenceRetrieved'

wikidata_query_data = vbc.Query(pid='P8329', sleep=sparqlSleep).search_statement(qIds, ['P854', 'P813']) # source URL, retrieved

wikidata_query_data

[]

Decided not to worry about this last step (not edited from the hack yet) because there were no retrieved data.

In [None]:
for employee_index in range(0,len(employees)):
    # Everyone is assigned the employerQId as a value because either they showed up in the SPARQL search for employerQId
    # or we are making a statement that they work for employerQId.
    employees[employee_index][field_name] = employerQId
    # The source URL is the web page that was scraped to get their name
    ref_source_url = deptSettings[deptShortName]['baseUrl'] + employees[employee_index]['category']
    # The generic ref_retrieved date is used for all new references
    employees[employee_index] = generate_statement_data(employees[employee_index], wikidata_query_data, field_name, discovery_allowed, statement_uuid_fieldname, reference_hash_fieldname, ref_source_url_fieldname, ref_retrieved_fieldname, ref_source_url, ref_retrieved)