This script gets the Q IDs of the gallery works whose P31 (instance of) values are going to be changed as a result of the reclassification/hierarchy project. For each of those Q IDs, the existing P31 claim is then looked up in the CSV where the claims were saved after the original upload.

NOTE: The upload file doesn't capture any references for the new P31 values. The references still need to be added.

In [None]:
import pandas as pd
import csv

# Base name of the column in the saved works CSV that holds the value of the claim to be replaced.
# The UUID of that claim is read from the column whose name is this string with '_uuid' appended.
uuid_for_claim_to_delete_column_name = 'instance_of'

def write_dicts_to_csv(table, filename, fieldnames):
    """Write an iterable of dictionaries to a UTF-8 CSV file with a header row.

    table: iterable of dictionaries, one per output row.
    filename: path of the CSV file to create (an existing file is overwritten).
    fieldnames: column names, in output order; used for the header row and to
        pick the values out of each dictionary.
    """
    # newline='' lets the csv module control the line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        dict_writer = csv.DictWriter(handle, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(table)

# Load the input tables. Every column is read as a string and empty cells are kept as ''
# (na_filter=False) instead of being converted to NaN.
works_to_change_frame = pd.read_csv('3d_parts.csv', na_filter=False, dtype=str)
#works_to_change_frame = works_to_change_frame.head(3).copy() # uncomment for testing
thesaurus_ids_frame = pd.read_csv('thesauri_ids.csv', na_filter=False, dtype=str)
works_original_data_frame = pd.read_csv('../../gallery_works/works_multiprop.csv', na_filter=False, dtype=str)

noun_list = list(set(thesaurus_ids_frame['noun'])) # create a non-redundant list of the nouns used
works_qid_list = list(set(works_original_data_frame['qid'])) # non-redundant list of the work Q IDs

# Name of the column holding the UUID of the existing claim
uuid_column_name = uuid_for_claim_to_delete_column_name + '_uuid'

# Build the lookup tables once, rather than scanning the whole data frame for every row.
# setdefault keeps the first occurrence of each key, i.e. the same row that
# .loc[mask, column].values[0] would return.

# noun -> Wikidata Q ID of the class (from the thesaurus ID crosswalk)
class_qid_by_noun = {}
for noun, wikidata_qid in zip(thesaurus_ids_frame['noun'], thesaurus_ids_frame['wikidata']):
    class_qid_by_noun.setdefault(noun, wikidata_qid)

# work Q ID -> (UUID of the existing claim, value of the existing claim)
existing_claim_by_qid = {}
for work_qid, claim_uuid, claim_value in zip(
        works_original_data_frame['qid'],
        works_original_data_frame[uuid_column_name],
        works_original_data_frame[uuid_for_claim_to_delete_column_name]):
    existing_claim_by_qid.setdefault(work_qid, (claim_uuid, claim_value))

# Set up lists to hold the output data
claims_to_delete_list = []
claims_to_add_list = []

# Step through each item for which the new P31 claim needs to be made
for index, claim_row in works_to_change_frame.iterrows():
    qid = claim_row['qid']
    print(qid)

    # Look up the descriptive noun in the thesaurus ID crosswalk and get the Wikidata Q ID for it
    noun = claim_row['noun-modified']
    if noun not in class_qid_by_noun:
        print('Could not find a class Q ID for work', qid, 'with noun', noun, ', index:', index)
        continue # Skip doing this row since the value couldn't be found
    class_qid = class_qid_by_noun[noun]

    # Look up the UUID and value of the current P31 claim
    if qid not in existing_claim_by_qid:
        print('Could not find a claim UUID for work', qid, ', index:', index)
        continue
    uuid, old_p31_qid = existing_claim_by_qid[qid]
    # NOTE(review): because empty cells are read as '', a work whose stored UUID is blank still
    # produces a delete row with an empty UUID -- confirm that is acceptable downstream.

    # The delete row identifies the existing claim by its UUID; the add row has a blank UUID
    # and carries the new class Q ID as the value.
    claims_to_delete_list.append({'qid': qid, uuid_column_name: uuid, uuid_for_claim_to_delete_column_name: old_p31_qid})
    claims_to_add_list.append({'qid': qid, uuid_column_name: '', uuid_for_claim_to_delete_column_name: class_qid})

# Both output files share the same three columns
fieldnames = ['qid', uuid_column_name, uuid_for_claim_to_delete_column_name]
write_dicts_to_csv(claims_to_delete_list, 'test_delete.csv', fieldnames)
write_dicts_to_csv(claims_to_add_list, 'test_upload.csv', fieldnames)

print('done')
