**Run Setup**

In [14]:
%run ./Setup.ipynb

**<mark>Provide the CSV file name that contains metadata to be updated</mark>**

In [15]:
# Name of the CSV file that holds the metadata to apply.
# The file can be generated by the Extract Purview Asset notebook and then populated manually
# with descriptions, BG (glossary) terms and classifications.
# It must be located at the root of the working folder.
ExtractedAssetCSVFileName = 'yourfilename_details_purview_assets.csv'

**Read into a dataframe and start process**

In [16]:
# Locate the CSV inside the working folder and load it into a dataframe.
# index_col=0 makes the first CSV column the dataframe index.
ExtractedAssetCSVFullPath = os.path.join(root_working_folder, ExtractedAssetCSVFileName)
dfUpdates = pd.read_csv(
    ExtractedAssetCSVFullPath,
    sep=',',
    delimiter=None,
    header='infer',
    index_col=0,
)

In [17]:
# Fill the folder-name template with the PURVIEW_NAME environment variable and
# place the resulting folder inside the working folder.
update_paylod_folder_name = update_paylod_folder_name_template.format(os.getenv('PURVIEW_NAME'))
update_paylod_folder_fullpath = os.path.join(root_working_folder, update_paylod_folder_name)

# Always start from an empty folder: remove whatever a previous run left behind
# (errors during removal are raised, not ignored), then create it again.
if os.path.exists(update_paylod_folder_fullpath):
    shutil.rmtree(update_paylod_folder_fullpath, ignore_errors=False)
os.mkdir(update_paylod_folder_fullpath)

In [18]:
import numpy as np

# Lookup tables built from the CSV rows and consumed by the script-generation cells:
#   asset_dic: asset guid -> description (key -1 is the placeholder for an unresolved asset)
#   bg_dic:    glossary term -> list of asset guids the term must be assigned to
#   cls_dic:   classification -> list of asset guids that must receive it
asset_dic = {}
bg_dic = {}
cls_dic = {}


def _split_values(raw_value):
    """Split a separator-delimited CSV cell into its trimmed, non-empty items.

    Returns an empty list when the cell is missing (NaN/None). Items are trimmed
    individually so that "a; b" does not yield the item " b".
    """
    if pd.isna(raw_value):
        return []
    pieces = (piece.strip() for piece in str(raw_value).split(separator_char))
    return [piece for piece in pieces if piece]


for index, row in dfUpdates.iterrows():
    thisQualifiedName = row['qualifiedName']

    if pd.isna(row['id']):
        # No guid in the CSV: look the asset up by its qualified name and accept
        # the result only when the search is unambiguous (exactly one hit).
        assetcnt, dfID = searchPurviewAssets(keyword=thisQualifiedName, recursive_read=False)
        if assetcnt == 1:
            thisID = dfID.loc[0]['id']
        else:
            print('Asset with name {0} could not be found'.format(thisQualifiedName))
            thisID = -1
    else:
        thisID = row['id']

    # NOTE: every unresolved asset shares the key -1, so only the description of the
    # last unresolved row is kept; the message printed above identifies each of them.
    asset_dic[thisID] = row['description']

    if thisID == -1:
        # An unresolved asset has no guid: keeping the -1 placeholder in the term and
        # classification maps would put an invalid guid into the generated payloads
        # and commands, so it is left out of both.
        continue

    # Group asset guids by glossary term
    for term in _split_values(row['term']):
        bg_dic.setdefault(term, []).append(thisID)

    # Group asset guids by classification
    for classification in _split_values(row['classification']):
        cls_dic.setdefault(classification, []).append(thisID)


**Generate the script and payload files for asset description update**

In [19]:
# Command line written to the generated script for each asset description update.
# Placeholders: {0} = asset guid, {1} = description text.
# Earlier variant that passed the description through a payload file (kept for reference):
#asset_update_cli_template = "pv entity put --guid \"{0}\" --name \"description\" --payload-file \"{1}\""
asset_update_cli_template = "pv entity put --guid \"{0}\" --attrName=\"description\" --attrValue=\"{1}\""

In [20]:
def _ps_escape(text):
    """Escape text so it can be embedded in a PowerShell double-quoted string.

    Inside double quotes PowerShell treats the backtick as the escape character,
    `$` as the start of a variable/sub-expression and `"` as the end of the string.
    Each of them is prefixed with a backtick; backticks are handled first so the
    escapes added afterwards are not doubled.
    """
    escaped = str(text).replace('`', '``')
    return escaped.replace('$', '`$').replace('"', '`"')


# Generates the PowerShell script that updates the asset descriptions.
desc_update_script_file = os.path.join(update_paylod_folder_fullpath, os.getenv('PURVIEW_NAME')+"_asset_desciption_update.ps1_" )

# `with` closes the file on every exit path. The previous try/finally opened the file
# inside the `try`, so a failure in open() made the `finally` fail on (or close a stale)
# `scriptfile` and hid the real error.
with open(desc_update_script_file, "w") as scriptfile:
    # Script header: environment variables read from the current environment.
    for env_name in ('PURVIEW_NAME', 'AZURE_CLIENT_ID', 'AZURE_TENANT_ID'):
        scriptfile.write("$env:{0}='{1}'\n".format(env_name, os.getenv(env_name)))
    # The secret is written as a placeholder on purpose; it is filled in by hand before running.
    scriptfile.write("$env:AZURE_CLIENT_SECRET='{0}'\n".format("your client secret"))
    scriptfile.write("\n")

    for assetId, desc in asset_dic.items():
        # Nothing to update when the CSV has no description for the asset.
        if pd.isna(desc):
            continue

        scriptfile.write("\n")
        if assetId == -1:
            # -1 is the placeholder for an asset whose guid could not be resolved.
            # Line breaks are collapsed so the text cannot spill out of the comment line.
            comment_text = " ".join(str(desc).splitlines())
            scriptfile.write("#asset id could not be found -- skipping updating '{0}' description".format(comment_text))
        else:
            # The description lands inside a double-quoted PowerShell argument, so quotes,
            # `$` and backticks must be escaped or they would break or alter the command.
            scriptfile.write(asset_update_cli_template.format(assetId, _ps_escape(desc)))
        scriptfile.write("\n")

**Generate the script and payload files for asset term association**

In [21]:
# Command line written to the generated script for each glossary term assignment.
# Placeholders: {0} = term guid, {1} = path of the JSON payload file listing the asset guids.
term_assignment_cli_template = "pv glossary createTermsAssignedEntities --termGuid \"{0}\" --payloadFile \"{1}\""

In [22]:
# Generates the PowerShell script that assigns glossary terms to assets, plus one
# JSON payload file per term listing the guids of the assets to assign.
bg_update_script_file = os.path.join(update_paylod_folder_fullpath, os.getenv('PURVIEW_NAME')+"_asset_term_association.ps1_" )

# `with` closes the file on every exit path. The previous try/finally opened the file
# inside the `try`, so a failure in open() made the `finally` fail on (or close a stale)
# `scriptfile` and hid the real error.
with open(bg_update_script_file, "w") as scriptfile:
    # Script header: environment variables read from the current environment.
    for env_name in ('PURVIEW_NAME', 'AZURE_CLIENT_ID', 'AZURE_TENANT_ID'):
        scriptfile.write("$env:{0}='{1}'\n".format(env_name, os.getenv(env_name)))
    # The secret is written as a placeholder on purpose; it is filled in by hand before running.
    scriptfile.write("$env:AZURE_CLIENT_SECRET='{0}'\n".format("your client secret"))
    scriptfile.write("\n")

    for term, assetsList in bg_dic.items():
        thisTermId = formalNameToGuid(term)
        # An empty (or missing) guid means the term could not be resolved.
        if not thisTermId:
            scriptfile.write("\n")
            scriptfile.write("#A unique term id could not be found -- skipping assignment for '{0}' term".format(term))
            scriptfile.write("\n")
            continue

        # -1 is the placeholder for an asset whose guid could not be resolved;
        # it must never be sent as a guid in the payload.
        resolvedAssets = [assetGuid for assetGuid in assetsList if assetGuid != -1]
        if not resolvedAssets:
            scriptfile.write("\n")
            scriptfile.write("#No asset id could be found -- skipping assignment for '{0}' term".format(term))
            scriptfile.write("\n")
            continue

        # Write the payload once per term (it used to be rewritten for every asset).
        term_update_file = os.path.join(update_paylod_folder_fullpath, thisTermId + "_assignment.json")
        json_obj = [{'guid': '{0}'.format(assetGuid)} for assetGuid in resolvedAssets]
        with open(term_update_file, 'w', encoding='utf-8') as jsonFile:
            json.dump(json_obj, jsonFile)

        scriptfile.write("\n")
        scriptfile.write("# Updating term assignment: {0}".format(term))
        scriptfile.write("\n")
        scriptfile.write(term_assignment_cli_template.format(thisTermId, term_update_file))
        scriptfile.write("\n")

In [23]:
# Command line written to the generated script for each asset/classification pair.
# Placeholders: {0} = asset guid, {1} = path of the JSON payload file holding the classification.
classification_assignment_cli_template = "pv entity createClassifications --guid=\"{0}\" --payloadFile=\"{1}\""

In [24]:
# Generates the PowerShell script that applies classifications to assets, plus one
# JSON payload file per classification.
classification_update_script_file = os.path.join(update_paylod_folder_fullpath, os.getenv('PURVIEW_NAME')+"_asset_classification_association.ps1_" )

# `with` closes the file on every exit path. The previous try/finally opened the file
# inside the `try`, so a failure in open() made the `finally` fail on (or close a stale)
# `scriptfile` and hid the real error.
with open(classification_update_script_file, "w") as scriptfile:
    # Script header: environment variables read from the current environment.
    for env_name in ('PURVIEW_NAME', 'AZURE_CLIENT_ID', 'AZURE_TENANT_ID'):
        scriptfile.write("$env:{0}='{1}'\n".format(env_name, os.getenv(env_name)))
    # The secret is written as a placeholder on purpose; it is filled in by hand before running.
    scriptfile.write("$env:AZURE_CLIENT_SECRET='{0}'\n".format("your client secret"))
    scriptfile.write("\n")
    scriptfile.write("# Errors might be due to the fact that a classification already exists on the asset. You can safely ignore these type of errors.")
    scriptfile.write("\n")
    scriptfile.write("\n")

    for classification, assetsList in cls_dic.items():
        # Dots are replaced so the classification name can be used as a file name.
        classification_update_file = os.path.join(update_paylod_folder_fullpath, classification.replace(".","_") + "_assignment.json")

        # The payload is a JSON list holding just this classification.
        with open(classification_update_file, 'w', encoding='utf-8') as jsonFile:
            json.dump([classification], jsonFile)

        scriptfile.write("\n")
        scriptfile.write("# Updating classification assignment: {0}".format(classification))

        # -1 is the placeholder for an asset whose guid could not be resolved;
        # emitting a command for it would call the CLI with an invalid guid.
        resolvedAssets = [assetGuid for assetGuid in assetsList if assetGuid != -1]
        skippedCount = len(assetsList) - len(resolvedAssets)
        if skippedCount:
            scriptfile.write("\n")
            scriptfile.write("# {0} asset id(s) could not be found -- skipping them for this classification".format(skippedCount))

        # One command per asset; all of them share the payload file written above.
        for assignmentAsset in resolvedAssets:
            scriptfile.write("\n")
            scriptfile.write(classification_assignment_cli_template.format(assignmentAsset, classification_update_file))
            scriptfile.write("\n")

In [25]:
# Confirmation message printed as the last step of the notebook.
print("Scripts and payload files were generated")