Imports an Excel book from XRM Toolbox, and a count of attributes per entity from the vault, to the Physical Structure Usage satellite.

XRM Toolbox

Tool: Entity Usage

  \> Inspect

  \> Count

  \> Export

In [26]:
import sqlite3
import pandas as pd
import numpy as np
from datetime import datetime
import sys
sys.path.append('../')
from user_packages import hashing

In [27]:
# Run settings for this load

# -- where the data was harvested from
SERVER_NAME = 'ipctrain.crm11.dynamics.com'  # the server you harvested from
SystemKeyPhrase = 'iPropertyCloud'  # this should come from the System Hub
record_source = 'XRMEntityUsage'

# -- the exported workbook
source_file_path = ''
source_file_name = 'ipc_train_entityusage.xlsx'

# -- target model
target_model_name = 'net.always.green'

In [28]:
# Load spreadsheet
# Honour source_file_path from the settings cell (it used to be ignored, so the
# workbook always had to sit in the working directory). An empty path joins to
# the bare file name, which is exactly what was opened before.
import os
xl = pd.ExcelFile(os.path.join(source_file_path, source_file_name))
# load db
conn = sqlite3.connect('../full_metadata.db')

In [29]:
# Read the first worksheet of the workbook, turn NaN cells into None,
# and discard the IsNoPrivilege column.
first_sheet = xl.sheet_names[0]
df = (
    xl.parse(first_sheet)
    .replace({np.nan: None})
    .drop(columns='IsNoPrivilege')
)

#df

In [30]:
# Get a frame with the attribute counts
# The view is filtered to SERVER_NAME, which is passed as a bound parameter
# rather than being formatted into the SQL text.
sql_query = """
SELECT
  "entity_logical_name" AS EntitySchemaName -- yes, I know. The tool says schema name, but its the logical name
  , AttributeCount AS CountAttributes
FROM
  bv_DataversePhysicalStructureAttributeCount
WHERE
  SERVER_NAME = ?
"""
df_count = pd.read_sql_query(sql=sql_query, con=conn, params=(SERVER_NAME,))

#df_count

In [31]:
# Join the count to the main df
# Left join keeps every spreadsheet row; rows without a count get a missing
# value, which the nullable Int64 dtype can hold alongside whole numbers.
merged = df.merge(right=df_count, how='left', on='EntitySchemaName')
df = merged.astype({'CountAttributes': 'Int64'})

#df

In [32]:
# add extra columns

# constant-valued columns (same names, same order)
df = df.assign(
    RecordSource=record_source,
    LoadDate=datetime.now(),
    SystemKeyPhrase=SystemKeyPhrase,
    SERVER_NAME=SERVER_NAME,
)


def _physical_structure_key_phrase(row):
    # Dataverse doesn't have a DB or Schema name. There's only one DB (and you
    # can't see its name). There's only one Schema (and you can't see its name).
    # So the key phrase is just server + '.' + entity.
    # The Logical Name is the unique, not Schema Name
    return row['SERVER_NAME'] + '.' + row['EntityName']


# derive keyphrase columns
df['PhysicalStructureKeyPhrase'] = df.apply(_physical_structure_key_phrase, axis=1)

# hash the keyphrases
key_phrase_columns = ['PhysicalStructureKeyPhrase']
df = hashing.add_md5_hash_column(
    df,
    md5_column_name='PhysicalStructureHashKey',
    columns=key_phrase_columns,
)

# hash the payload
payload_columns = [
    'EntityName',
    'EntitySchemaName',
    'CountCustomAttributes',
    'CountRows',
    'ErrorMessage',
    'CountAttributes',
    'SERVER_NAME',
]
df = hashing.add_md5_hash_column(
    df,
    md5_column_name='HashDiff',
    columns=payload_columns,
)

#df

In [33]:
# Clear down and Write to staging
# The staging table is emptied (and that delete committed) before the current
# frame is appended to it.
staging_table = 'stg_Py_XRMExcelUsageToPhysicalStructure'
conn.execute('DELETE FROM ' + staging_table)
conn.commit()
df.to_sql(name=staging_table, con=conn, index=False, if_exists='append')

1411

In [34]:
# Write to the Satellite (PhysicalStructure_XRMEntityUsage)
sql_query = """
INSERT INTO rv_s_PhysicalStructure_XRMEntityUsage
(
    PhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , HashDiff

  , "SERVER_NAME"
  , "EntityName"
  , "EntitySchemaName"
  , "CountCustomAttributes"
  , "CountRows"
  , "ErrorMessage"
  , "CountAttributes"

)
SELECT DISTINCT
    stg.PhysicalStructureHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff
  
  , stg."SERVER_NAME"
  , stg."EntityName"
  , stg."EntitySchemaName"
  , stg."CountCustomAttributes"
  , stg."CountRows"
  , stg."ErrorMessage"
  , stg."CountAttributes"

FROM
  stg_Py_XRMExcelUsageToPhysicalStructure AS stg
  LEFT OUTER JOIN rv_s_PhysicalStructure_XRMEntityUsage AS sat ON (
    stg.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_PhysicalStructure_XRMEntityUsage AS z
      WHERE z.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
""";
conn.execute(sql_query)
conn.commit()