Imports an Excel workbook exported from XRM Toolbox into the Physical Structure and Physical Attribute hubs (plus their satellites and links).

  

XRM Toolbox

Tool: Metadata Document Generator

  \> Load Entities button

  \> Excel Workbook

  \> Generate All Attributes

  \> Select all entities

  \> Generate document

  \> (Wait)

In [None]:
import sqlite3
import pandas as pd
import numpy as np
from datetime import datetime
import sys
sys.path.append('../')
from user_packages import hashing

In [None]:
# Set variables
# Run configuration: the values below are read by the later cells of this notebook.
# Model key phrase; stamped onto every staged row as ModelKeyPhrase.
target_model_name = 'net.always.green'
# File name of the workbook generated by the Metadata Document Generator.
source_file_name = 'dataverse_allentities_allattributes_20240531.xlsx'
# Presumably the folder holding the workbook ('' = current working directory) - confirm against the load cell.
source_file_path = ''

# Stamped onto every staged row as RecordSource.
record_source = 'XRMMetadataGenerator'
SystemKeyPhrase = 'iPropertyCloud' # this should come from the System Hub
SERVER_NAME = 'ipctrain.crm11.dynamics.com' # the server you harvested from

In [3]:
# Load spreadsheet
# The workbook location is built from BOTH configuration values so that
# source_file_path is honoured; os.path.join returns the bare file name when
# the path is '' (the current default), so existing runs are unaffected.
import os

xl = pd.ExcelFile(os.path.join(source_file_path, source_file_name))
# load db
# The connection stays open on purpose: the cells below reuse `conn`.
conn = sqlite3.connect('../full_metadata.db')

In [13]:
entity_frames = []
attribute_frames = []


def _normalise_line_breaks(cell):
    """Replace Excel's escaped carriage returns ("_x000D_" + newline) with a newline.

    Non-string cells (numbers, dates, blanks) are returned unchanged, so a
    numeric or empty "Additional data" cell cannot raise AttributeError.
    """
    if isinstance(cell, str):
        return cell.replace("_x000D_\n", "\n")
    return cell


# Every worksheet is read twice: once without a header to pick up the entity
# header block at the top, and once with row 9 as the header to pick up the
# attribute table underneath it.
for sheet_name in xl.sheet_names:
    raw_sheet = xl.parse(sheet_name, header=None)

    # The first 8 rows hold the entity header as label/value pairs in the
    # first two columns. Slicing by position (instead of dropping columns
    # 2-7 by label) works whatever the width of the sheet. Transposing turns
    # the labels into column names, giving one row per entity.
    entity_header = raw_sheet.iloc[:8, :2].set_index(0).T
    entity_frames.append(entity_header)

    # The attribute table: row 9 is its header row.
    attributes = xl.parse(
        sheet_name,
        header=9,
        converters={"Additional data": _normalise_line_breaks},
    )
    # Tag every attribute with its owning entity.
    # The Logical Name is the unique one, not the Schema Name.
    attributes['entity_logical_name'] = entity_header['Logical Name'].iloc[0]
    attribute_frames.append(attributes)



In [5]:
# Stack the per-sheet entity header frames into a single frame
df = pd.concat(entity_frames)

# Stamp the load-wide constants onto every row
for constant_column, constant_value in (
    ('RecordSource', record_source),
    ('LoadDate', datetime.now()),
    ('ModelKeyPhrase', target_model_name),
    ('SystemKeyPhrase', SystemKeyPhrase),
    ('SERVER_NAME', SERVER_NAME),
):
    df[constant_column] = constant_value


def _structure_key_phrase(row):
    # Dataverse doesn't expose a DB or Schema name: there is only one DB and
    # one Schema, and you can't see the name of either. The key is therefore
    # just server + Logical Name (the Logical Name is the unique one, not the
    # Schema Name).
    return row['SERVER_NAME'] + '.' + row['Logical Name']


# Business key of the structure, then the two link keys derived from it
df['PhysicalStructureKeyPhrase'] = df.apply(_structure_key_phrase, axis=1)
for link_phrase_column, owner_phrase_column in (
    ('ModelPhysicalStructureKeyPhrase', 'ModelKeyPhrase'),
    ('SystemPhysicalStructureKeyPhrase', 'SystemKeyPhrase'),
):
    df[link_phrase_column] = df.apply(
        lambda row, owner=owner_phrase_column: row[owner] + ':' + row['PhysicalStructureKeyPhrase'],
        axis=1,
    )

# One MD5 hash key per key phrase, added in this order
for hash_column, phrase_column in (
    ('ModelHashKey', 'ModelKeyPhrase'),
    ('SystemHashKey', 'SystemKeyPhrase'),
    ('PhysicalStructureHashKey', 'PhysicalStructureKeyPhrase'),
    ('ModelPhysicalStructureHashKey', 'ModelPhysicalStructureKeyPhrase'),
    ('SystemPhysicalStructureHashKey', 'SystemPhysicalStructureKeyPhrase'),
):
    df = hashing.add_md5_hash_column(
        df,
        md5_column_name=hash_column,
        columns=[phrase_column],
    )

# HashDiff covers the descriptive payload; the satellite load compares it
# against the latest stored row to detect changes
payload_columns = [
    'Entity',
    'Plural Display Name',
    'Description',
    'Schema Name',
    'Logical Name',
    'Object Type Code',
    'Is Custom Entity',
    'Ownership Type',
    'SERVER_NAME',
]
df = hashing.add_md5_hash_column(
    df,
    md5_column_name='HashDiff',
    columns=payload_columns,
)

#df


In [6]:
# Empty the staging table, then append the freshly built entity frame to it
with conn:  # the connection context manager commits the DELETE on a clean exit
    conn.execute("DELETE FROM stg_Py_XRMExcelToPhysicalStructure")
df.to_sql(name='stg_Py_XRMExcelToPhysicalStructure', con=conn, if_exists='append', index=False)

1950

In [7]:
# Write to the Hub (PhysicalStructure)
# Inserts into rv_h_PhysicalStructure the distinct (hash key, load date,
# record source, key phrase) rows from staging whose PhysicalStructureHashKey
# is not already present in the hub, so re-running the cell does not insert
# the same key twice.
sql_query = """
INSERT INTO rv_h_PhysicalStructure
(
    PhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , PhysicalStructureKeyPhrase
)
SELECT DISTINCT
    PhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , PhysicalStructureKeyPhrase
FROM
  stg_Py_XRMExcelToPhysicalStructure
WHERE
  PhysicalStructureHashKey NOT IN (SELECT PhysicalStructureHashKey FROM rv_h_PhysicalStructure)
""";
# Run the insert, then commit it on the shared connection.
conn.execute(sql_query)
conn.commit()

In [8]:
# Write to the Satellite (PhysicalStructure_XRMMetadata)
sql_query = """
INSERT INTO rv_s_PhysicalStructure_XRMMetadata
(
    PhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , HashDiff

  , "Entity"
  , "Plural Display Name"
  , "Description"
  , "Schema Name"
  , "Logical Name"
  , "Object Type Code"
  , "Is Custom Entity"
  , "Ownership Type"
  , "SERVER_NAME"

)
SELECT DISTINCT
    stg.PhysicalStructureHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff
  
  , stg."Entity"
  , stg."Plural Display Name"
  , stg."Description"
  , stg."Schema Name"
  , stg."Logical Name"
  , stg."Object Type Code"
  , stg."Is Custom Entity"
  , stg."Ownership Type"
  , stg."SERVER_NAME"

FROM
  stg_Py_XRMExcelToPhysicalStructure AS stg
  LEFT OUTER JOIN rv_s_PhysicalStructure_XRMMetadata AS sat ON (
    stg.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_PhysicalStructure_XRMMetadata AS z
      WHERE z.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
""";
conn.execute(sql_query)
conn.commit()

In [9]:
# Write to the Link (Model - PhysicalStructure)
# Inserts one link row per staging row that has a non-null
# ModelPhysicalStructureHashKey not yet present in
# rv_l_ModelPhysicalStructure. The row carries the link hash key plus the
# Model and PhysicalStructure hash keys it joins.
sql_query = """
INSERT INTO rv_l_ModelPhysicalStructure
(
  ModelPhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , ModelHashKey
  , PhysicalStructureHashKey
)
SELECT
  stg.ModelPhysicalStructureHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.ModelHashKey
  , stg.PhysicalStructureHashKey
FROM
  stg_Py_XRMExcelToPhysicalStructure stg
WHERE
  stg.ModelPhysicalStructureHashKey IS NOT NULL
  AND ModelPhysicalStructureHashKey NOT IN (
    SELECT ModelPhysicalStructureHashKey
    FROM rv_l_ModelPhysicalStructure
  )
""";
# Run the insert, then commit it on the shared connection.
conn.execute(sql_query)
conn.commit()

In [10]:
# Write to the Link (System - PhysicalStructure)
# Inserts one link row per staging row that has a non-null
# SystemPhysicalStructureHashKey not yet present in
# rv_l_SystemPhysicalStructure. The row carries the link hash key plus the
# System and PhysicalStructure hash keys it joins.
sql_query = """
INSERT INTO rv_l_SystemPhysicalStructure
(
  SystemPhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , SystemHashKey
  , PhysicalStructureHashKey
)
SELECT
  stg.SystemPhysicalStructureHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.SystemHashKey
  , stg.PhysicalStructureHashKey
FROM
  stg_Py_XRMExcelToPhysicalStructure stg
WHERE
  stg.SystemPhysicalStructureHashKey IS NOT NULL
  AND SystemPhysicalStructureHashKey NOT IN (
    SELECT SystemPhysicalStructureHashKey
    FROM rv_l_SystemPhysicalStructure
  )
""";
# Run the insert, then commit it on the shared connection.
conn.execute(sql_query)
conn.commit()

In [16]:
# Combine the per-sheet attribute tables into one frame (one row per attribute)
df = pd.concat(attribute_frames)

# Columns holding the same value on every row
df['RecordSource'] = record_source
df['LoadDate'] = datetime.now()
df['ModelKeyPhrase'] = target_model_name
df['SystemKeyPhrase'] = SystemKeyPhrase
df['SERVER_NAME'] = SERVER_NAME

# Key phrase of the OWNING structure. It has to be built exactly as the
# entity load builds it (server + '.' + entity Logical Name), otherwise the
# PhysicalStructureHashKey written to the structure/attribute link can never
# match a row in the PhysicalStructure hub. On these rows 'Logical Name' is
# the attribute's own name, so the entity comes from 'entity_logical_name'.
# NOTE(review): link rows loaded with the previous phrase (system prefix +
# attribute name) carry different hash keys - confirm whether they need to
# be purged.
df['PhysicalStructureKeyPhrase'] = df.apply(
    lambda row: row['SERVER_NAME'] + '.' + row['entity_logical_name'],
    axis=1,
)

# Key phrase of the attribute itself: system, server, entity, attribute.
# The Logical Name is the unique one, not the Schema Name.
df['PhysicalAttributeKeyPhrase'] = df.apply(
    lambda row: (
        row['SystemKeyPhrase']
        + '.' + row['SERVER_NAME']
        + '.' + row['entity_logical_name']
        + '.' + row['Logical Name']
    ),
    axis=1,
)

# Key phrase of the structure/attribute link: '<structure>:<attribute>'
df['PhysicalStructurePhysicalAttributeKeyPhrase'] = df.apply(
    lambda row: row['PhysicalStructureKeyPhrase'] + ':' + row['PhysicalAttributeKeyPhrase'],
    axis=1,
)

# Hash the key phrases
df = hashing.add_md5_hash_column(
    df,
    md5_column_name='PhysicalStructureHashKey',
    columns=['PhysicalStructureKeyPhrase'],
)
df = hashing.add_md5_hash_column(
    df,
    md5_column_name='PhysicalAttributeHashKey',
    columns=['PhysicalAttributeKeyPhrase'],
)
df = hashing.add_md5_hash_column(
    df,
    md5_column_name='PhysicalStructurePhysicalAttributeHashKey',
    columns=['PhysicalStructurePhysicalAttributeKeyPhrase'],
)

# Hash the payload; the satellite load compares HashDiff against the latest
# stored row to detect changes
df = hashing.add_md5_hash_column(
    df,
    md5_column_name='HashDiff',
    columns=[
        'Logical Name',
        'Schema Name',
        'Display Name',
        'Attribute Type',
        'Description',
        'Custom Attribute',
        'Type',
        'Additional data',
        'entity_logical_name',
        'SERVER_NAME',
    ],
)

#df

In [17]:
# Empty the staging table, then append the freshly built attribute frame to it
with conn:  # the connection context manager commits the DELETE on a clean exit
    conn.execute("DELETE FROM stg_Py_XRMExcelToPhysicalAttribute")
df.to_sql(name='stg_Py_XRMExcelToPhysicalAttribute', con=conn, if_exists='append', index=False)

89773

In [18]:
# Write to the Hub (PhysicalAttribute)
# Inserts into rv_h_PhysicalAttribute the distinct (hash key, load date,
# record source, key phrase) rows from staging whose PhysicalAttributeHashKey
# is not already present in the hub, so re-running the cell does not insert
# the same key twice.
sql_query = """
INSERT INTO rv_h_PhysicalAttribute
(
    PhysicalAttributeHashKey
  , LoadDate
  , RecordSource
  , PhysicalAttributeKeyPhrase
)
SELECT DISTINCT
    PhysicalAttributeHashKey
  , LoadDate
  , RecordSource
  , PhysicalAttributeKeyPhrase
FROM
  stg_Py_XRMExcelToPhysicalAttribute
WHERE
  PhysicalAttributeHashKey NOT IN (SELECT PhysicalAttributeHashKey FROM rv_h_PhysicalAttribute)
""";
# Run the insert, then commit it on the shared connection.
conn.execute(sql_query)
conn.commit()

In [20]:
# Write to the Satellite (PhysicalAttribute_XRMMetadata)
sql_query = """
INSERT INTO rv_s_PhysicalAttribute_XRMMetadata
(
    PhysicalAttributeHashKey
  , LoadDate
  , RecordSource
  , HashDiff

  , "SERVER_NAME"
  , "entity_logical_name"
  , "Logical Name"

  , "Schema Name"
  , "Display Name"
  , "Attribute Type"
  , "Description"
  , "Custom Attribute"
  , "Type"
  , "Additional data"

)
SELECT DISTINCT
    stg.PhysicalAttributeHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff
  
  , stg."SERVER_NAME"
  , stg."entity_logical_name"
  , stg."Logical Name"

  , stg."Schema Name"
  , stg."Display Name"
  , stg."Attribute Type"
  , stg."Description"
  , stg."Custom Attribute"
  , stg."Type"
  , stg."Additional data"

FROM
  stg_Py_XRMExcelToPhysicalAttribute AS stg
  LEFT OUTER JOIN rv_s_PhysicalAttribute_XRMMetadata AS sat ON (
    stg.PhysicalAttributeHashKey = sat.PhysicalAttributeHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_PhysicalAttribute_XRMMetadata AS z
      WHERE z.PhysicalAttributeHashKey = sat.PhysicalAttributeHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
""";
conn.execute(sql_query)
conn.commit()

In [21]:
# Write to the Link (PhysicalStructure - PhysicalAttribute)
# Inserts one link row per staging row that has a non-null
# PhysicalStructurePhysicalAttributeHashKey not yet present in
# rv_l_PhysicalStructurePhysicalAttribute. The row carries the link hash key
# plus the PhysicalStructure and PhysicalAttribute hash keys it joins.
sql_query = """
INSERT INTO rv_l_PhysicalStructurePhysicalAttribute
(
  PhysicalStructurePhysicalAttributeHashKey
  , LoadDate
  , RecordSource
  , PhysicalStructureHashKey
  , PhysicalAttributeHashKey
)
SELECT
  stg.PhysicalStructurePhysicalAttributeHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.PhysicalStructureHashKey
  , stg.PhysicalAttributeHashKey
FROM
  stg_Py_XRMExcelToPhysicalAttribute stg
WHERE
  stg.PhysicalStructurePhysicalAttributeHashKey IS NOT NULL
  AND PhysicalStructurePhysicalAttributeHashKey NOT IN (
    SELECT PhysicalStructurePhysicalAttributeHashKey
    FROM rv_l_PhysicalStructurePhysicalAttribute
  )
""";
# Run the insert, then commit it on the shared connection.
conn.execute(sql_query)
conn.commit()