Imports a JSON file as found in the Synapse Link target data lake into the Physical Structure and Physical Attribute hubs.  
Expected format:  
{

\<some other keys\>  
, "entities": \[  
  {  
    "name": "entityName"  
    , "attributes": \[ \<list of entity attribute dictionaries\> \]  
    , \<some other keys\>  
  }  
\]

}

In [10]:
import sqlite3
import pandas as pd
import numpy as np
import os
import json
from datetime import datetime
import sys
sys.path.append('../')
from user_packages import hashing

In [5]:
# Set variables
# Run-level configuration read by the cells below. Edit these values before running.

# Model key phrase stamped onto every staged row (ModelKeyPhrase column).
target_model_name = 'contain.pig.tall' # from bv_Model
# Source JSON file; joined with source_file_path when the file is opened.
source_file_name = 'jsonformatter.txt'
# Directory holding the source file. An empty string means the file name is used as-is.
source_file_path = ''

# Value written to the RecordSource column of every staged row.
record_source = 'SynapseJSON'
SystemKeyPhrase = 'iPropertyCloud' # this should come from the System Hub, for linking structure to system
# SERVER_NAME, DATABASE_NAME and SCHEMA_NAME are concatenated (dot-separated) with the
# entity name to build PhysicalStructureKeyPhrase.
SERVER_NAME = 'ipctrain.crm11.dynamics.com' # the server you harvested from
DATABASE_NAME = 'ipctrain' # dynamics doesn't have a DB per se, so we're using the most specific part of the server URL, as that's what shows up in Synapse Link
SCHEMA_NAME = 'SynLink' # dynamics doesn't have a schema. What shows up in Synapse Link is a defined subset of dbo, so I'm calling it a different schema.

In [6]:
# Load JSON file
# Decode explicitly rather than relying on the platform default encoding.
# 'utf-8-sig' reads plain UTF-8 and also strips a leading byte-order mark,
# which json.load would otherwise reject.
with open(os.path.join(source_file_path, source_file_name), 'r', encoding='utf-8-sig') as f:
  data = json.load(f)

# load db
# Connection to the metadata database; reused by every staging/hub/satellite/link cell below.
conn = sqlite3.connect('../full_metadata.db')

In [7]:
# Extract entity information
# 'entities' is required by the expected file format, so a missing key should fail loudly.
entities = data['entities']

# Only 'name' is guaranteed per entity; 'description' is treated as optional and
# becomes None when absent instead of raising KeyError.
entity_info = [
  {'name': entity['name'], 'description': entity.get('description')}
  for entity in entities
]

# Naming the columns keeps the frame's schema stable even when the file has no entities.
df_entities = pd.DataFrame(entity_info, columns=['name', 'description'])

df_entities

Unnamed: 0,name,description
0,account,account
1,appointment,appointment
2,characteristic,characteristic
3,contact,contact
4,email,email
5,incident,incident
6,letter,letter
7,msdyn_agentstatushistory,msdyn_agentstatushistory
8,msdyn_channel,msdyn_channel
9,msdyn_ocliveworkitem,msdyn_ocliveworkitem


In [18]:
# Extract attribute information
# Builds one record per (entity, attribute). Precision and scale are only filled in
# when the attribute carries an 'is.dataFormat.numeric.shaped' trait.
attribute_columns = ['entity_name', 'name', 'dataType', 'maxLength', 'precision', 'scale']

attributes_info = []
for entity in entities:
  for attribute in entity['attributes']:
    attr_info = {
      'entity_name': entity['name']
      , 'name': attribute['name']
      , 'dataType': attribute['dataType']
      , 'maxLength': attribute.get('maxLength') # tolerate attributes that carry no maxLength
      , 'precision': None
      , 'scale': None
    }
    # Check for precision and scale in traits.
    # Entries that are not dictionaries (e.g. a bare trait name) or that have no
    # 'arguments' list are skipped instead of raising.
    for trait in attribute.get('cdm:traits') or []:
      if not isinstance(trait, dict):
        continue
      if trait.get('traitReference') != 'is.dataFormat.numeric.shaped':
        continue
      for arg in trait.get('arguments') or []:
        if isinstance(arg, dict) and arg.get('name') in ('precision', 'scale'):
          attr_info[arg['name']] = arg.get('value')
    attributes_info.append(attr_info)

# Explicit columns keep the schema stable when no attributes were found.
df_attributes = pd.DataFrame(attributes_info, columns=attribute_columns)
# Store missing values as None rather than NaN.
df_attributes = df_attributes.replace({np.nan: None})

df_attributes

Unnamed: 0,entity_name,name,dataType,maxLength,precision,scale
0,account,Id,guid,-1,,
1,account,SinkCreatedOn,dateTime,-1,,
2,account,SinkModifiedOn,dateTime,-1,,
3,account,statecode,int64,-1,,
4,account,statuscode,int64,-1,,
...,...,...,...,...,...,...
6995,tllc_portalnotifications,transactioncurrencyidname,string,200,,
6996,tllc_portalnotifications,traversedpath,string,2500,,
6997,tllc_portalnotifications,utcconversiontimezonecode,int64,-1,,
6998,tllc_portalnotifications,versionnumber,int64,-1,,


In [12]:
# Physical Structure processing
# Work on a copy: assigning columns to an alias of df_entities would silently
# change the frame that the entity-extraction cell already displayed.
df = df_entities.copy()

# columns with variable data
df['RecordSource'] = record_source
df['LoadDate'] = datetime.now()
df['ModelKeyPhrase'] = target_model_name
df['SystemKeyPhrase'] = SystemKeyPhrase
df['SERVER_NAME'] = SERVER_NAME
df['DATABASE_NAME'] = DATABASE_NAME
df['SCHEMA_NAME'] = SCHEMA_NAME

# derive keyphrase columns
# Column-wise string concatenation gives the same text as a row-wise apply,
# without the per-row Python call.
df['PhysicalStructureKeyPhrase'] = (
  df['SERVER_NAME']
  + '.' + df['DATABASE_NAME']
  + '.' + df['SCHEMA_NAME']
  + '.' + df['name'] # This corresponds to the Logical Name
)
df['ModelPhysicalStructureKeyPhrase'] = df['ModelKeyPhrase'] + ':' + df['PhysicalStructureKeyPhrase']
df['SystemPhysicalStructureKeyPhrase'] = df['SystemKeyPhrase'] + ':' + df['PhysicalStructureKeyPhrase']

# hash the keyphrases and the payload
# Each entry is (target hash column, source columns). HashDiff covers the
# descriptive payload and is compared by the satellite load.
hash_specs = [
    ('ModelHashKey', ['ModelKeyPhrase'])
  , ('SystemHashKey', ['SystemKeyPhrase'])
  , ('PhysicalStructureHashKey', ['PhysicalStructureKeyPhrase'])
  , ('ModelPhysicalStructureHashKey', ['ModelPhysicalStructureKeyPhrase'])
  , ('SystemPhysicalStructureHashKey', ['SystemPhysicalStructureKeyPhrase'])
  , ('HashDiff', ['name', 'description', 'SERVER_NAME', 'DATABASE_NAME', 'SCHEMA_NAME'])
]
for hash_column, source_columns in hash_specs:
  df = hashing.add_md5_hash_column(
    df
    , md5_column_name = hash_column
    , columns = source_columns
  )

df


Unnamed: 0,name,description,RecordSource,LoadDate,ModelKeyPhrase,SystemKeyPhrase,SERVER_NAME,DATABASE_NAME,SCHEMA_NAME,PhysicalStructureKeyPhrase,ModelPhysicalStructureKeyPhrase,SystemPhysicalStructureKeyPhrase,ModelHashKey,SystemHashKey,PhysicalStructureHashKey,ModelPhysicalStructureHashKey,SystemPhysicalStructureHashKey,HashDiff
0,account,account,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,808cb41c48b6f9e04e11e53019506d85,71005a328bffd5a7d884b01787c6da6f,c7772fde51679073bdd719049ae40930,72877cb9c2a64ffa94b9b494213c98f5
1,appointment,appointment,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,06e997e22e90a840dd2cdc22674397f4,df3da405548f51cb5f100624874cb4c9,e1a2ca4005e6a54d1b9e43733375c92e,51db238aa59f5b5adf85ebdb1ba002b4
2,characteristic,characteristic,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.c...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,db9162554da69f7e6ed7233b7f8bab6c,aadac5e03a45f57c46cf25f5f7e41593,10580a0f9695651e95aa767a2b38012d,c8b72170764e50f8c223487297a54ec5
3,contact,contact,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.c...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,d692d3a78c5de1f001aa52b532afee4d,2659402dd67625a7fec5a8e520caa7be,65ff35351a87f0cf2f1b8cd870fa8a78,3689286ce376903764a703bb0873f08d
4,email,email,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.e...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,fca871a9931c4452c1b3e1b74e2ec711,ec05706f3fa5cd75104dffff507f1e11,1076fd71faa274985f45ec8d2de5b897,00860679933eb1d9f7bb7fc3be880632
5,incident,incident,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.i...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,fcb9c76b4880529da341747183ac74ef,8883f125c691398d1409ab3606a19fb7,48dbd0d25bd571d88741bb39e82a1e1e,2e5f7f516a57b4b14d664cbe6007b004
6,letter,letter,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.l...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,3a7e8709f8d6161c4ccfd9ba9d35e5f3,8e294f32642dc17fffc23973f8c5f827,55ea9e0e2aa35af4a6e68f42dc960070,4a40987e396aee70c368fb41d7fa3fc2
7,msdyn_agentstatushistory,msdyn_agentstatushistory,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.m...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,dc5d18dd2896bba3c0cc16138d089f35,dd0231797b7c4293075232b6655a78bf,9b7966711187e903ee94893b88434104,f94b5b16acdb83f409d3adb2f9f32246
8,msdyn_channel,msdyn_channel,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.m...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,5d573c51f52883b19be335b684e3fbd3,b2a7349f8669cfeebeeec4e32c3f4b49,95aeb68a28a2760488385bfa3f46a16b,c5a0c2ae5d821aa0854b0de52b0acd2a
9,msdyn_ocliveworkitem,msdyn_ocliveworkitem,SynapseJSON,2024-08-01 16:30:22.189653,contain.pig.tall,iPropertyCloud,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.m...,contain.pig.tall:ipctrain.crm11.dynamics.com.i...,iPropertyCloud:ipctrain.crm11.dynamics.com.ipc...,6b32f893756c34985db247ea820d7799,54b366d66a6abb6abcdda98f7a7ecd16,afb7202416b1d48b5222f8d87cbb435e,605655e63bee37e4b2f54f36ecad4cc7,a72f2c976dd5b05e933604fbf09ea373,618cf6beec222de4f9eeb40281566238


In [13]:
# Clear down and Write to staging
# Staging is a full refresh: remove the previous batch and commit that first.
conn.execute("DELETE FROM stg_Py_SynLinkJSONToPhysicalStructure")
conn.commit()

# Append the prepared frame; the value returned by to_sql is shown as the cell output.
df.to_sql(
  name='stg_Py_SynLinkJSONToPhysicalStructure'
  , con=conn
  , index=False
  , if_exists='append'
)

59

In [14]:
# Write to the Hub (PhysicalStructure)
# Insert-only load: a staged hash key is added to the hub only when the hub
# does not already contain it, so existing hub rows are never touched.
hub_insert_sql = """
INSERT INTO rv_h_PhysicalStructure
(
    PhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , PhysicalStructureKeyPhrase
)
SELECT DISTINCT
    PhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , PhysicalStructureKeyPhrase
FROM
  stg_Py_SynLinkJSONToPhysicalStructure
WHERE
  PhysicalStructureHashKey NOT IN (SELECT PhysicalStructureHashKey FROM rv_h_PhysicalStructure)
"""
conn.execute(hub_insert_sql)
conn.commit()

In [15]:
# Write to the Satellite (PhysicalStructure_SynLinkJSON)
# Change detection: each staged row is left-joined to the satellite row with the
# latest LoadDate for the same hash key. A new satellite row is inserted when
# there is no such row yet, or when its HashDiff differs from the staged HashDiff.
satellite_insert_sql = """
INSERT INTO rv_s_PhysicalStructure_SynLinkJSON
(
    PhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , HashDiff

  , "name"
  , "description"
  , "SERVER_NAME"
  , "DATABASE_NAME"
  , "SCHEMA_NAME"

)
SELECT DISTINCT
    stg.PhysicalStructureHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff
  
  , stg."name"
  , stg."description"
  , stg."SERVER_NAME"
  , stg."DATABASE_NAME"
  , stg."SCHEMA_NAME"

FROM
  stg_Py_SynLinkJSONToPhysicalStructure AS stg
  LEFT OUTER JOIN rv_s_PhysicalStructure_SynLinkJSON AS sat ON (
    stg.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_PhysicalStructure_SynLinkJSON AS z
      WHERE z.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
"""
conn.execute(satellite_insert_sql)
conn.commit()

In [16]:
# Write to the Link (Model - PhysicalStructure)
# Insert-only load of link rows that are not in the link table yet.
# DISTINCT matches the hub and satellite loads, so duplicate staging rows
# cannot produce duplicate link rows. Every column is qualified with the
# staging alias so the NOT IN predicate cannot be resolved against another table.
sql_query = """
INSERT INTO rv_l_ModelPhysicalStructure
(
  ModelPhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , ModelHashKey
  , PhysicalStructureHashKey
)
SELECT DISTINCT
  stg.ModelPhysicalStructureHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.ModelHashKey
  , stg.PhysicalStructureHashKey
FROM
  stg_Py_SynLinkJSONToPhysicalStructure stg
WHERE
  stg.ModelPhysicalStructureHashKey IS NOT NULL
  AND stg.ModelPhysicalStructureHashKey NOT IN (
    SELECT ModelPhysicalStructureHashKey
    FROM rv_l_ModelPhysicalStructure
  )
"""
conn.execute(sql_query)
conn.commit()

In [17]:
# Write to the Link (System - PhysicalStructure)
# Insert-only load of link rows that are not in the link table yet.
# DISTINCT matches the hub and satellite loads, so duplicate staging rows
# cannot produce duplicate link rows. Every column is qualified with the
# staging alias so the NOT IN predicate cannot be resolved against another table.
sql_query = """
INSERT INTO rv_l_SystemPhysicalStructure
(
  SystemPhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , SystemHashKey
  , PhysicalStructureHashKey
)
SELECT DISTINCT
  stg.SystemPhysicalStructureHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.SystemHashKey
  , stg.PhysicalStructureHashKey
FROM
  stg_Py_SynLinkJSONToPhysicalStructure stg
WHERE
  stg.SystemPhysicalStructureHashKey IS NOT NULL
  AND stg.SystemPhysicalStructureHashKey NOT IN (
    SELECT SystemPhysicalStructureHashKey
    FROM rv_l_SystemPhysicalStructure
  )
"""
conn.execute(sql_query)
conn.commit()

In [19]:
# Processing the attributes
# Work on a copy: assigning columns to an alias of df_attributes would silently
# change the frame that the attribute-extraction cell already displayed.
df = df_attributes.copy()

# add extra columns

# columns with variable data
df['RecordSource'] = record_source
df['LoadDate'] = datetime.now()
df['ModelKeyPhrase'] = target_model_name
df['SERVER_NAME'] = SERVER_NAME
df['DATABASE_NAME'] = DATABASE_NAME
df['SCHEMA_NAME'] = SCHEMA_NAME

# derive keyphrase columns
# Column-wise string concatenation gives the same text as a row-wise apply,
# without one Python call per attribute row.
df['PhysicalStructureKeyPhrase'] = (
  df['SERVER_NAME']
  + '.' + df['DATABASE_NAME']
  + '.' + df['SCHEMA_NAME']
  + '.' + df['entity_name']
)
# The attribute key phrase is the owning structure's key phrase plus the attribute name.
df['PhysicalAttributeKeyPhrase'] = df['PhysicalStructureKeyPhrase'] + '.' + df['name']
df['PhysicalStructurePhysicalAttributeKeyPhrase'] = (
  df['PhysicalStructureKeyPhrase'] + ':' + df['PhysicalAttributeKeyPhrase']
)

# hash the keyphrases and the payload
# Each entry is (target hash column, source columns). HashDiff covers the
# descriptive payload and is compared by the satellite load.
hash_specs = [
    ('PhysicalStructureHashKey', ['PhysicalStructureKeyPhrase'])
  , ('PhysicalAttributeHashKey', ['PhysicalAttributeKeyPhrase'])
  , ('PhysicalStructurePhysicalAttributeHashKey', ['PhysicalStructurePhysicalAttributeKeyPhrase'])
  , ('HashDiff', [
        'entity_name'
      , 'name'
      , 'dataType'
      , 'maxLength'
      , 'precision'
      , 'scale'
      , 'SERVER_NAME'
      , 'DATABASE_NAME'
      , 'SCHEMA_NAME'
    ])
]
for hash_column, source_columns in hash_specs:
  df = hashing.add_md5_hash_column(
    df
    , md5_column_name = hash_column
    , columns = source_columns
  )

df

Unnamed: 0,entity_name,name,dataType,maxLength,precision,scale,RecordSource,LoadDate,ModelKeyPhrase,SERVER_NAME,DATABASE_NAME,SCHEMA_NAME,PhysicalStructureKeyPhrase,PhysicalAttributeKeyPhrase,PhysicalStructurePhysicalAttributeKeyPhrase,PhysicalStructureHashKey,PhysicalAttributeHashKey,PhysicalStructurePhysicalAttributeHashKey,HashDiff
0,account,Id,guid,-1,,,SynapseJSON,2024-08-01 16:43:57.349110,contain.pig.tall,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,808cb41c48b6f9e04e11e53019506d85,438db05224ffc0295922301d2417105a,33ed9a4d481dcfe785fa90cc07679797,14d0ec86c3165e839b857d0b13042524
1,account,SinkCreatedOn,dateTime,-1,,,SynapseJSON,2024-08-01 16:43:57.349110,contain.pig.tall,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,808cb41c48b6f9e04e11e53019506d85,c6c29f0044fcc1a789ac351791930f77,bda0b8f7e99d141510f226dba0e8f765,0119a1a5e574da831c1dbf4cb9971f0c
2,account,SinkModifiedOn,dateTime,-1,,,SynapseJSON,2024-08-01 16:43:57.349110,contain.pig.tall,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,808cb41c48b6f9e04e11e53019506d85,3a9c08b035950bcc4ecbbb8814c54302,49824c43aaacf615f96353a9c2a5dd50,f23c50601eb01d4a31af0367a3c5a1b8
3,account,statecode,int64,-1,,,SynapseJSON,2024-08-01 16:43:57.349110,contain.pig.tall,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,808cb41c48b6f9e04e11e53019506d85,fb308298cc4be0ba6d7f83807f910dd3,eb19134ec95e94d1390bd23f1bc909fe,8f9099316c866893d2adafdc5e39bf4a
4,account,statuscode,int64,-1,,,SynapseJSON,2024-08-01 16:43:57.349110,contain.pig.tall,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.a...,808cb41c48b6f9e04e11e53019506d85,daed55010cbeefb055d7d02369331614,51b575272bc05007019882be01f50ddc,28c9033a45c2021f4ea14adfb12893c7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6995,tllc_portalnotifications,transactioncurrencyidname,string,200,,,SynapseJSON,2024-08-01 16:43:57.349110,contain.pig.tall,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,85f5860b362a718b1c3bf6562eaabc8b,966e391a82882ed5d62bf38964209d34,d97171eed67714665cb782feec71a294,cd949356a582ba8efabf4c40d2312a61
6996,tllc_portalnotifications,traversedpath,string,2500,,,SynapseJSON,2024-08-01 16:43:57.349110,contain.pig.tall,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,85f5860b362a718b1c3bf6562eaabc8b,f45df3e9fbb00f6525ccdb98d46fada1,4a9302ba35688563cd02e3fe1bee802e,d6a4b98caf9bbdef74b338574ac7fd15
6997,tllc_portalnotifications,utcconversiontimezonecode,int64,-1,,,SynapseJSON,2024-08-01 16:43:57.349110,contain.pig.tall,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,85f5860b362a718b1c3bf6562eaabc8b,ffb7d96dd41c283d93c908285c76210e,73f8b9f1afe047beaec3960432d822a3,af8c919bba57e853fb45ebde17a5ba57
6998,tllc_portalnotifications,versionnumber,int64,-1,,,SynapseJSON,2024-08-01 16:43:57.349110,contain.pig.tall,ipctrain.crm11.dynamics.com,ipctrain,SynLink,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,ipctrain.crm11.dynamics.com.ipctrain.SynLink.t...,85f5860b362a718b1c3bf6562eaabc8b,54886b3eca3ca289e2ce70f0573fba11,672382ef193ddf9501cfe5090ba44598,bd5fd91cd33cb0e1eb72221e67625082


In [20]:
# Clear down and Write to staging
# Staging is a full refresh: remove the previous batch and commit that first.
conn.execute("DELETE FROM stg_Py_SynLinkJSONToPhysicalAttribute")
conn.commit()

# Append the prepared frame; the value returned by to_sql is shown as the cell output.
df.to_sql(
  name='stg_Py_SynLinkJSONToPhysicalAttribute'
  , con=conn
  , index=False
  , if_exists='append'
)

7000

In [21]:
# Write to the Hub (PhysicalAttribute)
# Insert-only load: a staged hash key is added to the hub only when the hub
# does not already contain it, so existing hub rows are never touched.
hub_insert_sql = """
INSERT INTO rv_h_PhysicalAttribute
(
    PhysicalAttributeHashKey
  , LoadDate
  , RecordSource
  , PhysicalAttributeKeyPhrase
)
SELECT DISTINCT
    PhysicalAttributeHashKey
  , LoadDate
  , RecordSource
  , PhysicalAttributeKeyPhrase
FROM
  stg_Py_SynLinkJSONToPhysicalAttribute
WHERE
  PhysicalAttributeHashKey NOT IN (SELECT PhysicalAttributeHashKey FROM rv_h_PhysicalAttribute)
"""
conn.execute(hub_insert_sql)
conn.commit()

In [22]:
# Write to the Satellite (PhysicalAttribute_SynLinkJSON)
# Change detection: each staged row is left-joined to the satellite row with the
# latest LoadDate for the same hash key. A new satellite row is inserted when
# there is no such row yet, or when its HashDiff differs from the staged HashDiff.
satellite_insert_sql = """
INSERT INTO rv_s_PhysicalAttribute_SynLinkJSON
(
    PhysicalAttributeHashKey
  , LoadDate
  , RecordSource
  , HashDiff

  , "entity_name"
  , "name"
  , "dataType"
  , "maxLength"
  , "precision"
  , "scale"
  , "SERVER_NAME"
  , "DATABASE_NAME"
  , "SCHEMA_NAME"

)
SELECT DISTINCT
    stg.PhysicalAttributeHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff
  
  , stg."entity_name"
  , stg."name"
  , stg."dataType"
  , stg."maxLength"
  , stg."precision"
  , stg."scale"
  , stg."SERVER_NAME"
  , stg."DATABASE_NAME"
  , stg."SCHEMA_NAME"

FROM
  stg_Py_SynLinkJSONToPhysicalAttribute AS stg
  LEFT OUTER JOIN rv_s_PhysicalAttribute_SynLinkJSON AS sat ON (
    stg.PhysicalAttributeHashKey = sat.PhysicalAttributeHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_PhysicalAttribute_SynLinkJSON AS z
      WHERE z.PhysicalAttributeHashKey = sat.PhysicalAttributeHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
"""
conn.execute(satellite_insert_sql)
conn.commit()

In [23]:
# Write to the Link (PhysicalStructure - PhysicalAttribute)
# Insert-only load of link rows that are not in the link table yet.
# DISTINCT matches the hub and satellite loads, so duplicate staging rows
# cannot produce duplicate link rows. Every column is qualified with the
# staging alias so the NOT IN predicate cannot be resolved against another table.
sql_query = """
INSERT INTO rv_l_PhysicalStructurePhysicalAttribute
(
  PhysicalStructurePhysicalAttributeHashKey
  , LoadDate
  , RecordSource
  , PhysicalStructureHashKey
  , PhysicalAttributeHashKey
)
SELECT DISTINCT
  stg.PhysicalStructurePhysicalAttributeHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.PhysicalStructureHashKey
  , stg.PhysicalAttributeHashKey
FROM
  stg_Py_SynLinkJSONToPhysicalAttribute stg
WHERE
  stg.PhysicalStructurePhysicalAttributeHashKey IS NOT NULL
  AND stg.PhysicalStructurePhysicalAttributeHashKey NOT IN (
    SELECT PhysicalStructurePhysicalAttributeHashKey
    FROM rv_l_PhysicalStructurePhysicalAttribute
  )
"""
conn.execute(sql_query)
conn.commit()