In [13]:
# Imports a 'Data Entity' (conceptual-level) export from iServer into the conceptual level.
# The export is expected to contain a few custom columns that are not part of the
# out-of-the-box iServer export. isDeleted is one of them; the others have not been
# recorded here (TODO: list them).
# The iServer export sheet does not include the model ID for the Business Areas
# (Data Subject Area), so that has to be added as well
# (presumably via TargetBusinessAreaModelKeyPhrase below -- confirm).
import sqlite3
import pandas as pd
import hashing

# --- Source workbook ---------------------------------------------------------
# Directory (with trailing slash) and file name of the iServer export; the two
# are joined by plain string concatenation when the workbook is read.
TargetExcelFilepath = "imports/concept/"
TargetExcelFilename = "Data Entity.xlsx"

# --- Key phrases --------------------------------------------------------------
# Prefix used for every conceptual-entity key phrase built from this export.
TargetModelKeyPhrase = "wheel.count.wash"
# Prefix used for the business-area ('Data Subject Area') key phrases.
TargetBusinessAreaModelKeyPhrase = "structure.turn.train"

# --- Database -----------------------------------------------------------------
# SQLite database file, resolved relative to the notebook's working directory.
conn = sqlite3.connect("app_db.sqlite")
cur = conn.cursor()


In [20]:
# Load the iServer export. Blank cells are replaced with '' so the key phrases
# below can be built by plain string concatenation.
df = pd.read_excel(TargetExcelFilepath + TargetExcelFilename).fillna('')


def _int_or_blank(value):
    """Return int(value), or '' when the cell was blank."""
    return int(value) if value != '' else ''


# Flag columns: coerce to int, keeping blanks as ''.
for flag_column in ('isDeleted', 'Personally Identifiable Information'):
    df[flag_column] = df[flag_column].apply(_int_or_blank)

# Constant and derived columns are assigned column-wise instead of through a
# row-wise df.apply(..., axis=1): the results are the same for a populated
# frame, and an empty export no longer risks apply() handing back a DataFrame
# where a single column is expected.
df['RecordSource'] = 'Py.iserver_to_conceptual'

df['ModelKeyPhrase'] = TargetModelKeyPhrase
df['ConceptualEntityKeyPhrase'] = df['ModelKeyPhrase'] + '.' + df['Name']
df['ModelConceptualEntityKeyPhrase'] = df['ModelKeyPhrase'] + ':' + df['ConceptualEntityKeyPhrase']
df['BusinessAreaKeyPhrase'] = TargetBusinessAreaModelKeyPhrase + '.' + df['Data Subject Area']
df['BusinessAreaConceptualEntityKeyPhrase'] = df['BusinessAreaKeyPhrase'] + ':' + df['ConceptualEntityKeyPhrase']

# Descriptive columns that feed HashDiff.
hash_diff_columns = [
    'Name'
  , 'isDeleted'
  , 'Description'
  , 'Definition'
  , 'SubType'
  , 'Data Subject Area'
  , 'Business Owner'
  , 'Data Steward'
  , 'Personally Identifiable Information'
  , 'Categories'
  , 'Model'
  , 'Date Last Modified'
  , 'Category (General)'
  , 'Last Modified By'
  , 'Data Criticality'
  , 'Information Security Classification'
  , 'Type'
  , 'Owner'
  , 'Privacy Classification'
  , 'Source'
  , 'Storage'
  , 'Filter Set'
]

# (new hash column, source columns), applied in this order.
# NOTE(review): hashing.add_md5_hash_column is a project helper; presumably it
# returns the frame with an MD5 of the listed columns added under
# md5_column_name -- confirm against the hashing module.
hash_specs = [
    ('ModelHashKey', ['ModelKeyPhrase'])
  , ('ConceptualEntityHashKey', ['ConceptualEntityKeyPhrase'])
  , ('BusinessAreaHashKey', ['BusinessAreaKeyPhrase'])
  , ('HashDiff', hash_diff_columns)
  , ('ModelConceptualEntityHashKey', ['ModelConceptualEntityKeyPhrase'])
  , ('BusinessAreaConceptualEntityHashKey', ['BusinessAreaConceptualEntityKeyPhrase'])
]
for hash_column, hashed_fields in hash_specs:
    df = hashing.add_md5_hash_column(
        df
      , md5_column_name = hash_column
      , columns = hashed_fields
    )

# Null out the business-area link key on rows where there is no business area.
no_business_area = df['Data Subject Area'] == ''
df.loc[no_business_area, 'BusinessAreaConceptualEntityHashKey'] = pd.NA

In [15]:
# Clear down and write to staging as ONE transaction.
# The DELETE is no longer committed on its own: if the append fails, the
# rollback also restores the previous staging rows instead of leaving the
# table empty. `with conn:` commits on success and rolls back on an exception.
# NOTE(review): the DataFrame has no LoadDate column, yet later cells read
# stg.LoadDate -- presumably the staging table supplies it through a column
# default; confirm against the table definition.
with conn:
    conn.execute("DELETE FROM stg_Py_iServerToConceptual")
    staged_row_count = df.to_sql('stg_Py_iServerToConceptual', conn, if_exists='append', index=False)

# Value returned by to_sql, shown as the cell output.
staged_row_count

117

In [16]:
# Write to the Hub (ConceptualEntity)
# Inserts one hub row per distinct staged entity whose hash key is not yet in
# the hub.
# The anti-join uses NOT EXISTS rather than NOT IN (subquery): NOT IN matches
# nothing once the subquery returns a single NULL, which would silently stop
# every future hub insert. The explicit IS NOT NULL keeps the old behaviour of
# never inserting a NULL key from staging.
sql_query = """
INSERT INTO rv_h_ConceptualEntity
(
    ConceptualEntityHashKey
  , LoadDate
  , RecordSource
  , ConceptualEntityKeyPhrase
)
SELECT DISTINCT
    stg.ConceptualEntityHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.ConceptualEntityKeyPhrase
FROM
  stg_Py_iServerToConceptual AS stg
WHERE
  stg.ConceptualEntityHashKey IS NOT NULL
  AND NOT EXISTS (
    SELECT 1
    FROM rv_h_ConceptualEntity AS hub
    WHERE stg.ConceptualEntityHashKey = hub.ConceptualEntityHashKey
  )
"""
conn.execute(sql_query)
conn.commit()

In [17]:
# Write to the Satellite (ConceptualEntity_iServer)
# Inserts a satellite row for each staged entity that is new or has changed:
#   * the LEFT OUTER JOIN pairs every staged row with the satellite row that
#     has the greatest LoadDate for the same ConceptualEntityHashKey (if any);
#   * the WHERE clause keeps staged rows with no such satellite row
#     (sat.HashDiff IS NULL) or whose HashDiff differs from it.
# The column list mirrors the columns hashed into HashDiff in the transform cell.
sql_query = """
INSERT INTO rv_s_ConceptualEntity_iServer
(
    ConceptualEntityHashKey
  , LoadDate
  , RecordSource
  , HashDiff

  , "Name"
  , "isDeleted"
  , "Description"
  , "Definition"
  , "SubType"
  , "Data Subject Area"
  , "Business Owner"
  , "Data Steward"
  , "Personally Identifiable Information"
  , "Categories"
  , "Model"
  , "Date Last Modified"
  , "Category (General)"
  , "Last Modified By"
  , "Data Criticality"
  , "Information Security Classification"
  , "Type"
  , "Owner"
  , "Privacy Classification"
  , "Source"
  , "Storage"
  , "Filter Set"
)
SELECT DISTINCT
    stg.ConceptualEntityHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff

  , stg."Name"
  , stg."isDeleted"
  , stg."Description"
  , stg."Definition"
  , stg."SubType"
  , stg."Data Subject Area"
  , stg."Business Owner"
  , stg."Data Steward"
  , stg."Personally Identifiable Information"
  , stg."Categories"
  , stg."Model"
  , stg."Date Last Modified"
  , stg."Category (General)"
  , stg."Last Modified By"
  , stg."Data Criticality"
  , stg."Information Security Classification"
  , stg."Type"
  , stg."Owner"
  , stg."Privacy Classification"
  , stg."Source"
  , stg."Storage"
  , stg."Filter Set"

FROM
  stg_Py_iServerToConceptual AS stg
  LEFT OUTER JOIN rv_s_ConceptualEntity_iServer AS sat ON (
    stg.ConceptualEntityHashKey = sat.ConceptualEntityHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_ConceptualEntity_iServer AS z
      WHERE z.ConceptualEntityHashKey = sat.ConceptualEntityHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
""";
# Run the insert and make it durable.
conn.execute(sql_query)
conn.commit()

In [18]:
# Write to the Link (Model - ConceptualEntity)
# Inserts one link row per distinct staged Model/ConceptualEntity pair that is
# not yet in the link table.
#   * SELECT DISTINCT (as in the hub and satellite loads): the "not already
#     present" check only sees rows that existed before this statement ran, so
#     duplicate staged rows would otherwise all be inserted in the same run.
#   * NOT EXISTS with fully qualified columns replaces the unqualified
#     NOT IN (subquery), which matches nothing once the subquery yields a NULL.
sql_query = """
INSERT INTO rv_l_ModelConceptualEntity
(
    ModelConceptualEntityHashKey
  , LoadDate
  , RecordSource
  , ModelHashKey
  , ConceptualEntityHashKey
)
SELECT DISTINCT
    stg.ModelConceptualEntityHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.ModelHashKey
  , stg.ConceptualEntityHashKey
FROM
  stg_Py_iServerToConceptual AS stg
WHERE
  stg.ModelConceptualEntityHashKey IS NOT NULL
  AND NOT EXISTS (
    SELECT 1
    FROM rv_l_ModelConceptualEntity AS lnk
    WHERE stg.ModelConceptualEntityHashKey = lnk.ModelConceptualEntityHashKey
  )
"""
conn.execute(sql_query)
conn.commit()

In [19]:
# Write to the Link (BusinessArea - ConceptualEntity)
# Inserts one link row per distinct staged BusinessArea/ConceptualEntity pair
# that is not yet in the link table. Staged rows with a NULL link key are
# skipped; the transform cell sets that key to NULL when 'Data Subject Area'
# is blank.
#   * SELECT DISTINCT (as in the hub and satellite loads): the "not already
#     present" check only sees rows that existed before this statement ran, so
#     duplicate staged rows would otherwise all be inserted in the same run.
#   * NOT EXISTS with fully qualified columns replaces the unqualified
#     NOT IN (subquery), which matches nothing once the subquery yields a NULL.
sql_query = """
INSERT INTO rv_l_BusinessAreaConceptualEntity
(
    BusinessAreaConceptualEntityHashKey
  , LoadDate
  , RecordSource
  , BusinessAreaHashKey
  , ConceptualEntityHashKey
)
SELECT DISTINCT
    stg.BusinessAreaConceptualEntityHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.BusinessAreaHashKey
  , stg.ConceptualEntityHashKey
FROM
  stg_Py_iServerToConceptual AS stg
WHERE
  stg.BusinessAreaConceptualEntityHashKey IS NOT NULL
  AND NOT EXISTS (
    SELECT 1
    FROM rv_l_BusinessAreaConceptualEntity AS lnk
    WHERE stg.BusinessAreaConceptualEntityHashKey = lnk.BusinessAreaConceptualEntityHashKey
  )
"""
conn.execute(sql_query)
conn.commit()