In [5]:
# Create new systems in the DB without the need for an Excel sheet
import sqlite3
import pandas as pd
import sys
sys.path.append('../')
from user_packages import hashing

# Open the metadata database. The path is relative, so it resolves against the
# working directory the notebook is run from.
db_path = '../full_metadata.db'
conn = sqlite3.connect(db_path)

# Cursor bound to the connection opened above.
cur = conn.cursor()

In [6]:
# System definition
# Edit the three values below to describe the system that should be created
# in the systems hub.

# Display name for the system. This is free text (nothing "magic" about it);
# something recognisable such as 'x system' is suggested.
system_name = 'DIP: Data Intelligence Platform'

# Uniquely identifiable phrase for the system. This is the value the system
# hash key is derived from.
system_key_phrase = 'DIP'

# Short description of the system.
system_description = 'A datavault based platform for the storage and mastery of enterprise data'

In [7]:
# Build a one-record frame from the system_* variables. The column names are
# the ones the later hashing and SQL cells refer to.
data = [
    dict(
        SystemKeyPhrase=system_key_phrase,
        SystemName=system_name,
        Description=system_description,
    )
]
df = pd.DataFrame(data)

#df

In [8]:
# Add the derived columns to the frame. Each helper call receives the frame,
# the name of the column to add and the list of source columns; the returned
# frame is re-bound to df.

# Hash key: derived from the key phrase only.
df = hashing.add_md5_hash_column(
    df,
    md5_column_name='SystemHashKey',
    columns=['SystemKeyPhrase'],
)

# Hash diff: derived from the descriptive attributes. The satellite load
# compares this value against the latest stored row to detect changes.
df = hashing.add_md5_hash_column(
    df,
    md5_column_name='HashDiff',
    columns=['SystemName', 'Description'],
)

# Same record source on every row. A scalar assignment broadcasts the value,
# so no row-wise apply is needed and the result does not depend on apply's
# return-shape inference when the frame has no rows.
df['RecordSource'] = 'Py.CreateSystem'

#df

In [13]:
# Staging load: empty stg_Py_CreateSystem, commit the delete, then append the
# current frame to the (already existing) table.
# NOTE(review): none of the visible cells add a LoadDate column to df, yet the
# hub and satellite inserts select LoadDate from this table. Presumably the
# table definition supplies it (e.g. a column default) -- confirm.
conn.execute("DELETE FROM stg_Py_CreateSystem")
conn.commit()

df.to_sql(
    name='stg_Py_CreateSystem',
    con=conn,
    if_exists='append',  # add rows to the existing table instead of replacing it
    index=False,         # do not write the DataFrame index as a column
)


1

In [14]:
# Write to the Hub
# Inserts every distinct staged system whose SystemHashKey is not already in
# rv_h_System, so re-running the cell does not insert the same key twice.
# NOTE(review): NOT IN matches nothing if rv_h_System.SystemHashKey ever holds
# a NULL -- confirm the column is declared NOT NULL.
sql_query = """
INSERT INTO rv_h_System
(
    SystemHashKey
  , LoadDate
  , RecordSource
  , SystemKeyPhrase
)
SELECT DISTINCT
    SystemHashKey
  , LoadDate
  , RecordSource
  , SystemKeyPhrase
FROM
  stg_Py_CreateSystem
WHERE
  SystemHashKey NOT IN (SELECT SystemHashKey FROM rv_h_System)
"""

# Use the connection as a context manager: the insert is committed when the
# block succeeds and rolled back if it raises, so a failed statement does not
# leave an open transaction on the connection shared by the other cells.
with conn:
    conn.execute(sql_query)

In [15]:
# Write to the Satellite
# Staging is left-joined to the most recent rv_s_System row for the same
# SystemHashKey (the row whose LoadDate equals MAX(LoadDate) for that key).
# A staged row is inserted when no such row exists (sat.HashDiff IS NULL) or
# when its HashDiff differs from the latest stored one.
sql_query = """
INSERT INTO rv_s_System
(
    SystemHashKey
  , LoadDate
  , RecordSource
  , HashDiff
  , SystemName
  , Description
)
SELECT DISTINCT
    stg.SystemHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff
  , stg.SystemName
  , stg.Description
FROM
  stg_Py_CreateSystem AS stg
  LEFT OUTER JOIN rv_s_System AS sat ON (
    stg.SystemHashKey = sat.SystemHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_System AS z
      WHERE z.SystemHashKey = sat.SystemHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
"""

# Use the connection as a context manager: the insert is committed when the
# block succeeds and rolled back if it raises, so a failed statement does not
# leave an open transaction on the connection shared by the other cells.
with conn:
    conn.execute(sql_query)