In [9]:
# Loads new business-level conceptual entities (business areas) into the DB from an Excel sheet.
# Yes, that's overkill for single records.
import sqlite3
import pandas as pd
import sys
sys.path.append('../')
from user_packages import hashing

# Open the SQLite database file; the path is relative, so it is resolved
# against the kernel's current working directory.
conn = sqlite3.connect('app_db.sqlite')
# Cursor on the same connection. The cells visible in this notebook call
# conn.execute(...) directly and do not use it.
cur = conn.cursor()

# File name and folder of the Excel workbook to import. The two strings are
# concatenated as-is when the workbook is read, so the folder must keep its
# trailing slash.
TargetExcelFilename = 'BusinessArea.xlsx'
TargetExcelFilepath = 'imports/business_area/'

In [10]:
# Read the workbook and blank out missing cells so the string concatenations
# below never meet NaN.
df = pd.read_excel(TargetExcelFilepath + TargetExcelFilename)
df = df.fillna('')

# Business-key phrases, built column-wise (equivalent to the row-by-row form,
# but it also works on an empty sheet):
#   BusinessAreaKeyPhrase         = <ModelKeyPhrase>.<BusinessAreaName>
#   ModelBusinessAreaKeyPhrase    = <ModelKeyPhrase>:<BusinessAreaKeyPhrase>
#   BusinessBusinessAreaKeyPhrase = <BusinessName>:<BusinessAreaKeyPhrase>
df['BusinessAreaKeyPhrase'] = df['ModelKeyPhrase'] + '.' + df['BusinessAreaName']
df['ModelBusinessAreaKeyPhrase'] = df['ModelKeyPhrase'] + ':' + df['BusinessAreaKeyPhrase']
df['BusinessBusinessAreaKeyPhrase'] = df['BusinessName'] + ':' + df['BusinessAreaKeyPhrase']
df['RecordSource'] = 'Py.CreateBusinessArea'

# Hash column -> source columns it is derived from.
# hashing.add_md5_hash_column is a project helper; presumably it returns the
# frame with an MD5 of the listed columns added under md5_column_name -
# TODO confirm against user_packages.hashing.
hash_columns = {
    'ModelHashKey': ['ModelKeyPhrase'],
    'BusinessAreaHashKey': ['BusinessAreaKeyPhrase'],
    'ModelBusinessAreaHashKey': ['ModelBusinessAreaKeyPhrase'],
    'BusinessHashKey': ['BusinessName'],
    # The Business - BusinessArea link load selects
    # stg.BusinessBusinessAreaHashKey and skips NULLs; without this hash the
    # staging column stays empty and that link never receives any rows.
    'BusinessBusinessAreaHashKey': ['BusinessBusinessAreaKeyPhrase'],
    'HashDiff': ['BusinessAreaName', 'Description', 'isDeleted'],
}
for md5_column_name, columns in hash_columns.items():
    df = hashing.add_md5_hash_column(
        df,
        md5_column_name=md5_column_name,
        columns=columns,
    )


In [11]:
# Refresh staging: empty the table and commit, then append the prepared frame.
# The to_sql call stays last so the cell still displays its return value.
conn.execute("DELETE FROM stg_Py_ExcelToBusinessArea")
conn.commit()
df.to_sql(
    name='stg_Py_ExcelToBusinessArea',
    con=conn,
    if_exists='append',
    index=False,
)

11

In [12]:
# Write to the Hub (BusinessArea)
# Inserts each distinct (BusinessAreaHashKey, LoadDate, RecordSource,
# BusinessAreaKeyPhrase) combination from staging whose BusinessAreaHashKey is
# not already present in rv_h_BusinessArea, so keys loaded by an earlier run
# are not inserted again.
# NOTE(review): LoadDate is not assigned anywhere in the visible DataFrame
# preparation; presumably it comes from the Excel sheet or from a column
# default on the staging table - confirm.
sql_query = """
INSERT INTO rv_h_BusinessArea
(
    BusinessAreaHashKey
  , LoadDate
  , RecordSource
  , BusinessAreaKeyPhrase
)
SELECT DISTINCT
    BusinessAreaHashKey
  , LoadDate
  , RecordSource
  , BusinessAreaKeyPhrase
FROM
  stg_Py_ExcelToBusinessArea
WHERE
  BusinessAreaHashKey NOT IN (SELECT BusinessAreaHashKey FROM rv_h_BusinessArea)
""";
# Run the insert and commit it.
conn.execute(sql_query)
conn.commit()

In [13]:
# Write to the Satellite (BusinessArea)
# Each staging row is left-joined to the satellite row with the greatest
# LoadDate for the same BusinessAreaHashKey. A staging row is inserted when
# that joined row's HashDiff differs from the staging HashDiff, or when the
# joined HashDiff is NULL (typically because no satellite row exists yet for
# the key). Rows whose HashDiff matches the latest satellite row are skipped.
sql_query = """
INSERT INTO rv_s_BusinessArea
(
    BusinessAreaHashKey
  , LoadDate
  , RecordSource
  , HashDiff
  , BusinessAreaName
  , Description
  , isDeleted
)
SELECT DISTINCT
    stg.BusinessAreaHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff
  , stg.BusinessAreaName
  , stg.Description
  , stg.isDeleted
FROM
  stg_Py_ExcelToBusinessArea AS stg
  LEFT OUTER JOIN rv_s_BusinessArea AS sat ON (
    stg.BusinessAreaHashKey = sat.BusinessAreaHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_BusinessArea AS z
      WHERE z.BusinessAreaHashKey = sat.BusinessAreaHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
""";
# Run the insert and commit it.
conn.execute(sql_query)
conn.commit()

In [14]:
# Write to the Link (Model - BusinessArea)
# Inserts staging rows whose ModelBusinessAreaHashKey is non-NULL and not yet
# present in rv_l_ModelBusinessArea.
# DISTINCT matters here: several staging rows can carry the same
# ModelBusinessAreaHashKey (same model and business area, different
# BusinessName), and without it each of them would be inserted as a separate
# link row. This matches the DISTINCT used by the hub and satellite loads.
sql_query = """
INSERT INTO rv_l_ModelBusinessArea
(
    ModelBusinessAreaHashKey
  , LoadDate
  , RecordSource
  , ModelHashKey
  , BusinessAreaHashKey
)
SELECT DISTINCT
    stg.ModelBusinessAreaHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.ModelHashKey
  , stg.BusinessAreaHashKey
FROM
  stg_Py_ExcelToBusinessArea AS stg
WHERE
  stg.ModelBusinessAreaHashKey IS NOT NULL
  AND stg.ModelBusinessAreaHashKey NOT IN (
    SELECT ModelBusinessAreaHashKey
    FROM rv_l_ModelBusinessArea
  )
"""
# Run the insert and commit it.
conn.execute(sql_query)
conn.commit()

In [15]:
# Write to the Link (Business - BusinessArea)
# Inserts staging rows whose BusinessBusinessAreaHashKey is non-NULL and not
# yet present in rv_l_BusinessBusinessArea.
# DISTINCT keeps repeated staging rows from being inserted as duplicate link
# rows, matching the hub and satellite loads.
# NOTE(review): rows with a NULL BusinessBusinessAreaHashKey are skipped, so
# this insert does nothing unless the staging load populates that column.
sql_query = """
INSERT INTO rv_l_BusinessBusinessArea
(
    BusinessBusinessAreaHashKey
  , LoadDate
  , RecordSource
  , BusinessHashKey
  , BusinessAreaHashKey
)
SELECT DISTINCT
    stg.BusinessBusinessAreaHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.BusinessHashKey
  , stg.BusinessAreaHashKey
FROM
  stg_Py_ExcelToBusinessArea AS stg
WHERE
  stg.BusinessBusinessAreaHashKey IS NOT NULL
  AND stg.BusinessBusinessAreaHashKey NOT IN (
    SELECT BusinessBusinessAreaHashKey
    FROM rv_l_BusinessBusinessArea
  )
"""
# Run the insert and commit it.
conn.execute(sql_query)
conn.commit()