In [1]:
# Relative path from this notebook to the directory that holds `user_packages`,
# `full_metadata.db` and the `imports/` tree.
# Keep the trailing '/': other cells build paths from it by plain string concatenation.
script_depth = '../../'

import sqlite3
import pandas as pd
import numpy as np
from datetime import datetime
import os
import sys
# Put that directory on the import path so the local `user_packages` package resolves.
sys.path.append(script_depth)
from user_packages import hashing


In [2]:
# Run configuration
# NOTE(review): target_model_name is not referenced by any cell visible here - confirm it is still needed.
target_model_name = 'excited.skirt.earth'

# Where the source CSV lives and what it is called
source_file_path = os.path.join(script_depth, 'imports/physical/sqlserver')
source_file_name = 'OH_compliance_tables_analysis(in).csv'

# Value stamped into the RecordSource column of every loaded row
record_source = 'SQLServerAnalysis'


In [3]:
# Open the SQLite metadata database used by every load step below
metadata_db_path = os.path.join(script_depth, 'full_metadata.db')
conn = sqlite3.connect(metadata_db_path)

In [4]:
# Load the source CSV into a DataFrame and turn every NaN into None
# (presumably so missing values reach SQLite as NULL - confirm).
csv_path = os.path.join(source_file_path, source_file_name)
df = pd.read_csv(csv_path).replace({np.nan: None})


#df

In [5]:
# Add the audit columns and the payload hash.
#
# NOTE(review): the later SQL cells read PhysicalStructureHashKey and Tags from
# the staging table, but neither column is created in the cells visible here -
# presumably both arrive in the CSV; confirm.

# Audit columns: the same value on every row of this load
df['RecordSource'] = record_source
df['LoadDate'] = datetime.now()

# Descriptive (payload) columns that feed the HashDiff change-detection hash
hash_diff_columns = [
    'SERVER_NAME',
    'DATABASE_NAME',
    'SCHEMA_NAME',
    'TABLE_NAME',
    'ROW_COUNT',
    'TABLE_TYPE',
    'TABLE_TYPE_DESCRIPTION',
    'COLUMN_COUNT',
    'TABLE_DESCRIPTION',
]

df = hashing.add_md5_hash_column(
    df,
    md5_column_name='HashDiff',
    columns=hash_diff_columns,
)

#df

In [6]:
# Empty the staging table, commit, then append the freshly prepared rows to it
staging_table = 'stg_Py_SqlServerAnalysisCsvToPhysicalStructure'

conn.execute(f"DELETE FROM {staging_table}")
conn.commit()
df.to_sql(staging_table, conn, if_exists='append', index=False)

20

In [7]:
# Write to the Satellite (PhysicalStructure_SqlServerScrape)
sql_query = """
INSERT INTO rv_s_PhysicalStructure_SqlServerScrape
(
    PhysicalStructureHashKey
  , LoadDate
  , RecordSource
  , HashDiff

  , SERVER_NAME
  , "DATABASE_NAME"
  , SCHEMA_NAME
  , TABLE_NAME
  , ROW_COUNT
  , TABLE_TYPE
  , TABLE_TYPE_DESCRIPTION
  , COLUMN_COUNT
  , TABLE_DESCRIPTION

)
SELECT DISTINCT
    stg.PhysicalStructureHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff
  
  , stg.SERVER_NAME
  , stg."DATABASE_NAME"
  , stg.SCHEMA_NAME
  , stg.TABLE_NAME
  , stg.ROW_COUNT
  , stg.TABLE_TYPE
  , stg.TABLE_TYPE_DESCRIPTION
  , stg.COLUMN_COUNT
  , stg.TABLE_DESCRIPTION

FROM
  stg_Py_SqlServerAnalysisCsvToPhysicalStructure AS stg
  LEFT OUTER JOIN rv_s_PhysicalStructure_SqlServerScrape AS sat ON (
    stg.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_PhysicalStructure_SqlServerScrape AS z
      WHERE z.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
""";
conn.execute(sql_query)
conn.commit()

In [8]:
# Load tags into the AnalysisTag satellite.
#
# Steps (all inside one SQL statement):
#   1. source_table  - read PhysicalStructureHashKey + the ';'-separated Tags
#                      string from the staging table.
#   2. splitstring   - recursive CTE that peels one tag off the front of the
#                      string per iteration until nothing is left.
#   3. incoming_tags - the non-empty tags, with LoadDate / RecordSource from staging.
#   4. current_tags  - tags already active in the satellite for the staged keys.
#   5. INSERT        - incoming tags that are not currently active.
#
# NOTE: This does not END any tags.
#
# Fixes relative to the previous version:
#   * The anchor row now leaves remaining_string NULL when the string holds no
#     ';'. Before, the whole string was carried over and a single tag was
#     emitted twice (only hidden by the final DISTINCT).
#   * The recursion now stops when remaining_string is exhausted rather than
#     when the last extracted tag is empty. Before, an empty segment
#     ('a;;b' or a leading ';') ended the split early and dropped later tags.
#   * current_tags now takes the latest row per (key, tag) instead of only the
#     rows at the key's overall latest LoadDate. Because this load inserts
#     only new tags, tags from earlier loads otherwise stop counting as
#     current and get re-inserted as duplicates on the next run.
#   * SUBSTRING(SUBSTRING(x, n), 1) simplified to the equivalent SUBSTR(x, n).
sql_query = """
WITH RECURSIVE
source_table AS (
  SELECT
      PhysicalStructureHashKey AS id
    , Tags AS string
  FROM stg_Py_SqlServerAnalysisCsvToPhysicalStructure
)
, splitstring (id, string, remaining_string) AS (
  -- anchor: first tag, plus the text after the first ';' (NULL when there is none)
  SELECT
      a.id
    , CASE
        WHEN a.string LIKE '%;%'
        THEN SUBSTR(a.string, 1, INSTR(a.string, ';') - 1)
        ELSE a.string
      END AS string
    , CASE
        WHEN a.string LIKE '%;%'
        THEN SUBSTR(a.string, INSTR(a.string, ';') + 1)
      END AS remaining_string
  FROM source_table AS a
  UNION ALL
  -- step: peel the next tag off remaining_string
  SELECT
      c.id
    , CASE
        WHEN c.remaining_string LIKE '%;%'
        THEN SUBSTR(c.remaining_string, 1, INSTR(c.remaining_string, ';') - 1)
        ELSE c.remaining_string
      END AS string
    , CASE
        WHEN c.remaining_string LIKE '%;%'
        THEN SUBSTR(c.remaining_string, INSTR(c.remaining_string, ';') + 1)
      END AS remaining_string
  FROM splitstring AS c
  WHERE
    -- each step removes at least one ';', so this always terminates
    c.remaining_string IS NOT NULL
)
, incoming_tags AS (
  SELECT
      s.id AS PhysicalStructureHashKey
    , stg.LoadDate
    , stg.RecordSource

    , s.string AS Tag
    , stg.LoadDate AS StartDate
  FROM
    splitstring AS s
    LEFT JOIN stg_Py_SqlServerAnalysisCsvToPhysicalStructure AS stg ON (s.id = stg.PhysicalStructureHashKey)
  WHERE
    -- empty segments (';;', leading/trailing ';') and NULL Tags are not tags
    s.string <> ''
    AND s.string IS NOT NULL
)
, current_tags AS ( -- the active tags on the keys that are in the stg table
  SELECT
      sat.PhysicalStructureHashKey
    , sat.Tag
  FROM
    rv_s_PhysicalStructure_AnalysisTag AS sat
    INNER JOIN stg_Py_SqlServerAnalysisCsvToPhysicalStructure AS stg ON (
      sat.PhysicalStructureHashKey = stg.PhysicalStructureHashKey
      AND sat.LoadDate = (
        -- latest row for this key AND this tag
        SELECT MAX(z.LoadDate)
        FROM rv_s_PhysicalStructure_AnalysisTag AS z
        WHERE z.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
          AND z.Tag = sat.Tag
      )
    )
  WHERE
    sat.EndDate IS NULL
)
INSERT INTO rv_s_PhysicalStructure_AnalysisTag
(
    PhysicalStructureHashKey
  , LoadDate
  , RecordSource

  , Tag
  , StartDate
)
SELECT DISTINCT
    stg.PhysicalStructureHashKey
  , stg.LoadDate
  , stg.RecordSource

  , stg.Tag
  , stg.LoadDate AS StartDate
FROM
  incoming_tags AS stg
  LEFT OUTER JOIN current_tags AS sat ON (
    stg.PhysicalStructureHashKey = sat.PhysicalStructureHashKey
    AND stg.Tag = sat.Tag
  )
-- "Where in incoming_tags and not in current_tags"
WHERE
  sat.PhysicalStructureHashKey IS NULL
  AND sat.Tag IS NULL
"""
conn.execute(sql_query)
conn.commit()
