In [1]:
# Creates relations of ConceptualEntity <> ConceptualEntity.
# The unique key of a relation includes the forward relation phrase, because two entities can be related in more than one way,
# e.g. a Communication can be sent to a Customer and sent from a Customer.
import sqlite3
import pandas as pd
import numpy as np
import os
from datetime import datetime
import sys
sys.path.append('../../')
from user_packages import hashing

In [23]:
# Script configuration
# Tag stamped on every loaded row (RecordSource column) to identify this feed.
record_source = 'ConceptualEntityRelationsCSV'

# Location of the source CSV, given as directory + file name.
source_file_path = '../../imports/concept'
source_file_name = 'Org_Dictionary(EntityRelations).csv'

In [31]:
# Open the SQLite metadata database; the path is relative to the working directory.
conn = sqlite3.connect(database='../../full_metadata.db')
# Cursor on the same connection.
cur = conn.cursor()

In [35]:
# Load the source CSV into a DataFrame and turn missing values (NaN) into None.
df = (
    pd.read_csv(os.path.join(source_file_path, source_file_name))
    .replace({np.nan: None})
)

#df

In [36]:
# Create additional fields
# Audit columns: the same source tag and load timestamp are stamped on every row.
df['RecordSource'] = record_source
df['LoadDate'] = datetime.now()

# Every part of the relation key phrase must be present: a missing part cannot be
# concatenated into a key. Fail with a clear message that names the offending columns
# instead of an opaque error raised from inside the string concatenation.
key_part_columns = ['ConceptualEntity', 'ForwardRelationPhrase', 'ConceptualEntity_Related']
columns_with_nulls = [col for col in key_part_columns if df[col].isna().any()]
if columns_with_nulls:
    raise ValueError(
        'Missing values in relation key column(s): ' + ', '.join(columns_with_nulls)
    )

# Key phrases are built with column-wise operations rather than a row-wise apply;
# this also works when the CSV contains a header but no data rows.
df['ConceptualEntityKeyPhrase'] = df['ConceptualEntity']
df['ConceptualEntity_RelatedKeyPhrase'] = df['ConceptualEntity_Related']
# Relation key phrase: "<entity>:<forward relation phrase>:<related entity>".
df['ConceptualEntityConceptualEntity_RelatedKeyPhrase'] = (
    df['ConceptualEntityKeyPhrase']
    + ':' + df['ForwardRelationPhrase']
    + ':' + df['ConceptualEntity_RelatedKeyPhrase']
)


In [37]:
# Hash fields
# Each entry is (name of the hash column to add, source columns passed to the hasher).
# The entries are processed in the order listed.
hash_column_specs = [
    ('ConceptualEntityHashKey', ['ConceptualEntityKeyPhrase']),
    ('ConceptualEntity_RelatedHashKey', ['ConceptualEntity_RelatedKeyPhrase']),
    (
        'ConceptualEntityConceptualEntity_RelatedHashKey',
        ['ConceptualEntityConceptualEntity_RelatedKeyPhrase'],
    ),
    (
        'HashDiff',
        ['ForwardRelationPhrase', 'ReverseRelationPhrase', 'isDeleted', 'Description'],
    ),
]

for hash_column_name, source_columns in hash_column_specs:
    df = hashing.add_md5_hash_column(
        df,
        md5_column_name=hash_column_name,
        columns=source_columns,
    )

#df

In [27]:
# Clear down and write to staging
# Empty the staging table and commit, so that only rows from this run are staged.
conn.execute("DELETE FROM stg_Py_CSV_CE_to_CERelated")
conn.commit()

# Append the prepared frame to the staging table; the DataFrame index is not written.
df.to_sql(
    name='stg_Py_CSV_CE_to_CERelated',
    con=conn,
    if_exists='append',
    index=False,
)

78

In [33]:
# Write to the Link (ConceptualEntity - ConceptualEntity_Related)
# Inserts one link row for every staged relation hash key that the link table does not hold yet.
# - SELECT DISTINCT: staging rows that agree on all selected columns are collapsed into a
#   single link row, so a relation staged more than once is not inserted more than once
#   (the satellite load in this notebook de-duplicates in the same way).
# - NOT EXISTS (rather than NOT IN): if the NOT IN subquery ever returned a NULL key, the
#   predicate would evaluate to NULL for every staging row and nothing would be inserted;
#   NOT EXISTS is not affected by NULLs. All columns are qualified with their table alias.
sql_query = """
INSERT INTO rv_l_ConceptualEntityConceptualEntity_Related
(
  ConceptualEntityConceptualEntity_RelatedHashKey
  , LoadDate
  , RecordSource
  , ConceptualEntityHashKey
  , ConceptualEntity_RelatedHashKey
  , ForwardRelationPhrase
)
SELECT DISTINCT
  stg.ConceptualEntityConceptualEntity_RelatedHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.ConceptualEntityHashKey
  , stg.ConceptualEntity_RelatedHashKey
  , stg.ForwardRelationPhrase
FROM
  stg_Py_CSV_CE_to_CERelated AS stg
WHERE
  stg.ConceptualEntityConceptualEntity_RelatedHashKey IS NOT NULL
  AND NOT EXISTS (
    SELECT 1
    FROM rv_l_ConceptualEntityConceptualEntity_Related AS lnk
    WHERE lnk.ConceptualEntityConceptualEntity_RelatedHashKey = stg.ConceptualEntityConceptualEntity_RelatedHashKey
  )
"""
conn.execute(sql_query)
conn.commit()

In [34]:
# Write to the Satellite (rv_s_ConceptualEntityConceptualEntity_Related)
# Inserts a new satellite row for each distinct staging row when either:
#   - the satellite holds no row for that relation hash key (sat.HashDiff IS NULL after the
#     LEFT OUTER JOIN), or
#   - the staged HashDiff differs from the HashDiff of the satellite row with the latest
#     LoadDate for that hash key.
# The correlated subquery restricts the join to the satellite row with MAX(LoadDate) per hash
# key, so staging is compared against that row only. Existing satellite rows are never updated
# or deleted by this statement; it only inserts.
sql_query = """
INSERT INTO rv_s_ConceptualEntityConceptualEntity_Related
(
    ConceptualEntityConceptualEntity_RelatedHashKey
  , LoadDate
  , RecordSource
  , HashDiff
  , ReverseRelationPhrase
  , isDeleted
  , "Description"
)
SELECT DISTINCT
    stg.ConceptualEntityConceptualEntity_RelatedHashKey
  , stg.LoadDate
  , stg.RecordSource
  , stg.HashDiff
  , stg.ReverseRelationPhrase
  , stg.isDeleted
  , stg."Description"
FROM
  stg_Py_CSV_CE_to_CERelated AS stg
  LEFT OUTER JOIN rv_s_ConceptualEntityConceptualEntity_Related AS sat ON (
    stg.ConceptualEntityConceptualEntity_RelatedHashKey = sat.ConceptualEntityConceptualEntity_RelatedHashKey
    AND sat.LoadDate = (
      SELECT MAX(z.LoadDate)
      FROM rv_s_ConceptualEntityConceptualEntity_Related AS z
      WHERE z.ConceptualEntityConceptualEntity_RelatedHashKey = sat.ConceptualEntityConceptualEntity_RelatedHashKey
    )
  )
WHERE
  (
    sat.HashDiff != stg.HashDiff
    OR sat.HashDiff IS NULL
  )
""";
# Run the insert and commit it.
conn.execute(sql_query)
conn.commit()