Looks at two different schemas and compares them.

\- do both schemas have the same tables  
\- do those tables have the same attributes (columns)  
\- do the attributes have the same definitions (nullability, data type, length, precision, scale)

NOT IMPLEMENTED YET  
\- are the FKs the same  
\- are the Indexes the same

In [6]:
import json
import os
import sqlite3
import csv

from pathlib import Path
from datetime import datetime

conn = sqlite3.connect('../full_metadata.db')
cur = conn.cursor()

time_string = datetime.now().strftime('%Y%m%d%H%M%S')
output_path = "../export_output_files/TXT/"


In [7]:
def fetch_data_as_dict(cursor, query, params=()):
  """Run *query* on *cursor* and return the result rows as dictionaries.

  Args:
    cursor: An open sqlite3 cursor.
    query: SQL text, optionally containing ``?`` placeholders.
    params: Values bound to the placeholders, in order. Defaults to no
      parameters.

  Returns:
    A list with one dict per result row, keyed by the column names
    reported by the cursor. An empty list is returned when the query
    yields no rows, when the statement produces no result set at all
    (e.g. INSERT/UPDATE), or when sqlite3 raises an error (the error is
    printed, not re-raised).
  """
  try:
    cursor.execute(query, params)
    # description is None for statements that do not return rows;
    # iterating it would raise a TypeError that is not caught below.
    if cursor.description is None:
      return []
    columns = [desc[0] for desc in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]
  except sqlite3.Error as e:
    print(f"Error: {e}")
    return []

In [8]:
# First schema to compare ("apple"): server, database, schema.
apple_server_name, apple_database_name, apple_schema_name = (
  'ipctrain.crm11.dynamics.com',
  'ipctrain',
  'dbo',
)

# Second schema to compare ("orange"): server, database, schema.
orange_server_name, orange_database_name, orange_schema_name = (
  'ipcdevprg.crm11.dynamics.com',
  'ipcdevprg',
  'dbo',
)


In [9]:
# Table-level comparison.
# Produces one row per table name found in either schema:
#   In_Apples / In_Oranges    -> 'APPLE' / 'ORANGE' when the table exists on
#                                that side, NULL otherwise
#   IS_COLUMN_COUNT_DIFFERENT -> 0 or 1 when the table exists on both sides,
#                                NULL when it exists on one side only
# The full outer join is emulated with two anti-joins plus an inner join.
query = '''
-- Table names in a schema
WITH
APPLES AS (
  SELECT DISTINCT
    'APPLE' AS SOURCE
    , TABLE_NAME
    , COLUMN_COUNT
  FROM
    out_PhysicalStructureCurrentForSchemaCompare
  WHERE
    SERVER_NAME = ?
    AND DATABASE_NAME = ?
    AND SCHEMA_NAME = ?
)
, ORANGES AS (
  SELECT DISTINCT
    'ORANGE' AS SOURCE
    , TABLE_NAME
    , COLUMN_COUNT
  FROM
    out_PhysicalStructureCurrentForSchemaCompare
  WHERE
    SERVER_NAME = ?
    AND DATABASE_NAME = ?
    AND SCHEMA_NAME = ?
)
-- SQLite does not support full outer joins :(
-- have to do this in three stages.
, A_not_O AS (
  SELECT
    a.SOURCE AS In_Apples
    , o.SOURCE AS In_Oranges
    , a.TABLE_NAME
    , NULL AS IS_COLUMN_COUNT_DIFFERENT
  FROM
    APPLES a
    LEFT JOIN ORANGES o ON (a.TABLE_NAME = o.TABLE_NAME)
  WHERE
    o.SOURCE IS NULL -- in Apples, not in Oranges
)
, O_not_A AS (
  SELECT
    a.SOURCE AS In_Apples
    , o.SOURCE AS In_Oranges
    , o.TABLE_NAME
    , NULL AS IS_COLUMN_COUNT_DIFFERENT
  FROM
    ORANGES o
    LEFT JOIN APPLES a ON (a.TABLE_NAME = o.TABLE_NAME)
  WHERE
    a.SOURCE IS NULL -- in Oranges, not in Apples
)
, A_and_O AS (
  SELECT
    a.SOURCE AS In_Apples
    , o.SOURCE AS In_Oranges
    , a.TABLE_NAME
    , CASE
      WHEN a.COLUMN_COUNT = o.COLUMN_COUNT
      THEN 0
      ELSE 1
    END AS IS_COLUMN_COUNT_DIFFERENT
  FROM
    APPLES a
    INNER JOIN ORANGES o ON (a.TABLE_NAME = o.TABLE_NAME)
)
, ONION AS (
  SELECT
    In_Apples
    , In_Oranges
    , TABLE_NAME
    , IS_COLUMN_COUNT_DIFFERENT
  FROM
    A_and_O

  UNION

  SELECT
    In_Apples
    , In_Oranges
    , TABLE_NAME
    , IS_COLUMN_COUNT_DIFFERENT
  FROM
    A_not_O

  UNION

  SELECT
    In_Apples
    , In_Oranges
    , TABLE_NAME
    , IS_COLUMN_COUNT_DIFFERENT
  FROM
    O_not_A
)
SELECT
  In_Apples
  , In_Oranges
  , TABLE_NAME
  , IS_COLUMN_COUNT_DIFFERENT
FROM
  ONION
ORDER BY
  TABLE_NAME
;

'''

# Placeholder order matters: the first three values feed the APPLES CTE,
# the last three feed the ORANGES CTE.
params = [
  apple_server_name, apple_database_name, apple_schema_name,
  orange_server_name, orange_database_name, orange_schema_name,
]

table_dict_list = fetch_data_as_dict(cur, query, params)


#for row_dict in table_dict_list:
#  print(row_dict)


In [10]:
# Export the table-level comparison to a timestamped CSV file.
output_filename = "schema_compare_tables_" + time_string + ".csv"

if table_dict_list:
  # The keys of the first row dictionary become the CSV header.
  header = table_dict_list[0].keys()

  # newline='' lets the csv module control line endings itself.
  with open(os.path.join(output_path, output_filename), 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=header)
    writer.writeheader()
    writer.writerows(table_dict_list)
else:
  # An empty result (no matching rows, or a query error) has no first row
  # to take the header from; indexing [0] would raise IndexError.
  print(f"No table comparison rows returned; {output_filename} was not written.")


In [12]:
# Column-level comparison.
# Produces one row per (table, column) found in either schema:
#   In_Apples / In_Oranges -> 'APPLE' / 'ORANGE' when the column exists on
#                             that side, NULL otherwise
#   IS_DATATYPE_DIFFERENT  -> 0 or 1 when the column exists on both sides,
#                             NULL when it exists on one side only
# DATATYPE_HASH is a pipe-separated concatenation of nullability, data type,
# length, precision and scale; two columns count as identical when their
# hashes match.
# The '|' separators use single quotes: in SQLite double quotes denote
# identifiers, and "|" is only accepted as a string by a legacy fallback
# that can be disabled at build time.
query = '''
-- Column names and datatypes in a schema
WITH
APPLES AS (
  SELECT DISTINCT
    'APPLE' AS SOURCE
    , TABLE_NAME
    , COLUMN_NAME

    , IS_NULLABLE
    , DATA_TYPE
    , CHARACTER_MAXIMUM_LENGTH
    , NUMERIC_PRECISION
    , NUMERIC_SCALE

    , IFNULL(IS_NULLABLE, 'NULL')
      || '|' || IFNULL(DATA_TYPE, 'NULL')
      || '|' || IFNULL(CHARACTER_MAXIMUM_LENGTH, 'NULL')
      || '|' || IFNULL(NUMERIC_PRECISION, 'NULL')
      || '|' || IFNULL(NUMERIC_SCALE, 'NULL')
    AS DATATYPE_HASH
  FROM
    out_PhysicalAttributeCurrentForSchemaCompare
  WHERE
    SERVER_NAME = ?
    AND DATABASE_NAME = ?
    AND SCHEMA_NAME = ?
    --AND TABLE_NAME = ''
)
, ORANGES AS (
  SELECT DISTINCT
    'ORANGE' AS SOURCE
    , TABLE_NAME
    , COLUMN_NAME

    , IS_NULLABLE
    , DATA_TYPE
    , CHARACTER_MAXIMUM_LENGTH
    , NUMERIC_PRECISION
    , NUMERIC_SCALE

    , IFNULL(IS_NULLABLE, 'NULL')
      || '|' || IFNULL(DATA_TYPE, 'NULL')
      || '|' || IFNULL(CHARACTER_MAXIMUM_LENGTH, 'NULL')
      || '|' || IFNULL(NUMERIC_PRECISION, 'NULL')
      || '|' || IFNULL(NUMERIC_SCALE, 'NULL')
    AS DATATYPE_HASH
  FROM
    out_PhysicalAttributeCurrentForSchemaCompare
  WHERE
    SERVER_NAME = ?
    AND DATABASE_NAME = ?
    AND SCHEMA_NAME = ?
    --AND TABLE_NAME = ''
)
-- SQLite does not support full outer joins :(
-- have to do this in three stages.
, A_not_O AS (
  SELECT
    a.SOURCE AS In_Apples
    , o.SOURCE AS In_Oranges
    , a.TABLE_NAME
    , a.COLUMN_NAME
    , NULL AS IS_DATATYPE_DIFFERENT
  FROM
    APPLES a
    LEFT JOIN ORANGES o ON (
      a.TABLE_NAME = o.TABLE_NAME
      AND a.COLUMN_NAME = o.COLUMN_NAME
    )
  WHERE
    o.SOURCE IS NULL -- in Apples, not in Oranges
)
, O_not_A AS (
  SELECT
    a.SOURCE AS In_Apples
    , o.SOURCE AS In_Oranges
    , o.TABLE_NAME
    , o.COLUMN_NAME
    , NULL AS IS_DATATYPE_DIFFERENT
  FROM
    ORANGES o
    LEFT JOIN APPLES a ON (
      a.TABLE_NAME = o.TABLE_NAME
      AND a.COLUMN_NAME = o.COLUMN_NAME
    )
  WHERE
    a.SOURCE IS NULL -- in Oranges, not in Apples
)
, A_and_O AS (
  SELECT
    a.SOURCE AS In_Apples
    , o.SOURCE AS In_Oranges
    , a.TABLE_NAME
    , a.COLUMN_NAME
    , CASE
      WHEN a.DATATYPE_HASH = o.DATATYPE_HASH
      THEN 0
      ELSE 1
    END AS IS_DATATYPE_DIFFERENT
  FROM
    APPLES a
    INNER JOIN ORANGES o ON (
      a.TABLE_NAME = o.TABLE_NAME
      AND a.COLUMN_NAME = o.COLUMN_NAME
    )
)
, ONION AS (
  SELECT
    In_Apples
    , In_Oranges
    , TABLE_NAME
    , COLUMN_NAME
    , IS_DATATYPE_DIFFERENT
  FROM
    A_and_O

  UNION

  SELECT
    In_Apples
    , In_Oranges
    , TABLE_NAME
    , COLUMN_NAME
    , IS_DATATYPE_DIFFERENT
  FROM
    A_not_O

  UNION

  SELECT
    In_Apples
    , In_Oranges
    , TABLE_NAME
    , COLUMN_NAME
    , IS_DATATYPE_DIFFERENT
  FROM
    O_not_A
)
SELECT
  In_Apples
  , In_Oranges
  , TABLE_NAME
  , COLUMN_NAME
  , IS_DATATYPE_DIFFERENT
FROM
  ONION
ORDER BY
  TABLE_NAME
  , COLUMN_NAME
;
'''
# Placeholder order matters: the first three values feed the APPLES CTE,
# the last three feed the ORANGES CTE.
params = [
  apple_server_name
  , apple_database_name
  , apple_schema_name
  , orange_server_name
  , orange_database_name
  , orange_schema_name
]
field_dict_list = fetch_data_as_dict(cur, query, params)


#for row_dict in field_dict_list:
#  print(row_dict)


In [13]:
# Export the column-level comparison to a timestamped CSV file.
output_filename = "schema_compare_fields_" + time_string + ".csv"

if field_dict_list:
  # The keys of the first row dictionary become the CSV header.
  header = field_dict_list[0].keys()

  # newline='' lets the csv module control line endings itself.
  with open(os.path.join(output_path, output_filename), 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=header)
    writer.writeheader()
    writer.writerows(field_dict_list)
else:
  # An empty result (no matching rows, or a query error) has no first row
  # to take the header from; indexing [0] would raise IndexError.
  print(f"No field comparison rows returned; {output_filename} was not written.")
