# Imports & Connections

In [None]:
import pandas as pd
import pyodbc
import sqlite3
import numpy as np

In [None]:
# SQL Server connection settings.
# The server name is a raw string: it contains a backslash (host\instance),
# and '\S' is not a valid escape sequence in a normal string literal.
DB = {
    'servername': r'DESKTOP-IT4OHPV\SQLEXPRESS',
    'database': 'johari_Herkansing_NEW',
}

# Windows-authenticated (Trusted_Connection) ODBC connection to SQL Server.
ssms_conn = pyodbc.connect(
    'DRIVER={SQL Server};'
    f"SERVER={DB['servername']};"
    f"DATABASE={DB['database']};"
    'Trusted_Connection=yes'
)

In [None]:
# Local SQLite handles: one under data/processed and three under data/raw
sqlite_conn = sqlite3.connect('../data/processed/dwh.sqlite')
go_sales_conn = sqlite3.connect('../data/raw/go_sales.sqlite')
go_staff_conn = sqlite3.connect('../data/raw/go_staff.sqlite')
go_crm_conn = sqlite3.connect('../data/raw/go_crm.sqlite')

# Fact tables: each one is read in full from SQL Server into a DataFrame,
# keyed by table name.
fact_tables = [
    'training',
    'satisfaction',
    'returned_item',
    'orders',
    'sales_targetData',
    'sales_product_forecast',
    'sales_inventory_levels',
]
fact_dfs = {
    name: pd.read_sql(f'SELECT * FROM {name}', ssms_conn)
    for name in fact_tables
}

# Dimension tables: loaded the same way as the fact tables.
dim_tables = [
    'product',
    'sales_staff',
    'course',
    'satisfaction_type',
    'return_reason',
    'order_method',
    'retailer_contact',
]
dim_dfs = {
    name: pd.read_sql(f'SELECT * FROM {name}', ssms_conn)
    for name in dim_tables
}

# Convenience aliases used by the later cells (same objects as in the dicts)
orders_df = fact_dfs['orders']
product_df = dim_dfs['product']
# product_df.head()  # uncomment to preview the product dimension

# Surrogate Key Creation

In [None]:
# Function to add surrogate key to dimension tables
def add_surrogate_key(conn, table_name):
    """Add the surrogate key and SCD bookkeeping columns to a table.

    Four columns are added: ``SK`` (INT IDENTITY), ``EffectiveDate``
    (DATETIME, defaults to GETDATE()), ``EndDate`` (nullable DATETIME) and
    ``IsActive`` (BIT, defaults to 1).  Each ALTER is guarded by
    ``COL_LENGTH`` so a column that already exists is skipped, which makes
    the function safe to re-run (e.g. when a notebook cell is executed twice).

    Args:
        conn: Open DB-API connection to SQL Server (``cursor``, ``commit``
            and ``rollback`` are used).
        table_name: Name of the table to alter.  It is interpolated directly
            into the DDL, so it must be a trusted identifier.

    Raises:
        Any exception raised by the driver.  The transaction is rolled back
        before the exception propagates, so no partial set of columns is
        left pending on the connection.
    """
    column_definitions = (
        ('SK', 'INT IDENTITY(1,1)'),
        ('EffectiveDate', 'DATETIME NOT NULL DEFAULT GETDATE()'),
        ('EndDate', 'DATETIME NULL'),
        ('IsActive', 'BIT NOT NULL DEFAULT 1'),
    )
    cursor = conn.cursor()
    try:
        for column, definition in column_definitions:
            # COL_LENGTH returns NULL when the column does not exist yet.
            cursor.execute(
                f"IF COL_LENGTH('{table_name}', '{column}') IS NULL "
                f"ALTER TABLE {table_name} ADD {column} {definition};"
            )
        conn.commit()
    except Exception:
        # Undo any ALTERs already issued so a later commit cannot persist
        # a half-migrated table.
        conn.rollback()
        raise
    finally:
        cursor.close()

# Give every dimension table its SK / EffectiveDate / EndDate / IsActive
# columns, one table at a time, over the SQL Server connection.
for table in dim_tables:
    add_surrogate_key(conn=ssms_conn, table_name=table)

# Foreign Surrogate Key Creation

In [None]:
# Function to add surrogate foreign key columns to fact tables
def add_surrogate_foreign_key_columns(conn, fact_table, key_mappings):
    """Add one INT surrogate-foreign-key column per mapping entry.

    Args:
        conn: Open DB-API connection (``cursor``, ``commit`` and ``rollback``
            are used).
        fact_table: Name of the fact table being processed.  Kept for call
            compatibility; the table that is actually altered is the one
            named inside each ``key_mappings`` value, exactly as before.
        key_mappings: Mapping whose values are ``(table, column)`` pairs:
            the table to alter and the name of the INT column to add.  The
            keys are not used.  Names are interpolated directly into the
            DDL, so they must be trusted identifiers.

    Raises:
        Any exception raised by the driver.  The transaction is rolled back
        before the exception propagates.
    """
    cursor = conn.cursor()
    try:
        # A distinct loop name avoids silently overwriting the `fact_table`
        # parameter, which the original loop shadowed.
        for target_table, foreign_key_column in key_mappings.values():
            cursor.execute(f"ALTER TABLE {target_table} ADD {foreign_key_column} INT;")
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        cursor.close()

# Build the surrogate-foreign-key column mapping for the fact tables.
# Only 'orders' has a mapping; every other fact table is skipped.
for fact_table in fact_tables:
    if fact_table != 'orders':
        continue
    # key -> (table to alter, SFK column to add); the keys look like the
    # natural-key column names but are not read by the helper.
    key_mappings = {
        source_key: ('orders', sfk_column)
        for source_key, sfk_column in (
            ('PRODUCT_id', 'PRODUCT_SFK'),
            ('ORDER_METHOD_id', 'ORDER_METHOD_SFK'),
            ('RETAILER_CONTACT_id', 'RETAILER_CONTACT_SFK'),
            ('RETURNED_REASON', 'RETURNED_REASON_SFK'),
        )
    }
    # The call that actually adds the columns is currently disabled:
    # add_surrogate_foreign_key_columns(ssms_conn, fact_table, key_mappings)

# Function to populate foreign keys in fact tables
def populate_foreign_keys(conn, fact_table, dim_table, natural_key, foreign_key_column, sfk_column):
    """Fill a fact table's surrogate-foreign-key column from a dimension.

    Runs a single UPDATE over every row of ``fact_table`` that sets
    ``sfk_column`` to the ``SK`` of the ``dim_table`` row whose
    ``natural_key`` equals the fact row's ``foreign_key_column``.  Fact rows
    with no matching dimension row get NULL, because the correlated subquery
    yields no value for them.

    Args:
        conn: Open DB-API connection (``cursor``, ``commit`` and ``rollback``
            are used).
        fact_table: Table to update.
        dim_table: Dimension table that provides ``SK``.
        natural_key: Column in ``dim_table`` to match on.
        foreign_key_column: Column in ``fact_table`` to match on.
        sfk_column: Column in ``fact_table`` that receives the ``SK``.

    All names are interpolated directly into the SQL text, so they must be
    trusted identifiers.

    Raises:
        Any exception raised by the driver.  The transaction is rolled back
        before the exception propagates.
    """
    # NOTE(review): the subquery has no IsActive filter, so it presumably
    # expects exactly one dimension row per natural key - confirm once the
    # dimension holds several SCD2 versions of the same key.
    sql = f"""
    UPDATE {fact_table}
    SET {sfk_column} = (
        SELECT {dim_table}.SK
        FROM {dim_table}
        WHERE {dim_table}.{natural_key} = {fact_table}.{foreign_key_column}
    )
    """
    cursor = conn.cursor()
    try:
        cursor.execute(sql)
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        cursor.close()

# Populate Foreign Surrogate Keys

In [None]:
# Fill the surrogate foreign keys on the 'orders' fact table.
# NOTE(review): assumes the PRODUCT_SFK / ORDER_METHOD_SFK columns already
# exist on orders - confirm, since the call that adds them is commented out.
populate_foreign_keys(
    conn=ssms_conn,
    fact_table='orders',
    dim_table='product',
    natural_key='id',
    foreign_key_column='product_id',
    sfk_column='PRODUCT_SFK',
)
# NOTE(review): here the dimension key is 'order_method_id' and the fact
# column is 'order_method_code' - verify against the schema that these two
# are not swapped.
populate_foreign_keys(
    conn=ssms_conn,
    fact_table='orders',
    dim_table='order_method',
    natural_key='order_method_id',
    foreign_key_column='order_method_code',
    sfk_column='ORDER_METHOD_SFK',
)
# Disabled calls, kept for reference:
#populate_foreign_keys(ssms_conn, 'orders', 'retailer_contact', 'id', 'retailer_contact_code', 'RETAILER_CONTACT_SFK')
#populate_foreign_keys(ssms_conn, 'orders', 'RETURN_REASON', 'return_reason_id', '', 'RETURNED_REASON_SFK')

# Simulate Slowly Changing Dimensions Type 2

In [None]:
# Step 0: Reload the product dimension.
# The copy loaded at the top of the notebook was read before
# add_surrogate_key added the SK / IsActive columns this cell relies on.
product_df = pd.read_sql('SELECT * FROM product', ssms_conn)

# Step 1: Join orders to every product row on the business key (ProductID).
# This is the pandas equivalent of an INNER JOIN; kept for inspection.
# NOTE(review): assumes both frames expose a 'ProductID' column - confirm
# against the actual schema (the SQL cells match on product.id / product_id).
merged_df = pd.merge(orders_df, product_df[['ProductID', 'SK']], on='ProductID', how='inner')

# Step 2: Restrict the dimension to the currently active product rows
# (IsActive == 1) and repeat the join against those only.
active_products = product_df[product_df['IsActive'] == 1]
merged_df_active = pd.merge(orders_df, active_products[['ProductID', 'SK']], on='ProductID', how='inner')

# Step 3: Write the active SK into orders_df['PRODUCT_SFK'].
# DataFrame.update() cannot do this: it only overwrites columns that already
# exist in orders_df (there is no 'SK' column) and aligns on the index, which
# the merge has renumbered. Mapping by ProductID keeps one value per order row;
# orders without an active product get NaN.
active_sk_by_product = (
    active_products
    # A lookup index must be unique; if several active rows share a
    # ProductID, the last one wins.
    .drop_duplicates('ProductID', keep='last')
    .set_index('ProductID')['SK']
)
orders_df['PRODUCT_SFK'] = orders_df['ProductID'].map(active_sk_by_product)

In [None]:
# Release every database handle opened by this notebook: the SQL Server
# connection first, then the four SQLite ones, in the order they were opened.
for _open_conn in (ssms_conn, sqlite_conn, go_sales_conn, go_staff_conn, go_crm_conn):
    _open_conn.close()