# Imports & Connections

In [1]:
import pandas as pd
import pyodbc
import sqlite3
import numpy as np
from datetime import datetime
from sqlalchemy import create_engine

In [2]:
DB = {
    'servername': 'DESKTOP-IT4OHPV\SQLEXPRESS',
    'database': 'johari_Herkansing_NEW1'
}

ssms_conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + DB['servername'] + 
                     ';DATABASE=' + DB['database'] + ';Trusted_Connection=yes')

# Connection String (update with your details)
connection_string = "mssql+pyodbc://@DESKTOP-IT4OHPV\SQLEXPRESS/johari_Herkansing_NEW1?driver=SQL+Server+Native+Client+11.0?trusted_connection=yes"
ssms_cursor = ssms_conn.cursor()   

# Create Engine
engine = create_engine(connection_string)

  'servername': 'DESKTOP-IT4OHPV\SQLEXPRESS',
  connection_string = "mssql+pyodbc://@DESKTOP-IT4OHPV\SQLEXPRESS/johari_Herkansing_NEW1?driver=SQL+Server+Native+Client+11.0?trusted_connection=yes"


In [3]:
# Local Connections
sqlite_conn = sqlite3.connect('../data/processed/dwh.sqlite')
go_sales_conn = sqlite3.connect('../data/raw/go_sales.sqlite')
go_staff_conn = sqlite3.connect('../data/raw/go_staff.sqlite')
go_crm_conn = sqlite3.connect('../data/raw/go_crm.sqlite')

# Fact tables
fact_tables = ['training', 'satisfaction', 'returned_item', 'orders', 'sales_targetData', 'sales_product_forecast', 'sales_inventory_levels']
fact_dfs = {table: pd.read_sql(f'SELECT * FROM {table}', ssms_conn) for table in fact_tables}

# Dimension tables
dim_tables = ['product', 'sales_staff', 'course', 'satisfaction_type', 'return_reason', 'order_method', 'retailer_contact']
dim_dfs = {table: pd.read_sql(f'SELECT * FROM {table}', ssms_conn) for table in dim_tables}

# Test Output
orders_df = fact_dfs['orders']
product_df = dim_dfs['product']
#product_df.head()

  fact_dfs = {table: pd.read_sql(f'SELECT * FROM {table}', ssms_conn) for table in fact_tables}
  dim_dfs = {table: pd.read_sql(f'SELECT * FROM {table}', ssms_conn) for table in dim_tables}


# Surrogate Key Creation

In [None]:
# Function to add surrogate key to dimension tables
def add_surrogate_key(conn, table_name):
    cursor = conn.cursor()
    # Add SK column
    cursor.execute(f"ALTER TABLE {table_name} ADD SK INT IDENTITY(1,1);")
    # Add EffectiveDate column
    cursor.execute(f"ALTER TABLE {table_name} ADD EffectiveDate DATETIME NOT NULL DEFAULT '1900-01-01';")
    # Add EndDate column
    cursor.execute(f"ALTER TABLE {table_name} ADD EndDate DATETIME NULL;")
    # Add IsActive column
    cursor.execute(f"ALTER TABLE {table_name} ADD IsActive BIT NOT NULL DEFAULT 1;")
    conn.commit()

# Add surrogate keys to dimension tables
for table in dim_tables:
    add_surrogate_key(ssms_conn, table)

# Foreign Surrogate Key Creation

In [None]:
# Function to add surrogate foreign key columns to fact tables
def add_surrogate_foreign_key_columns(conn, fact_table, key_mappings):
    cursor = conn.cursor()
    for _, (fact_table, foreign_key_column) in key_mappings.items():
        cursor.execute(f"ALTER TABLE {fact_table} ADD {foreign_key_column} INT;")
    conn.commit()

# Add surrogate foreign key columns to fact tables
for fact_table in fact_tables:
    if fact_table == 'orders':
        key_mappings = {
            #Orders
            'PRODUCT_id': ('orders', 'PRODUCT_SFK'),
            'ORDER_METHOD_id': ('orders', 'ORDER_METHOD_SFK'),
            'RETAILER_CONTACT_id': ('orders', 'RETAILER_CONTACT_SFK'),
            'RETURNED_REASON': ('orders', 'RETURNED_REASON_SFK'),

            #Sales_targetData
            'PRODUCT_id': ('sales_targetData', 'PRODUCT_SFK'),
            'SALES_STAFF_id': ('sales_targetData', 'SALES_STAFF_SFK'),

            #Sales_product_forecast
            'PRODUCT_id': ('sales_product_forecast', 'PRODUCT_SFK'),

            #Sales_inventory_levels
            'PRODUCT_id': ('sales_inventory_levels', 'PRODUCT_SFK'),

            #Satisfaction
            'SATISFACTION_TYPE_id': ('satisfaction', 'SATISFACTION_TYPE_SFK'),
            'SALES_STAFF_id': ('satisfaction', 'SALES_STAFF_SFK'),

            #Training
            'COURSE_id': ('training', 'COURSE_SFK'),
            'SALES_STAFF_id': ('training', 'SALES_STAFF_SFK'),

            #Returned_item
            'RETURN_REASON_id': ('returned_item', 'RETURN_REASON_SFK'),
        }
        #add_surrogate_foreign_key_columns(ssms_conn, fact_table, key_mappings)

#Product
def insert_sfk_targetData_product(conn):
    cursor = conn.cursor()
    sql = """
    UPDATE sales_targetData
    SET product_SFK = b.SK
    FROM sales_targetData a
    LEFT JOIN PRODUCT b
        ON b.id = a.product_id
        AND datefromparts(a.year, a.period, 1) BETWEEN b.EffectiveDate AND ISNULL(b.EndDate, '20991231')
    """
    try:
        cursor.execute(sql)
        conn.commit()
        print("Successfully inserted/updated SFKs in sales_targetData.")
    except Exception as e:
        conn.rollback()
        print(f"Error inserting/updating SFKs in sales_targetData: {e}")
    #finally:
        #cursor.close()

#Sales_staff
def insert_sfk_targetData_sales_staff(conn):
    cursor = conn.cursor()
    sql = """
    UPDATE sales_targetData
    SET sales_staff_sfk = b.SK
    FROM sales_targetData a
    LEFT JOIN sales_staff b
        ON b.sales_staff_id = a.staff_id
        AND datefromparts(a.year, a.period, 1) BETWEEN b.EffectiveDate AND ISNULL(b.EndDate, '20991231')
    """
    try:
        cursor.execute(sql)
        conn.commit()
        print("Successfully inserted/updated SFKs in Sales_staff.")
    except Exception as e:
        conn.rollback()
        print(f"Error inserting/updating SFKs in Sales_staff: {e}")
    #finally:
        #cursor.close()

def insert_sfk_orders_product(conn):
    cursor = conn.cursor()
    sql = """
    UPDATE orders
    SET product_sfk = b.SK
    FROM orders a
    LEFT JOIN product b
        ON b.id = a.product_id
        AND a.order_date BETWEEN b.EffectiveDate AND ISNULL(b.EndDate, '20991231')
    """
    try:
        cursor.execute(sql)
        conn.commit()
        print("Successfully inserted/updated SFKs in orders.")
    except Exception as e:
        conn.rollback()
        print(f"Error inserting/updating SFKs in orders: {e}")
    #finally:
        #cursor.close()

# Populate Foreign Surrogate Keys

In [None]:
# Populate SFKs for the 'sales_targetData' fact table
#insert_sfk_targetData_product(ssms_conn)
#insert_sfk_targetData_sales_staff(ssms_conn)

# Populate SFKs for the 'orders' fact table
#insert_sfk_orders_product(ssms_conn)

In [None]:
# Test Output
orders_df = fact_dfs['orders']
product_df = dim_dfs['product']
sales_targetData_df = fact_dfs['sales_targetData']
sales_targetData_df.head()

In [None]:
product_df

# Simulate Slowly Changing Dimensions Type 2

In [6]:
# Assuming ssms_cursor and ssms_conn are already defined and connected to your database

def simulate_scd_type_2_with_fact_update():
    # Step 1: Select a random active row from the PRODUCT table
    select_query = "SELECT TOP 1 * FROM dbo.PRODUCT WHERE IsActive = 1 ORDER BY NEWID()"
    ssms_cursor.execute(select_query)
    row = ssms_cursor.fetchone()
    if row is None:
        print("No active rows found.")
        return

    # Define old_id here
    old_id = row[0]  # Assuming the first column is 'id'

    # Retrieve the maximum id value from the PRODUCT table
    id_query = "SELECT MAX(id) FROM dbo.PRODUCT"
    ssms_cursor.execute(id_query)
    max_id = ssms_cursor.fetchone()[0]
    new_id = max_id + 1 if max_id is not None else 1
    
    # Step 2: Update the previously active row to set IsActive to 0 and update EndDate
    effective_date = datetime.now()
    update_query = "UPDATE dbo.PRODUCT SET IsActive = 0, EndDate = ? WHERE SK = ?"
    ssms_cursor.execute(update_query, (effective_date, row[-4]))
    ssms_conn.commit()

    # Step 3: Insert a new row with the same id (unusual for SCD Type 2)
    params = (old_id,) + row[1:-4] + (effective_date, None, 1)
    insert_query = """
    INSERT INTO dbo.PRODUCT 
    (id, name, image, description, type_id, type_name, line_id, line_name, introduction_date, production_cost, margin, language, EffectiveDate, EndDate, IsActive) 
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    ssms_cursor.execute(insert_query, params)
    ssms_conn.commit()

    # Fetch the last inserted SK
    ssms_cursor.execute("SELECT SCOPE_IDENTITY() AS NewSK")
    new_sk = ssms_cursor.fetchone()[0]

    # Optional Step 4: Update the sales_targetData table to reflect the new SK for the product
    update_sales_query = "UPDATE dbo.sales_targetData SET product_SFK = ? WHERE product_SFK = ?"
    ssms_cursor.execute(update_sales_query, (new_sk, row[-4]))  # Ensure correct indices and data types
    ssms_conn.commit()

simulate_scd_type_2_with_fact_update()

DataError: ('22018', "[22018] [Microsoft][ODBC SQL Server Driver][SQL Server]Conversion failed when converting the nvarchar value 'Granite Belay' to data type int. (245) (SQLExecDirectW)")

In [None]:
def simulate_scd_type_2():
    # Step 1: Select a random active row from the PRODUCT table
    select_query = "SELECT TOP 1 * FROM dbo.PRODUCT WHERE IsActive = 1 ORDER BY NEWID()"
    ssms_cursor.execute(select_query)
    row = ssms_cursor.fetchone()
    if row is None:
        print("No active rows found.")
        return

    # Retrieve the maximum id value from the PRODUCT table
    id_query = "SELECT MAX(id) FROM dbo.PRODUCT"
    ssms_cursor.execute(id_query)
    max_id = ssms_cursor.fetchone()[0]
    new_id = max_id + 1 if max_id is not None else 1
    
    # Step 2: Update the previously active row to set IsActive to 0 and update EndDate
    effective_date = datetime.now()
    update_query = "UPDATE dbo.PRODUCT SET IsActive = 0, EndDate = ? WHERE SK = ?"
    ssms_cursor.execute(update_query, (effective_date, row[-4]))
    ssms_conn.commit()

    # Step 3: Insert a new row without specifying the SK (identity column)
    params = (new_id,) + row[1:-4] + (effective_date, None, 1)
    insert_query = "INSERT INTO dbo.PRODUCT (id, name, image, description, type_id, type_name, line_id, line_name, introduction_date, production_cost, margin, language, EffectiveDate, EndDate, IsActive) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    ssms_cursor.execute(insert_query, params)
    ssms_conn.commit()

# Assuming ssms_cursor and ssms_conn are already defined and connected to your database
simulate_scd_type_2()

# Close the cursor and connection
#ssms_cursor.close()
#ssms_conn.close()

In [None]:
# Close connections
ssms_conn.close()
ssms_cursor.close()
sqlite_conn.close()
go_sales_conn.close()
go_staff_conn.close()
go_crm_conn.close()