## Implementeer elk gemaakt ETL-schema in Python

**imports & connection**

In [1]:
import pandas as pd
import pyodbc

In [2]:
DB = {'servername': 'LAPTOP-LPE28RPE\SQLEXPRESS', 
    'database': 'TestSCD'}

export_conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + DB['servername'] + 
                              ';DATABASE=' + DB['database'])

export_cursor = export_conn.cursor()
export_cursor   

<pyodbc.Cursor at 0x26aea00a8b0>

**Database connection**

In [3]:
Adventure = {
    'servername' : 'LAPTOP-LPE28RPE\SQLEXPRESS',
    'database' : 'AdventureWorks2019'
}


Northwind = {
    'servername' : 'LAPTOP-LPE28RPE\SQLEXPRESS',
    'database' : 'Northwind'
}

access_db_path = r'C:\Users\Humberto de Castro\OneDrive\Desktop\SEM4\AenC\aenc.accdb'

#Connect to AdventureWorks
Adventure_conn = pyodbc.connect(f"DRIVER={{SQL Server}};SERVER={Adventure['servername']};DATABASE={Adventure['database']};Trusted_Connection=yes;")
Adventure_cursor = Adventure_conn.cursor()

#Connect to Northwind
Northwind_conn = pyodbc.connect(f"DRIVER={{SQL Server}};SERVER={Northwind['servername']};DATABASE={Northwind['database']};Trusted_Connection=yes;")
Northwind_cursor = Northwind_conn.cursor()

#Connect to AenC
AenC_conn = pyodbc.connect(f"DRIVER={{Microsoft Access Driver (*.mdb, *.accdb)}};DBQ={access_db_path};")



In [4]:
AdventureWorks_queries = {
    'Shipmethod' : 'SELECT * FROM Purchasing.Shipmethod'
}

Northwind_queries = {
    'Shippers' : 'SELECT * FROM Shippers'
}

AenC_queries = {
    'sales_order' : 'SELECT * FROM sales_order',
    'sales_order_item' : 'SELECT * FROM sales_order_item',
    'Product' : 'SELECT * FROM Product' 
}

dataframes = {}

# Lees elke tabel in een DataFrame

for table_name, query in AdventureWorks_queries.items():
    dataframes[table_name] = pd.read_sql_query(query, Adventure_conn)

for table_name, query in Northwind_queries.items():
    dataframes[table_name] = pd.read_sql_query(query, Northwind_conn)

for table_name, query in AenC_queries.items():
    dataframes[table_name] = pd.read_sql_query(query, AenC_conn)

#als je ik elk tabel als een dataframe/ variabele wil behandelen of aanroepen moet ik dit uitvoeren.
for table_name, df in dataframes.items():
    globals()[table_name] = df


  dataframes[table_name] = pd.read_sql_query(query, Adventure_conn)
  dataframes[table_name] = pd.read_sql_query(query, Northwind_conn)
  dataframes[table_name] = pd.read_sql_query(query, AenC_conn)


## Ship

Transform

In [5]:
Shippers.rename(columns={
    'ShipperID': 'ShipMethodID',
    'CompanyName' : 'Name'
}, inplace=True)

In [6]:
Shipmethod['ShipMethodID'] = Shipmethod['ShipMethodID'].apply(lambda x: f'AW_{x}')
Shippers['ShipMethodID'] = Shippers['ShipMethodID'].apply(lambda x: f'NW_{x}')
# AenC_Order heeft geen ShipMethodID

In [7]:
# Zorg ervoor dat alle vereiste kolommen bestaan en selecteer ze
required_columns = ['ShipMethodID', 'Name', 'ShipBase', 'ShipRate']

for col in required_columns:
    if col not in Shippers.columns:
        Shippers[col] = None
    if col not in Shipmethod.columns:
        Shipmethod[col] = None

# Selecteer de kolommen in de juiste volgorde
Shippers = Shippers[required_columns]
Shipmethod = Shipmethod[required_columns]

# Combineer de dataframes
Ship = pd.concat([Shipmethod, Shippers], ignore_index=True)

print(Ship.dtypes)

ShipMethodID     object
Name             object
ShipBase        float64
ShipRate        float64
dtype: object


  Ship = pd.concat([Shipmethod, Shippers], ignore_index=True)


### Load in ssms

In [8]:
# Insert data into the Ship table
for index, row in Ship.iterrows():
    try:
        query = """
        INSERT INTO [Ship] (ShipMethodID, Name, ShipBase, ShipRate)
        VALUES (?, ?, ?, ?)
        """
        # Ensure values are in the correct format
        params = (
            row['ShipMethodID'],
            row['Name'],
            float(row['ShipBase']) if pd.notnull(row['ShipBase']) else None,
            float(row['ShipRate']) if pd.notnull(row['ShipRate']) else None
        )
        export_cursor.execute(query, params)
    except pyodbc.Error as e:
        print(f"An error occurred: {e}")
        print("Query:", query)
        print("Parameters:", params)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        print("Query:", query)
        print("Parameters:", params)

export_conn.commit()


### **Data_mutatie simuleren**

Ship

In [9]:
# Function to merge data into Ship table
def merge_ship(ship_method_id, name, ship_base, ship_rate):
    merge_query = "{CALL MergeShip (?, ?, ?, ?)}"
    export_cursor.execute(merge_query, (ship_method_id, name, ship_base, ship_rate))
    export_conn.commit()
    print("Merged into Ship table")

In [32]:
# Function to insert data into Ship table
def insert_ship(ship_method_id, name, ship_base, ship_rate):
    insert_query = """
    INSERT INTO Ship (ShipMethodID, Name, ShipBase, ShipRate)
    VALUES (?, ?, ?, ?)
    """
    export_cursor.execute(insert_query, (ship_method_id, name, ship_base, ship_rate))
    export_conn.commit()
    print("Inserted into Ship table")

In [33]:
# Function to update data in Ship table
def update_ship(ship_surrogate_key, new_name):
    update_query = """
    UPDATE Ship
    SET Name = ?
    WHERE ShipSurrogateKey = ?
    """
    export_cursor.execute(update_query, (new_name, ship_surrogate_key))
    export_conn.commit()
    print("Updated Ship table")


In [10]:
# Example usage
try:

     # Merge into Ship table
    merge_ship('AW_5', 'Speedy', 5.00, 1.00)

    # Update Ship table (Type 2 SCD)
    merge_ship('AW_5', 'Super Speedy', 6.00, 1.50)


finally:
    # Close the connection
    print('connection closed')

Merged into Ship table
Merged into Ship table
connection closed
