## Implementeer elk gemaakt ETL-schema in Python

**imports & connection**

In [1]:
import pandas as pd
import pyodbc

In [2]:
# Connection settings for the target (export) database.
# The server name is a raw string: the backslash separating host and instance
# is a literal character, and '\S' in a normal string literal is an invalid
# escape sequence (SyntaxWarning on recent Python versions).
DB = {'servername': r'LAPTOP-LPE28RPE\SQLEXPRESS',
      'database': 'United_outdoors'}

# Open the connection that the load cells further down write to.
export_conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + DB['servername'] +
                             ';DATABASE=' + DB['database'])

# Cursor shared by the load and merge cells; the bare expression on the last
# line displays it as the cell output.
export_cursor = export_conn.cursor()
export_cursor

<pyodbc.Cursor at 0x24dff3b4030>

**Database connection**

In [3]:
# Connection settings for the two SQL Server source databases.
# Server names are raw strings so the backslash before the instance name is
# taken literally ('\S' is an invalid escape sequence in a normal literal).
Adventure = {
    'servername': r'LAPTOP-LPE28RPE\SQLEXPRESS',
    'database': 'AdventureWorks2019'
}

Northwind = {
    'servername': r'LAPTOP-LPE28RPE\SQLEXPRESS',
    'database': 'Northwind'
}

# Location of the Access file that holds the AenC source data.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
access_db_path = r'C:\Users\Humberto de Castro\OneDrive\Desktop\SEM4\AenC\aenc.accdb'

# Connect to AdventureWorks
Adventure_conn = pyodbc.connect(f"DRIVER={{SQL Server}};SERVER={Adventure['servername']};DATABASE={Adventure['database']};Trusted_Connection=yes;")
Adventure_cursor = Adventure_conn.cursor()

# Connect to Northwind
Northwind_conn = pyodbc.connect(f"DRIVER={{SQL Server}};SERVER={Northwind['servername']};DATABASE={Northwind['database']};Trusted_Connection=yes;")
Northwind_cursor = Northwind_conn.cursor()

# Connect to AenC (Access database, opened through the Access ODBC driver)
AenC_conn = pyodbc.connect(f"DRIVER={{Microsoft Access Driver (*.mdb, *.accdb)}};DBQ={access_db_path};")



In [4]:
# One SELECT statement per source table, grouped by the database it is read
# from. The dictionary keys become the names of the resulting DataFrames.
AdventureWorks_queries = {
    'SalesPerson': 'SELECT * FROM Sales.SalesPerson',
    'Quota': 'SELECT * FROM Sales.SalesPersonQuotaHistory',
}

Northwind_queries = {'Shippers': 'SELECT * FROM Shippers'}

AenC_queries = {
    'sales_order': 'SELECT * FROM sales_order',
    'sales_order_item': 'SELECT * FROM sales_order_item',
    'Product': 'SELECT * FROM Product',
}

dataframes = {}

# Read every table into a DataFrame, one source database after the other
# (AdventureWorks, then Northwind, then AenC).
for queries, conn in (
    (AdventureWorks_queries, Adventure_conn),
    (Northwind_queries, Northwind_conn),
    (AenC_queries, AenC_conn),
):
    for table_name, query in queries.items():
        dataframes[table_name] = pd.read_sql_query(query, conn)

# Publish each DataFrame as a notebook-level variable named after its table,
# so later cells can refer to e.g. `Quota` directly.
for table_name, df in dataframes.items():
    globals()[table_name] = df


  dataframes[table_name] = pd.read_sql_query(query, Adventure_conn)
  dataframes[table_name] = pd.read_sql_query(query, Northwind_conn)
  dataframes[table_name] = pd.read_sql_query(query, AenC_conn)


## Salesperson

Transform

In [5]:
# Rename the key column in place to the name used by the target [Quota] table.
Quota.rename(columns={'BusinessEntityID': 'EmployeeID'}, inplace=True)

In [6]:
# Add the "AW_" source prefix to every EmployeeID.
# Values that already carry the prefix are left untouched, so re-running this
# cell does not produce keys such as "AW_AW_274".
Quota['EmployeeID'] = Quota['EmployeeID'].apply(
    lambda x: x if str(x).startswith('AW_') else f'AW_{x}'
)

In [7]:
# Columns to keep, in the order they are selected below.
required_columns = ['EmployeeID', 'QuotaDate', 'SalesQuota', 'Bonus']

# Make sure every required column exists; a missing one is added and filled
# with None.
for col in required_columns:
    if col in Quota.columns:
        continue
    Quota[col] = None

# Keep only the required columns, in the order listed above.
Quota = Quota.loc[:, required_columns]

print(Quota)


    EmployeeID  QuotaDate  SalesQuota Bonus
0       AW_274 2011-05-31     28000.0  None
1       AW_274 2011-08-31      7000.0  None
2       AW_274 2011-12-01     91000.0  None
3       AW_274 2012-02-29    140000.0  None
4       AW_274 2012-05-30     70000.0  None
..         ...        ...         ...   ...
158     AW_290 2013-02-28    404000.0  None
159     AW_290 2013-05-30   1262000.0  None
160     AW_290 2013-08-30   1057000.0  None
161     AW_290 2013-11-30    707000.0  None
162     AW_290 2014-03-01    908000.0  None

[163 rows x 4 columns]


### Load in SSMS

In [9]:
# Load every row of Quota into the [Quota] table of the export database.
# For each row the surrogate key of the current SalesPerson record is looked
# up first; rows without a match are reported and skipped.

# Both statements are loop-invariant, so they are defined once up front.
lookup_query = "SELECT SalesPersonSurrogateKey FROM SalesPerson WHERE EmployeeID = ? AND IsCurrent = 1"
insert_query = """
            INSERT INTO [Quota] (EmployeeID, SalesPerson_FSK, QuotaDate, SalesQuota)
            VALUES (?, ?, ?, ?)
            """

for index, row in Quota.iterrows():
    # `query` / `params` always hold the statement that is about to run, so
    # the error report below shows what actually failed - never an unbound
    # name or a leftover value from a previous row or cell.
    query, params = lookup_query, (row['EmployeeID'],)
    try:
        # Look up SalesPerson_FSK based on EmployeeID
        export_cursor.execute(query, params)
        lookup_row = export_cursor.fetchone()

        if lookup_row:
            sales_person_fsk = lookup_row[0]
            query = insert_query
            # Ensure values are in a format the driver accepts: SalesQuota is
            # passed as a plain float, or None when the value is missing.
            params = (
                row['EmployeeID'],
                sales_person_fsk,
                row['QuotaDate'],
                float(row['SalesQuota']) if pd.notnull(row['SalesQuota']) else None
            )
            export_cursor.execute(query, params)
        else:
            print(f"No SalesPerson_FSK found for EmployeeID: {row['EmployeeID']}")

    except pyodbc.Error as e:
        # Best effort: report the failing statement and continue with the next row.
        print(f"An error occurred: {e}")
        print(query)
        print("Parameters:", params)

# Persist all successful inserts in one transaction.
export_conn.commit()


### **Data_mutatie simuleren**

In [None]:
# Function to merge data into Quota table
def merge_quota(employee_id, quota_date, sales_quota, cursor=None, conn=None):
    """Call the MergeQuota stored procedure for one quota record and commit.

    Parameters
    ----------
    employee_id, quota_date, sales_quota
        Passed, in this order, as the three arguments of ``MergeQuota``.
    cursor : optional
        Cursor used to execute the call. Defaults to the notebook-level
        ``export_cursor``.
    conn : optional
        Connection used to commit or roll back. Defaults to the
        notebook-level ``export_conn``.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Whatever the cursor/connection raises; the pending transaction is
        rolled back before the exception is re-raised.
    """
    # Fall back to the notebook-wide connection objects only when the caller
    # did not supply their own.
    cursor = export_cursor if cursor is None else cursor
    conn = export_conn if conn is None else conn

    merge_query = "{CALL MergeQuota (?, ?, ?)}"
    try:
        cursor.execute(merge_query, (employee_id, quota_date, sales_quota))
        conn.commit()
    except Exception:
        # Do not leave a half-finished transaction open on the shared connection.
        conn.rollback()
        raise
    print("Merged into Quota table")


In [None]:
# Simulate data changes by sending a few records through merge_quota.
# The first two calls use the same EmployeeID and date with a different quota;
# the third uses an EmployeeID with the "AW_" prefix added in the transform step.
# NOTE(review): what MergeQuota does with repeated keys is defined in the
# database, not in this notebook - confirm against the stored procedure.
try:
    # Merge into Quota table
    merge_quota('E123', '2023-06-07', 50000.0)
    merge_quota('E123', '2023-06-07', 60000.0)
    merge_quota('AW_290', '2023-06-07', 60000.0)
finally:
    # Actually close the connection before reporting it as closed, even when
    # one of the merges above failed.
    export_cursor.close()
    export_conn.close()
    print("Connection closed")