## Implementeer elk gemaakt ETL-schema in Python

**imports & connection**

In [1]:
import pandas as pd
import pyodbc

In [2]:
# Connection settings for the database that receives the transformed data.
# The server name is a raw string because it contains a backslash: '\S' is not
# a valid escape sequence, so a normal string literal triggers a warning.
DB = {
    'servername': r'LAPTOP-LPE28RPE\SQLEXPRESS',
    'database': 'United_outdoors',
}

# Doubled braces in the f-string produce the literal "{SQL Server}" driver name.
export_conn = pyodbc.connect(
    f"DRIVER={{SQL Server}};SERVER={DB['servername']};DATABASE={DB['database']}"
)

# Cursor used by the load cells further down to write into the export database.
export_cursor = export_conn.cursor()
export_cursor

<pyodbc.Cursor at 0x1c06e544030>

**Database connection**

In [3]:
# Connection settings for the SQL Server source databases.
# Server names are raw strings because they contain a backslash: '\S' is not a
# valid escape sequence, so a normal string literal triggers a warning.
Adventure = {
    'servername': r'LAPTOP-LPE28RPE\SQLEXPRESS',
    'database': 'AdventureWorks2019',
}

Northwind = {
    'servername': r'LAPTOP-LPE28RPE\SQLEXPRESS',
    'database': 'Northwind',
}

# Location of the AenC Access database file (machine-specific absolute path).
access_db_path = r'C:\Users\Humberto de Castro\OneDrive\Desktop\SEM4\AenC\aenc.accdb'

# Connect to AdventureWorks
Adventure_conn = pyodbc.connect(f"DRIVER={{SQL Server}};SERVER={Adventure['servername']};DATABASE={Adventure['database']};Trusted_Connection=yes;")
Adventure_cursor = Adventure_conn.cursor()

# Connect to Northwind
Northwind_conn = pyodbc.connect(f"DRIVER={{SQL Server}};SERVER={Northwind['servername']};DATABASE={Northwind['database']};Trusted_Connection=yes;")
Northwind_cursor = Northwind_conn.cursor()

# Connect to AenC (Access database, opened through the Access ODBC driver)
AenC_conn = pyodbc.connect(f"DRIVER={{Microsoft Access Driver (*.mdb, *.accdb)}};DBQ={access_db_path};")



In [4]:
# SELECT statements per source system, keyed by the name under which the
# resulting DataFrame is stored.
AdventureWorks_queries = dict(
    SalesPerson='SELECT * FROM Sales.SalesPerson',
    Quota='SELECT * FROM Sales.SalesPersonQuotaHistory',
)

Northwind_queries = dict(
    Shippers='SELECT * FROM Shippers',
)

AenC_queries = dict(
    sales_order='SELECT * FROM sales_order',
    sales_order_item='SELECT * FROM sales_order_item',
    Product='SELECT * FROM Product',
)

dataframes = {}

# Read every table into a DataFrame, pairing each query set with the
# connection it has to run on.
sources = (
    (AdventureWorks_queries, Adventure_conn),
    (Northwind_queries, Northwind_conn),
    (AenC_queries, AenC_conn),
)
for queries, conn in sources:
    for table_name, query in queries.items():
        dataframes[table_name] = pd.read_sql_query(query, conn)

# Publish each DataFrame as a module-level variable named after its table, so
# later cells can refer to e.g. `SalesPerson` directly.
for table_name, df in dataframes.items():
    globals()[table_name] = df


  dataframes[table_name] = pd.read_sql_query(query, Adventure_conn)
  dataframes[table_name] = pd.read_sql_query(query, Northwind_conn)
  dataframes[table_name] = pd.read_sql_query(query, AenC_conn)


## Salesperson

Transform

In [5]:
# Rename the source key column to EmployeeID. Done in place so the same frame
# object held in `dataframes` carries the new column name as well.
column_renames = {'BusinessEntityID': 'EmployeeID'}
SalesPerson.rename(columns=column_renames, inplace=True)

In [6]:
# Add the "AW_" prefix to every EmployeeID value.
# Note: re-running this cell prefixes the values a second time.
SalesPerson['EmployeeID'] = SalesPerson['EmployeeID'].map('AW_{}'.format)

In [7]:
# Make sure every required column exists, then select them.
required_columns = [
    'EmployeeID',
    'TerritoryID',
    'SalesQuota',
    'Bonus',
    'SalesYTD',
    'SalesLastYear',
]

# A required column that is absent is created and filled with None.
for col in required_columns:
    if col in SalesPerson.columns:
        continue
    SalesPerson[col] = None

# Keep only the required columns, in the order listed above.
SalesPerson = SalesPerson.loc[:, required_columns]

print(SalesPerson)


   EmployeeID  TerritoryID  SalesQuota   Bonus      SalesYTD  SalesLastYear
0      AW_274          NaN         NaN     0.0  5.596976e+05   0.000000e+00
1      AW_275          2.0    300000.0  4100.0  3.763178e+06   1.750406e+06
2      AW_276          4.0    250000.0  2000.0  4.251369e+06   1.439156e+06
3      AW_277          3.0    250000.0  2500.0  3.189418e+06   1.997186e+06
4      AW_278          6.0    250000.0   500.0  1.453719e+06   1.620277e+06
5      AW_279          5.0    300000.0  6700.0  2.315186e+06   1.849641e+06
6      AW_280          1.0    250000.0  5000.0  1.352577e+06   1.927059e+06
7      AW_281          4.0    250000.0  3550.0  2.458536e+06   2.073506e+06
8      AW_282          6.0    250000.0  5000.0  2.604541e+06   2.038235e+06
9      AW_283          1.0    250000.0  3500.0  1.573013e+06   1.371635e+06
10     AW_284          1.0    300000.0  3900.0  1.576562e+06   0.000000e+00
11     AW_285          NaN         NaN     0.0  1.725245e+05   0.000000e+00
12     AW_28

### Load into SQL Server (SSMS)

In [8]:
# Insert data into the SalesPerson table, one parameterized INSERT per row.
# The statement text is the same for every row, so it is built once up front.
query = """
        INSERT INTO [SalesPerson] (EmployeeID, TerritoryID, SalesQuota, Bonus, SalesYTD, SalesLastYear)
        VALUES (?, ?, ?, ?, ?, ?)
        """


def _or_none(value, cast=None):
    """Return None for a missing value, else the value (run through `cast` if given)."""
    if pd.isnull(value):
        return None
    return value if cast is None else cast(value)


for index, row in SalesPerson.iterrows():
    # Missing values become None so they are sent as SQL NULL.
    params = (
        row['EmployeeID'],
        _or_none(row['TerritoryID']),
        _or_none(row['SalesQuota'], float),
        _or_none(row['Bonus']),
        _or_none(row['SalesYTD'], float),
        _or_none(row['SalesLastYear'], float),
    )
    try:
        export_cursor.execute(query, params)
    except pyodbc.Error as e:
        # A failing row is reported and skipped; the loop continues with the next row.
        print(f"An error occurred: {e}")
        print(query)
        print("Parameters:", params)

# Commit whatever rows were inserted successfully.
export_conn.commit()


### **Data_mutatie simuleren**

In [None]:
# Function to merge data into SalesPerson table
def merge_salesperson(employee_id, territory_id, sales_quota, bonus, sales_ytd, sales_last_year):
    """Call the MergeSalesPerson stored procedure for one record and commit.

    The six arguments are bound, in signature order, to the six placeholders
    of the procedure call. The call runs on the module-level `export_cursor`
    and is committed on `export_conn`. A confirmation line is printed
    afterwards. Returns None.
    """
    arguments = (employee_id, territory_id, sales_quota, bonus, sales_ytd, sales_last_year)
    export_cursor.execute("{CALL MergeSalesPerson (?, ?, ?, ?, ?, ?)}", arguments)
    export_conn.commit()
    print("Merged into SalesPerson table")

In [None]:
try:
    # Merge into SalesPerson table.
    # 'E123' is sent twice with different figures, then 'AW_290' once.
    # Presumably the first call inserts and the second updates the same key;
    # confirm against the MergeSalesPerson procedure definition.
    merge_salesperson('E123', 1, 100000.0, 5000.0, 75000.0, 70000.0)
    merge_salesperson('E123', 1, 120000.0, 6000.0, 80000.0, 75000.0)
    merge_salesperson('AW_290', 1, 120000.0, 6000.0, 80000.0, 75000.0)
finally:
    # export_conn is not closed in this cell, so the message must not claim
    # that it is. Call export_conn.close() once no further cell needs it.
    print("Merge simulation finished; export connection is still open")