In [1]:
import pyodbc
import sqlite3
import pandas as pd

# Target connection: the data warehouse database whose Dim_* / Fact_* tables
# are cleared and loaded by the cells below. Trusted_Connection=yes means the
# connection string carries no user name or password.
export_conn = pyodbc.connect(
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=(localdb)\\MSSQLLocalDB;'
    'DATABASE=projectdatawarehouse;'
    'Trusted_Connection=yes;'
)

# Shared cursor on the warehouse connection, used by the functions below that
# do not open a cursor of their own.
export_cursor = export_conn.cursor()

# Source connection: the operational data model that the loads read from.
import_conn = pyodbc.connect(
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=(localdb)\\MSSQLLocalDB;'
    'DATABASE=projectsourcedatamodel;'
    'Trusted_Connection=yes;'
)

# Shared cursor on the source connection.
import_cursor = import_conn.cursor()

In [12]:
def clear_tables():
    """Delete every row from the warehouse fact and dimension tables.

    Uses the notebook-level ``export_cursor`` / ``export_conn``. The fact
    tables are listed before the dimension tables (presumably so rows that
    reference a dimension are removed before the dimension itself is emptied
    -- confirm against the warehouse foreign keys).

    All deletes run in a single transaction: either every table is emptied
    and the change is committed, or nothing is changed.

    Raises:
        Exception: whatever the driver raises for a failing DELETE; the
            transaction is rolled back before the error is re-raised.
    """
    tables = (
        'Fact_EmployeePerformance',
        'Fact_Sales',
        'Fact_Purchase',
        'Dim_Employee',
        'Dim_Location',
        'Dim_DateTime',
        'Dim_Product',
        'Dim_Vendor',
        'Dim_Customer',
    )

    try:
        for table in tables:
            # Table names come from the fixed tuple above, never from user
            # input, so interpolating them into the statement is safe.
            export_cursor.execute(f"DELETE FROM {table}")
        # Commit once at the end so a failure cannot leave the warehouse
        # half-cleared.
        export_conn.commit()
    except Exception:
        export_conn.rollback()
        raise

    print("Alle tabellen zijn geleegd.")

# Empty the warehouse tables before the load cells below are run.
clear_tables()

Alle tabellen zijn geleegd.


In [13]:
def move_dimcustomer(import_conn, export_conn):
    """Copy customers from the source database into ``Dim_Customer``.

    Reads ``Customer``, ``CustomerCustomerDemo`` and ``CustomerDemographics``
    through ``import_conn``, joins them in pandas and inserts the result
    through ``export_conn``. A row is only inserted when its ``CustomerID`` is
    not yet present in ``Dim_Customer``, so the load can be re-run; if a
    customer is linked to several demographics, only the first joined row is
    inserted.

    Missing source values are written as SQL NULL rather than as empty
    strings, and ``FullName`` is built from whichever name parts are present.

    Args:
        import_conn: Open DB-API connection to the source database.
        export_conn: Open DB-API connection to the data warehouse.

    A failing insert is printed and skipped; the remaining rows are still
    inserted and committed.
    """
    # Fetch the three source tables
    customer = pd.read_sql_query(
        "SELECT CustomerID, Fname, Lname, Phone, Address, City, Region, State, Country FROM Customer",
        import_conn,
    )
    customer_customer_demo = pd.read_sql_query(
        "SELECT CustomerID, CustomerTypeID FROM CustomerCustomerDemo",
        import_conn,
    )
    customer_demographics = pd.read_sql_query(
        "SELECT CustomerTypeID, CustomerDesc FROM CustomerDemographics",
        import_conn,
    )

    # Left joins keep customers that have no demographic link
    merged_df = (
        customer
        .merge(customer_customer_demo, on='CustomerID', how='left')
        .merge(customer_demographics, on='CustomerTypeID', how='left')
    )

    # Join only the name parts that are present: a plain string concatenation
    # would turn the whole name into a missing value if either part is missing.
    merged_df['FullName'] = [
        ' '.join(str(part) for part in (first, last) if pd.notna(part)) or None
        for first, last in zip(merged_df['Fname'], merged_df['Lname'])
    ]
    # Use Region, falling back to State when Region is missing
    merged_df['Region'] = merged_df['Region'].combine_first(merged_df['State'])

    # Keep only the target columns; CustomerDesc is what the warehouse calls
    # CustomerType
    final_df = merged_df[[
        'CustomerID',
        'FullName',
        'CustomerDesc',
        'Phone',
        'Address',
        'City',
        'Region',
        'Country'
    ]].rename(columns={'CustomerDesc': 'CustomerType'})

    query = '''
        INSERT INTO Dim_Customer
        (CustomerID, FullName, CustomerType, PhoneNumber, Address, City, Region, Country)
        SELECT ?, ?, ?, ?, ?, ?, ?, ?
        WHERE NOT EXISTS (
            SELECT 1 FROM Dim_Customer WHERE CustomerID = ?
        )
    '''

    failed = 0
    cursor = export_conn.cursor()
    try:
        for _, row in final_df.iterrows():
            # pandas marks missing values as NaN/None/NA; bind them all as
            # None so the driver writes a real NULL. The trailing CustomerID
            # feeds the NOT EXISTS check.
            params = tuple(None if pd.isna(value) else value for value in row)
            params += (row['CustomerID'],)
            try:
                cursor.execute(query, params)
            except Exception as e:
                failed += 1
                print(f"Fout bij invoegen customer {row['CustomerID']}: {e}")
        export_conn.commit()
    finally:
        cursor.close()

    print(f"{len(final_df)} customers verwerkt in Dim_Customer")
    if failed:
        # Make it visible in the summary that not every processed row landed
        print(f"{failed} customers konden niet worden ingevoegd")

# Run the customer dimension load
move_dimcustomer(import_conn, export_conn)

  customer = pd.read_sql_query("SELECT CustomerID, Fname, Lname, Phone, Address, City, Region, State, Country FROM Customer", import_conn)
  customer_customer_demo = pd.read_sql_query("SELECT CustomerID, CustomerTypeID FROM CustomerCustomerDemo", import_conn)
  customer_demographics = pd.read_sql_query("SELECT CustomerTypeID, CustomerDesc FROM CustomerDemographics", import_conn)


217 customers verwerkt in Dim_Customer


In [None]:
def move_dimemployee():
    """Copy employees from the source database into ``Dim_Employee``.

    Uses the notebook-level ``import_cursor``, ``export_cursor`` and
    ``export_conn``. The load runs in two passes:

    1. Every employee is inserted with ``ManagerID`` (and ``EmailAddress``)
       set to NULL.
    2. ``ManagerID`` is filled in for employees whose manager is itself
       present in ``Dim_Employee``; references to unknown managers stay NULL.

    Both passes form one transaction: it is committed at the end, or rolled
    back completely if any statement fails, so a failed run cannot leave
    pending rows on the shared warehouse connection for a later commit to
    pick up.

    Raises:
        Exception: whatever the driver raises for a failing statement; the
            warehouse transaction is rolled back before it is re-raised.
    """
    select_employees = """
        SELECT
            e.EmployeeID,
            e.FirstName + ' ' + e.LastName AS FullName,
            COALESCE(e.JobTitle, e.Title, 'Onbekend') AS JobTitle,
            d.dept_name AS Department,
            e.HireDate,
            e.BirthDate,
            NULL AS EmailAddress,
            NULL AS ManagerID
        FROM Employee e
        LEFT JOIN Department d ON e.DepartmentID = d.dept_id
    """
    insert_employee = """
        INSERT INTO Dim_Employee (
            EmployeeID,
            FullName,
            JobTitle,
            Department,
            HireDate,
            BirthDate,
            EmailAddress,
            ManagerID
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """
    select_managers = """
        SELECT e.EmployeeID, e.manager_id
        FROM Employee e
        WHERE e.manager_id IS NOT NULL
    """
    # The EXISTS clause only accepts a manager that was loaded in pass 1
    update_manager = """
        UPDATE Dim_Employee
        SET ManagerID = ?
        WHERE EmployeeID = ?
        AND EXISTS (SELECT 1 FROM Dim_Employee WHERE EmployeeID = ?)
    """

    try:
        # Pass 1: insert all employees without a manager reference
        import_cursor.execute(select_employees)
        employees = import_cursor.fetchall()
        for employee in employees:
            export_cursor.execute(insert_employee, employee)

        # Pass 2: link employees to their managers
        import_cursor.execute(select_managers)
        manager_pairs = import_cursor.fetchall()
        for emp_id, manager_id in manager_pairs:
            export_cursor.execute(update_manager, (manager_id, emp_id, manager_id))

        export_conn.commit()
    except Exception:
        # Undo the partial load instead of leaving it pending on export_conn
        export_conn.rollback()
        raise

    print(f"{len(employees)} employees imported into Dim_Employee")

# Run the employee dimension load
move_dimemployee()

351 employees imported into Dim_Employee


In [14]:
def move_dimproduct(import_conn, export_conn):
    """Copy products from the source database into ``Dim_Product``.

    Reads ``Product`` and ``Category`` through ``import_conn``, joins them in
    pandas and inserts the result through ``export_conn``. A row is only
    inserted when its ``ProductID`` is not yet present in ``Dim_Product``, so
    the load can be re-run.

    The source only carries a ``Discontinued`` flag: products with the flag
    set get the date of this load as ``DiscontinuedDate``, all others get
    NULL. Missing source values are written as SQL NULL; binding them as
    empty strings would make SQL Server store 1900-01-01 in a date column and
    0 (or raise a conversion error) in a numeric column.

    NOTE(review): the recorded run of this cell failed with "Invalid column
    name" for StandardCost, ListPrice and ProductLine, so the source
    ``Product`` table apparently has no columns with those names. The SELECT
    list below must be aligned with the real source schema before this load
    can succeed.

    Args:
        import_conn: Open DB-API connection to the source database.
        export_conn: Open DB-API connection to the data warehouse.

    A failing insert is printed and skipped; the remaining rows are still
    inserted and committed.
    """
    # Fetch the source tables
    product = pd.read_sql_query("""
        SELECT
            ProductID,
            Name AS ProductName,
            Color,
            StandardCost,
            ListPrice,
            ProductLine,
            Discontinued,
            CategoryID
        FROM Product
    """, import_conn)

    category = pd.read_sql_query("""
        SELECT
            CategoryID,
            CategoryName AS ProductCategoryName
        FROM Category
    """, import_conn)

    # Left join keeps products without a (known) category
    merged_df = product.merge(category, on='CategoryID', how='left')

    # Evaluate the load date once so every discontinued product gets the same
    # value, then translate the flag into a date or NULL.
    load_date = pd.Timestamp.today().date()
    merged_df['DiscontinuedDate'] = [
        load_date if flag == 1 else None
        for flag in merged_df['Discontinued']
    ]

    # Keep only the target columns, in the order of the INSERT column list
    final_df = merged_df[[
        'ProductID',
        'ProductName',
        'ProductCategoryName',
        'Color',
        'StandardCost',
        'ListPrice',
        'ProductLine',
        'DiscontinuedDate'
    ]]

    query = '''
        INSERT INTO Dim_Product
        (ProductID, ProductName, ProductCategoryName, Color,
         StandardCost, ListPrice, ProductLine, DiscontinuedDate)
        SELECT ?, ?, ?, ?, ?, ?, ?, ?
        WHERE NOT EXISTS (
            SELECT 1 FROM Dim_Product WHERE ProductID = ?
        )
    '''

    failed = 0
    cursor = export_conn.cursor()
    try:
        for _, row in final_df.iterrows():
            # Bind every missing value (NaN/None/NA) as None so the driver
            # writes a real NULL. The trailing ProductID feeds the NOT EXISTS
            # check.
            values = tuple(None if pd.isna(value) else value for value in row)
            values += (row['ProductID'],)
            try:
                cursor.execute(query, values)
            except Exception as e:
                failed += 1
                print(f"Fout bij invoegen product {row['ProductID']}: {e}")
        export_conn.commit()
    finally:
        cursor.close()

    print(f"{len(final_df)} products verwerkt in Dim_Product")
    if failed:
        # Make it visible in the summary that not every processed row landed
        print(f"{failed} products konden niet worden ingevoegd")

# Run the product dimension load.
# NOTE: the recorded output of this cell is a DatabaseError ("Invalid column
# name" for StandardCost, ListPrice and ProductLine), i.e. the last saved run
# did not load any products.
move_dimproduct(import_conn, export_conn)

  product = pd.read_sql_query("""


DatabaseError: Execution failed on sql '
        SELECT 
            ProductID, 
            Name AS ProductName,
            Color,
            StandardCost,
            ListPrice,
            ProductLine,
            Discontinued,
            CategoryID
        FROM Product
    ': ('42S22', "[42S22] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid column name 'StandardCost'. (207) (SQLExecDirectW); [42S22] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid column name 'ListPrice'. (207); [42S22] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid column name 'ProductLine'. (207)")

In [19]:
# Release both database connections once all loads are done; the connection
# cell at the top must be re-run before any load cell can be used again.
export_conn.close()
import_conn.close()