# United Outdoors data warehouse

## Imports

In [439]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, BigInteger, Integer, String, LargeBinary, VARCHAR, NVARCHAR, DECIMAL, CHAR, \
    DATE
from sqlalchemy.dialects.mssql import BIT, XML, MONEY, TIME
from sqlalchemy.exc import OperationalError
from urllib import parse
import re
import time

## Starting timer

In [440]:
# Wall-clock timestamp (seconds since the epoch) taken when the notebook starts;
# presumably compared against a later time.time() to report the total run time.
start_time = time.time()

## Database connection details

In [441]:
# Connection settings: the SQL Server instance plus the name of every database
# this notebook connects to (the three sources, the warehouse and master).
DB = dict(
    servername='(local)\\SQLEXPRESS',
    united_outdoors_database='UnitedOutdoors',
    northwind_database='Northwind',
    aenc_database='Aenc',
    adventureworks_database='AdventureWorks2019',
    master='master',
)

In [442]:
def create_connection(servername, database):
    """
    Builds a SQLAlchemy engine for a SQL Server database and opens one connection on it.
        @param servername: The SQL Server instance to connect to
        @param database: The name of the database on that instance
        @return: A tuple (connection, engine), or (None, None) when connecting raises an OperationalError
    """
    # Windows-authenticated ODBC connection string, URL-encoded for use in the engine URL
    odbc_string = f'DRIVER={{SQL Server}};SERVER={servername};DATABASE={database};Trusted_Connection=yes'
    params = parse.quote_plus(odbc_string)

    # use_setinputsizes is switched off because leaving it on produced errors against SQL Server.
    # NOTE(review): 'options': '-c search_path=dbo' looks like a PostgreSQL-style option - confirm it is needed here.
    engine = create_engine(
        f'mssql+pyodbc:///?odbc_connect={params}',
        use_setinputsizes=False,
        connect_args={'options': '-c search_path=dbo'},
        fast_executemany=True,
    )

    try:
        establish_conn = engine.connect()
    except OperationalError as e:
        print(f'Error: {e}')
        return None, None

    print(f'Connection to {database} database successful')
    return establish_conn, engine

In [443]:
def split_and_execute_sql_script(script, execute_engine):
    """
    Splits a T-SQL script into batches on its GO separators and executes the batches one by one.
        @param script: The full text of the SQL script
        @param execute_engine: The engine on which the batches are executed

    GO is only treated as a separator when it stands alone on a line (any casing, optional
    surrounding spaces/tabs). Text that merely contains the letters "GO" - an identifier ending
    in GO, or a string literal - is left untouched. Every batch is committed right after it ran.
    """
    # GO is a client-side batch separator, not SQL: it must be removed before sending anything to the server
    batches = re.split(r'^[ \t]*GO[ \t]*\r?$', script, flags=re.MULTILINE | re.IGNORECASE)

    # creating a connection
    connection = execute_engine.connect()
    try:
        for batch in batches:
            command = batch.strip()
            # Skip the empty pieces that appear around consecutive or trailing separators
            if not command:
                continue
            try:
                # connection.connection is the underlying DBAPI connection
                connection.connection.execute(command)
                connection.connection.commit()
            except OperationalError as e:
                # NOTE(review): errors raised by the raw DBAPI connection may be driver exceptions rather
                # than SQLAlchemy's OperationalError - confirm this handler actually fires.
                print(f'Error: {e} at command: {command}')
    finally:
        # always hand the connection back, also when a batch raises
        connection.close()

In [444]:
def bulk_insert(df, dtypes, table_name, engine):
    """
    Appends a dataframe to a table in the dbo schema using multi-row INSERT statements.
        @param df: The dataframe to insert
        @param dtypes: The SQL data types of the columns, passed on to DataFrame.to_sql
        @param table_name: The name of the target table
        @param engine: The engine (or connection) to insert the data with
        @raise ValueError: When the dataframe has no columns, since there is nothing to insert
    """
    column_count = len(df.columns)
    if column_count == 0:
        raise ValueError(f'Cannot insert into table: {table_name}, the dataframe has no columns')

    # Two SQL Server limits apply to one multi-row INSERT:
    #  - at most 2100 parameters per statement (2000 is used to keep a safety margin of 100)
    #  - at most 1000 rows in a single VALUES list, which narrow tables would otherwise exceed
    # max(1, ...) keeps the chunk size valid for tables with more than 2000 columns.
    chunk_size = max(1, min(1000, 2000 // column_count))

    print(f'Inserting data into table: {table_name} with chunk size: {chunk_size}')
    df.to_sql(name=table_name, schema='dbo', con=engine, if_exists='append', index=False, dtype=dtypes, method='multi', chunksize=chunk_size)

In [445]:
def prepare(dataframe, nk_sk_dict=None):
    # replacing the natural keys with the surrogate keys
    if nk_sk_dict:
        for column in nk_sk_dict:
            # Check for duplicate keys
            if len(nk_sk_dict[column]) != len(set(nk_sk_dict[column])):
                raise ValueError(f'Duplicate keys found in nk_sk_dict for column: {column}')
            else:
                print(f'Replacing natural keys with surrogate keys for column: {column}')
                for natural_key in nk_sk_dict[column]:
                    dataframe[column] = dataframe[column].replace(natural_key, nk_sk_dict[column][natural_key])
    
    # replace empty values with None
    dataframe = dataframe.where(pd.notnull(dataframe), None)
    dataframe = dataframe.replace({np.nan: None})
    
    # stripping all columns with string data
    dataframe = dataframe.map(lambda x: x.strip() if isinstance(x, str) else x)
    # replacing all empty strings with None
    dataframe = dataframe.replace(r'^\s*$', None, regex=True)

In [446]:
def prepare_and_insert(dataframe, dtypes, table_name, insert_engine, nk_sk_dict=None):
    """
    Runs the preparation step on a dataframe and then bulk-inserts that dataframe into a table.
        @param dataframe: The dataframe that is prepared and inserted
        @param dtypes: The SQL data types of the dataframe columns
        @param table_name: The table the rows are appended to
        @param insert_engine: The engine used for the insert
        @param nk_sk_dict: Per column, a mapping of natural keys to surrogate keys (nk_sk_dict[column][natural_key] = surrogate_key)
    """
    # Step 1: key replacement / clean-up (the return value of prepare is not used here)
    prepare(dataframe, nk_sk_dict=nk_sk_dict)

    # Step 2: write the rows to the database
    bulk_insert(df=dataframe, dtypes=dtypes, table_name=table_name, engine=insert_engine)

In [447]:
def prepare_and_insert_return_sk(dataframe, dtypes, table_name, insert_engine, natural_key_column, nk_sk_dict=None):
    """
    Prepares the dataframe, inserts it into the database and returns the surrogate key of every natural key.
        @param dataframe: The dataframe to be inserted into the database
        @param dtypes: The data types of the columns in the dataframe
        @param table_name: The name of the table in the database
        @param insert_engine: The engine to insert the data into
        @param natural_key_column: The name of the column containing the natural keys
        @param nk_sk_dict: A dictionary containing the natural keys and their corresponding surrogate keys, per column (so nk_sk_dict[column][natural_key] = surrogate_key)
        @return: A dictionary containing the natural keys and their corresponding surrogate keys
    """
    prepare(dataframe, nk_sk_dict)

    # adding the data to the database
    bulk_insert(dataframe, dtypes, table_name, insert_engine)

    # rows without a natural key cannot be looked up later, so they are left out of the mapping
    keyed_rows = dataframe[dataframe[natural_key_column].notnull()]

    # The surrogate key is derived as "row index + 1". Keys and index labels are taken from the
    # SAME filtered frame, so a row without a natural key no longer shifts the keys after it.
    # TODO relying on the dataframe index is not a good idea: this presumably assumes a default
    # 0..n-1 index and a table whose generated keys start at 1 - verify against the table definition.
    return {
        natural_key: index_label + 1
        for natural_key, index_label in zip(keyed_rows[natural_key_column], keyed_rows.index)
    }

In [448]:
def prepare_and_update(table_name, update_engine, nk_sk_dict=None):
    """
    Replaces natural keys with surrogate keys in a table that is already in the database.
        @param table_name: The name of the table to update
        @param update_engine: The engine used to run the updates
        @param nk_sk_dict: Per column, a mapping of natural keys to surrogate keys (nk_sk_dict[column][natural_key] = surrogate_key); natural keys must be convertible to int

    All updates run in one transaction: it is committed at the end, or rolled back (and the
    error re-raised) when anything fails. Without a mapping there is nothing to do and no
    connection is opened.
    """
    if not nk_sk_dict:
        return

    # creating a connection
    connection = update_engine.connect()

    # Start a transaction
    trans = connection.begin()

    try:
        print(f'Updating data in table: {table_name}')
        for column, key_map in nk_sk_dict.items():
            # Statements are collected and sent to the server in batches
            update_stmts = []
            for natural_key, surrogate_key in key_map.items():
                # TODO this would not work with updated data in datawarehouse, since this would replace the old and new data. Maybe check the datetime or something
                # NOTE(review): the statements run one after another, so a surrogate key that equals a
                # later natural key of the same column would be rewritten again by that later statement.
                statement = f'UPDATE {table_name} SET {column} = {surrogate_key} WHERE {column} = {int(natural_key)}'
                update_stmts.append(statement)

                # Flush once 1000 statements have been collected (two values per statement, limit of 2000)
                if len(update_stmts) * 2 >= 2000:
                    connection.connection.execute(";".join(update_stmts))
                    update_stmts = []

            # Execute the remaining update statements
            if update_stmts:
                connection.connection.execute(";".join(update_stmts))

        # Commit the transaction
        trans.commit()
    except BaseException:
        # Roll back on any failure, including an interrupt, and let the caller see it
        trans.rollback()
        raise
    finally:
        # Close the connection
        connection.close()

In [449]:
def drop_modified_date_rowguid(dataframe):
    """
    Removes, in place, every column whose name contains 'rowguid' or 'ModifiedDate'.
        @param dataframe: The dataframe to remove the columns from
    """
    # collect the matching column names per pattern
    rowguid_columns = list(dataframe.filter(like='rowguid').columns)
    modified_date_columns = list(dataframe.filter(like='ModifiedDate').columns)

    # drop both groups in a single call
    dataframe.drop(columns=rowguid_columns + modified_date_columns, inplace=True)

## Create the UnitedOutdoors datawarehouse

In [450]:
# The creation script is run against master.
creation_conn, creation_engine = create_connection(DB["servername"], DB["master"])
if creation_engine is None:
    raise RuntimeError('Could not connect to the master database, the datawarehouse cannot be created')

# Only the engine is needed below: release the connection that create_connection opened
# instead of leaving it checked out.
creation_conn.close()

# Open the SQL script file and read its contents
with open('sql/UnitedOutdoors_creation.sql', 'r') as file:
    sql_script = file.read()

try:
    split_and_execute_sql_script(sql_script, creation_engine)
finally:
    # release the pooled connections to master, also when the script fails
    creation_engine.dispose()

Connection to master database successful


## Connecting to the UnitedOutdoors datawarehouse

In [451]:
united_outdoors_conn, united_outdoors_engine = create_connection(DB["servername"], DB["united_outdoors_database"])

# The connection itself is not used in this cell; close it so it is not left checked out
# (and later cleaned up by the garbage collector) while the engine stays available.
if united_outdoors_conn is not None:
    united_outdoors_conn.close()

Connection to UnitedOutdoors database successful


## Loading the data from the source databases

### Northwind database

#### Connection

In [452]:
# Connection and engine for the Northwind source database
# (create_connection returns (None, None) when it catches an OperationalError).
northwind_conn, northwind_engine = create_connection(DB["servername"], DB["northwind_database"])

Connection to Northwind database successful


#### Loading data

In [453]:
# Load the data from the source database
northwind_categories = pd.read_sql('SELECT * FROM Categories', northwind_conn)
northwind_customer_customer_demo = pd.read_sql('SELECT * FROM CustomerCustomerDemo', northwind_conn)
northwind_customer_demographics = pd.read_sql('SELECT * FROM CustomerDemographics', northwind_conn)
northwind_customers = pd.read_sql('SELECT * FROM Customers', northwind_conn)
northwind_employees = pd.read_sql('SELECT * FROM Employees', northwind_conn)
northwind_employee_territories = pd.read_sql('SELECT * FROM EmployeeTerritories', northwind_conn)
northwind_order_details = pd.read_sql('SELECT * FROM [Order Details]', northwind_conn)
northwind_orders = pd.read_sql('SELECT * FROM Orders', northwind_conn)
northwind_products = pd.read_sql('SELECT * FROM Products', northwind_conn)
northwind_region = pd.read_sql('SELECT * FROM Region', northwind_conn)
northwind_shippers = pd.read_sql('SELECT * FROM Shippers', northwind_conn)
northwind_suppliers = pd.read_sql('SELECT * FROM Suppliers', northwind_conn)
northwind_territories = pd.read_sql('SELECT * FROM Territories', northwind_conn)

northwind_conn.close()

### Aenc database

#### Connection

In [454]:
# Connection and engine for the Aenc source database
# (create_connection returns (None, None) when it catches an OperationalError).
aenc_conn , aenc_engine = create_connection(DB["servername"], DB["aenc_database"])

Connection to Aenc database successful


#### Loading data

In [455]:
# Every Aenc table is read in full into its own dataframe
def _aenc_table(table_name):
    """Returns all rows of one Aenc table as a dataframe."""
    return pd.read_sql(f'SELECT * FROM {table_name}', aenc_conn)


aenc_bonus = _aenc_table('Bonus')
aenc_customer = _aenc_table('Customer')
aenc_department = _aenc_table('Department')
aenc_employee = _aenc_table('Employee')
aenc_product = _aenc_table('Product')
aenc_region = _aenc_table('Region')
aenc_sales_order = _aenc_table('SalesOrder')
aenc_sales_order_item = _aenc_table('SalesOrderItem')
aenc_state = _aenc_table('State')

# everything is in memory now, the source connection is no longer needed
aenc_conn.close()

### AdventureWorks database

#### Connection

In [456]:
# Connection and engine for the AdventureWorks source database
# (create_connection returns (None, None) when it catches an OperationalError).
adventureworks_conn, adventureworks_engine = create_connection(DB["servername"], DB["adventureworks_database"])

Connection to AdventureWorks2019 database successful


#### Loading data

In [457]:
# Every table of the HumanResources schema is read in full into its own dataframe
def _humanresources_table(table_name):
    """Returns all rows of one AdventureWorks HumanResources table as a dataframe."""
    return pd.read_sql(f'SELECT * FROM HumanResources.{table_name}', adventureworks_conn)


adventureworks_humanresources_department = _humanresources_table('Department')
adventureworks_humanresources_employee = _humanresources_table('Employee')
adventureworks_humanresources_employeedepartmenthistory = _humanresources_table('EmployeeDepartmentHistory')
adventureworks_humanresources_employeepayhistory = _humanresources_table('EmployeePayHistory')
adventureworks_humanresources_jobcandidate = _humanresources_table('JobCandidate')
adventureworks_humanresources_shift = _humanresources_table('Shift')

In [458]:
# Tables of the Person schema, each read into its own dataframe
def _person_table(table_name):
    """Returns all rows of one AdventureWorks Person table as a dataframe."""
    return pd.read_sql(f'SELECT * FROM Person.{table_name}', adventureworks_conn)


# Person.Address gets an explicit column list: SpatialLocation is cast to text in the query
adventureworks_person_address = pd.read_sql(
    'SELECT AddressID, AddressLine1, AddressLine2, City, StateProvinceID, PostalCode, CAST(SpatialLocation AS VARCHAR(MAX)) AS SpatialLocation,rowguid, ModifiedDate   FROM Person.Address',
    adventureworks_conn,
)
adventureworks_person_address_type = _person_table('AddressType')
adventureworks_person_businessentity = _person_table('BusinessEntity')
adventureworks_person_businessentityaddress = _person_table('BusinessEntityAddress')
adventureworks_person_businessentitycontact = _person_table('BusinessEntityContact')
adventureworks_person_contacttype = _person_table('ContactType')
adventureworks_person_countryregion = _person_table('CountryRegion')
adventureworks_person_emailaddress = _person_table('EmailAddress')
adventureworks_person_password = _person_table('Password')
adventureworks_person_person = _person_table('Person')
adventureworks_person_personphone = _person_table('PersonPhone')
adventureworks_person_phonenumbertype = _person_table('PhoneNumberType')
adventureworks_person_stateprovince = _person_table('StateProvince')

Exception during reset or similar
Traceback (most recent call last):
  File "c:\Users\yunyi\AppData\Local\pypoetry\Cache\virtualenvs\clodsire-2cKgtQrF-py3.12\Lib\site-packages\sqlalchemy\pool\base.py", line 986, in _finalize_fairy
    fairy._reset(
  File "c:\Users\yunyi\AppData\Local\pypoetry\Cache\virtualenvs\clodsire-2cKgtQrF-py3.12\Lib\site-packages\sqlalchemy\pool\base.py", line 1432, in _reset
    pool._dialect.do_rollback(self)
  File "c:\Users\yunyi\AppData\Local\pypoetry\Cache\virtualenvs\clodsire-2cKgtQrF-py3.12\Lib\site-packages\sqlalchemy\dialects\mssql\base.py", line 3117, in do_rollback
    super().do_rollback(dbapi_connection)
  File "c:\Users\yunyi\AppData\Local\pypoetry\Cache\virtualenvs\clodsire-2cKgtQrF-py3.12\Lib\site-packages\sqlalchemy\engine\default.py", line 698, in do_rollback
    dbapi_connection.rollback()
pyodbc.Error: ('01000', '[01000] [Microsoft][ODBC SQL Server Driver][DBMSLPCN]ConnectionWrite (WrapperWrite()). (233) (SQLEndTran); [01000] [Microsoft][ODB

In [459]:
# Tables of the Production schema, each read into its own dataframe
def _production_table(table_name):
    """Returns all rows of one AdventureWorks Production table as a dataframe."""
    return pd.read_sql(f'SELECT * FROM Production.{table_name}', adventureworks_conn)


adventureworks_production_bill_of_materials = _production_table('BillOfMaterials')
adventureworks_production_culture = _production_table('Culture')
adventureworks_production_document = _production_table('Document')
adventureworks_production_illustration = _production_table('Illustration')
adventureworks_production_location = _production_table('Location')
adventureworks_production_product = _production_table('Product')
adventureworks_production_productcategory = _production_table('ProductCategory')
adventureworks_production_productcosthistory = _production_table('ProductCostHistory')
adventureworks_production_productdescription = _production_table('ProductDescription')
# ProductDocument: all columns plus DocumentNode cast to text as DocumentNodeString
adventureworks_production_productdocument = pd.read_sql(
    'SELECT * , CAST(DocumentNode AS VARCHAR(MAX)) AS DocumentNodeString  FROM Production.ProductDocument',
    adventureworks_conn,
)
adventureworks_production_productinventory = _production_table('ProductInventory')
adventureworks_production_productlistpricehistory = _production_table('ProductListPriceHistory')
adventureworks_production_productmodel = _production_table('ProductModel')
adventureworks_production_productmodelillustration = _production_table('ProductModelIllustration')
adventureworks_production_productmodelproductdescriptionculture = _production_table('ProductModelProductDescriptionCulture')
# ProductPhoto: both photo columns are converted to text in the query (CONVERT ... style 1)
adventureworks_production_productphoto = pd.read_sql(
    'SELECT ProductPhotoID, CONVERT(VARCHAR(MAX),ThumbNailPhoto, 1) as ThumbNailPhotoHexString, ThumbNailPhotoFileName, CONVERT(VARCHAR(MAX), LargePhoto, 1) as LargePhotoHexString, LargePhotoFileName, ModifiedDate FROM Production.ProductPhoto',
    adventureworks_conn,
)
adventureworks_production_productproductphoto = _production_table('ProductProductPhoto')
adventureworks_production_productreview = _production_table('ProductReview')
adventureworks_production_productsubcategory = _production_table('ProductSubcategory')
adventureworks_production_scrapreason = _production_table('ScrapReason')
adventureworks_production_transactionhistory = _production_table('TransactionHistory')
adventureworks_production_transactionhistoryarchive = _production_table('TransactionHistoryArchive')
adventureworks_production_unitmeasure = _production_table('UnitMeasure')
adventureworks_production_workorder = _production_table('WorkOrder')
adventureworks_production_workorderrouting = _production_table('WorkOrderRouting')

In [460]:
# Every table of the Purchasing schema is read in full into its own dataframe
def _purchasing_table(table_name):
    """Returns all rows of one AdventureWorks Purchasing table as a dataframe."""
    return pd.read_sql(f'SELECT * FROM Purchasing.{table_name}', adventureworks_conn)


adventureworks_purchasing_productvendor = _purchasing_table('ProductVendor')
adventureworks_purchasing_purchaseorderdetail = _purchasing_table('PurchaseOrderDetail')
adventureworks_purchasing_purchaseorderheader = _purchasing_table('PurchaseOrderHeader')
adventureworks_purchasing_shipmethod = _purchasing_table('ShipMethod')
adventureworks_purchasing_vendor = _purchasing_table('Vendor')

In [461]:
# Every table of the Sales schema is read in full into its own dataframe
def _sales_table(table_name):
    """Returns all rows of one AdventureWorks Sales table as a dataframe."""
    return pd.read_sql(f'SELECT * FROM Sales.{table_name}', adventureworks_conn)


adventureworks_sales_countryregioncurrency = _sales_table('CountryRegionCurrency')
adventureworks_sales_creditcard = _sales_table('CreditCard')
adventureworks_sales_currency = _sales_table('Currency')
adventureworks_sales_currencyrate = _sales_table('CurrencyRate')
adventureworks_sales_customer = _sales_table('Customer')
adventureworks_sales_personcreditcard = _sales_table('PersonCreditCard')
adventureworks_sales_salesorderdetail = _sales_table('SalesOrderDetail')
adventureworks_sales_salesorderheader = _sales_table('SalesOrderHeader')
adventureworks_sales_salesorderhearerrsaleseason = _sales_table('SalesOrderHeaderSalesReason')
adventureworks_sales_salesperson = _sales_table('SalesPerson')
adventureworks_sales_salespersonquotahistory = _sales_table('SalesPersonQuotaHistory')
adventureworks_sales_salesreason = _sales_table('SalesReason')
adventureworks_sales_salestaxrate = _sales_table('SalesTaxRate')
adventureworks_sales_salesterritory = _sales_table('SalesTerritory')
adventureworks_sales_salesterritoryhistory = _sales_table('SalesTerritoryHistory')
adventureworks_sales_shoppingcartitem = _sales_table('ShoppingCartItem')
adventureworks_sales_specialoffer = _sales_table('SpecialOffer')
adventureworks_sales_specialofferproduct = _sales_table('SpecialOfferProduct')
adventureworks_sales_store = _sales_table('Store')

# this was the last AdventureWorks read, the source connection is no longer needed
adventureworks_conn.close()

## Combining the data
The order of the steps below matters, because of the surrogate keys.
TODO: the surrogate key (SK) handling still needs to be done.

### Departments
Combining aenc and adventureworks department data

In [462]:
# adding DEPARTMENT_source_database columns to the dataframes
aenc_department['DEPARTMENT_source_database'] = 'aenc'
adventureworks_humanresources_department['DEPARTMENT_source_database'] = 'adventureworks'

# combining all department data
departments = pd.concat([aenc_department, adventureworks_humanresources_department], ignore_index=True)

# combining name and department name columns to create a name column
departments['DEPARTMENT_DEPARTMENT_DeptName'] = departments['Name'].combine_first(departments['dept_name'])
# combining dept_id and DepartmentID columns
departments['DEPARTMENT_DEPARTMENT_DeptID'] = departments['dept_id'].combine_first(departments['DepartmentID'])

# dropping the redundant columns
drop_modified_date_rowguid(departments)
departments.drop(columns=['dept_id', 'Name', 'dept_name', 'DepartmentID'], inplace=True)

# renaming the remaining columns
departments.rename(columns={'dept_head_id': 'DEPARTMENT_DEPARTMENT_DeptHeadID', 'GroupName': 'DEPARTMENT_DEPARTMENT_GroupName'}, inplace=True)

departments.head()

  departments['DEPARTMENT_DEPARTMENT_DeptID'] = departments['dept_id'].combine_first(departments['DepartmentID'])


Unnamed: 0,DEPARTMENT_DEPARTMENT_DeptHeadID,DEPARTMENT_source_database,DEPARTMENT_DEPARTMENT_GroupName,DEPARTMENT_DEPARTMENT_DeptName,DEPARTMENT_DEPARTMENT_DeptID
0,,adventureworks,Research and Development,Engineering,1.0
1,,adventureworks,Research and Development,Tool Design,2.0
2,,adventureworks,Sales and Marketing,Sales,3.0
3,,adventureworks,Sales and Marketing,Marketing,4.0
4,,adventureworks,Inventory Management,Purchasing,5.0


### Employee

In [463]:
# merge northwind with territory and employee
employee1 = pd.merge(northwind_employees, northwind_employee_territories, on='EmployeeID')
employee1 = employee1.drop(columns=['EmployeeID', 'LastName', 'FirstName', 'BirthDate', 'Address', 'City', 'Region', 'PostalCode', 'Country', 'HomePhone', 'ReportsTo'])

# merge employee from aenc with bonus
employee2 = pd.merge(aenc_employee, aenc_bonus, on='emp_id')

# after that concat together
employees = pd.concat([employee1, employee2], ignore_index=True)

# rename
employees.rename(columns={
    'emp_id': 'EMPLOYEE_EMPLOYEE_EmployeeID', 
    'dept_id': 'EMPLOYEE_EMPLOYEE_DepartmentID', 
    'manager_id': 'EMPLOYEE_EMPLOYEE_ManagerID', 
    'territory_id' : 'EMPLOYEE_EMPLOYEETERRITORIES_TerritoryID',
    'emp_fname' : 'EMPLOYEE_EMPLOYEE_Emp_Fname',
    'emp_lname' : 'EMPLOYEE_EMPLOYEE_Emp_Lname',
    'street' : 'EMPLOYEE_EMPLOYEE_Street',
    'city' : 'EMPLOYEE_EMPLOYEE_City',
    'state' : 'EMPLOYEE_EMPLOYEE_State',
    'zip_code' : 'EMPLOYEE_EMPLOYEE_Zip_Code',
    'phone' : 'EMPLOYEE_EMPLOYEE_Phone',
    'status' : 'EMPLOYEE_EMPLOYEE_Status',
    'ss_number' : 'EMPLOYEE_EMPLOYEE_SS_Number',
    'salary' : 'EMPLOYEE_EMPLOYEE_Salary',
    'start_date' : 'EMPLOYEE_EMPLOYEE_Start_Date',
    'termination_date' : 'EMPLOYEE_EMPLOYEE_Termination',
    'birth_date' : 'EMPLOYEE_EMPLOYEE_Birth_Date',
    'bene_health_ins' : 'EMPLOYEE_EMPLOYEE_Bene_Health_Ins',
    'bene_life_ins' : 'EMPLOYEE_EMPLOYEE_Bene_Life_Ins',
    'bene_day_care' : 'EMPLOYEE_EMPLOYEE_Bene_Day_Care',
    'sex' : 'EMPLOYEE_EMPLOYEE_Sex',
    'bonus_date' : 'EMPLOYEE_BONUS_Bonus_Date',
    'bonus_amount' : 'EMPLOYEE_BONUS_Bonus_Amount',
    'Title' : 'EMPLOYEE_EMPLOYEES_Title',
    'TitleOfCourtesy' : 'EMPLOYEE_EMPLOYEES_TitleOfCourtesy',
    'HireDate' : 'EMPLOYEE_EMPLOYEES_HireDate',
    'HomePhone' : 'EMPLOYEE_EMPLOYEES_HomePhone',
    'Extension' : 'EMPLOYEE_EMPLOYEES_Extension',
    'Photo' : 'EMPLOYEE_EMPLOYEES_Photo',
    'PhotoPath' : 'EMPLOYEE_EMPLOYEES_PhotoPath',
    'Notes' : 'EMPLOYEE_EMPLOYEES_Notes'
}, inplace=True)

desired_columns_order = ['EMPLOYEE_EMPLOYEE_EmployeeID', 'EMPLOYEE_EMPLOYEE_DepartmentID', 'EMPLOYEE_EMPLOYEE_ManagerID', 'EMPLOYEE_EMPLOYEETERRITORIES_TerritoryID','EMPLOYEE_EMPLOYEE_Emp_Fname','EMPLOYEE_EMPLOYEE_Emp_Lname','EMPLOYEE_EMPLOYEE_Street','EMPLOYEE_EMPLOYEE_City','EMPLOYEE_EMPLOYEE_State','EMPLOYEE_EMPLOYEE_Zip_Code','EMPLOYEE_EMPLOYEE_Phone','EMPLOYEE_EMPLOYEE_Status','EMPLOYEE_EMPLOYEE_SS_Number','EMPLOYEE_EMPLOYEE_Salary','EMPLOYEE_EMPLOYEE_Start_Date','EMPLOYEE_EMPLOYEE_Termination','EMPLOYEE_EMPLOYEE_Birth_Date','EMPLOYEE_EMPLOYEE_Bene_Health_Ins','EMPLOYEE_EMPLOYEE_Bene_Life_Ins','EMPLOYEE_EMPLOYEE_Bene_Day_Care','EMPLOYEE_EMPLOYEE_Sex','EMPLOYEE_BONUS_Bonus_Date','EMPLOYEE_BONUS_Bonus_Amount','EMPLOYEE_EMPLOYEES_Title','EMPLOYEE_EMPLOYEES_TitleOfCourtesy','EMPLOYEE_EMPLOYEES_HireDate','EMPLOYEE_EMPLOYEES_HomePhone','EMPLOYEE_EMPLOYEES_Extension','EMPLOYEE_EMPLOYEES_Photo','EMPLOYEE_EMPLOYEES_PhotoPath','EMPLOYEE_EMPLOYEES_Notes']

#order
employees = employees.reindex(columns=desired_columns_order)

employees.head()

Unnamed: 0,EMPLOYEE_EMPLOYEE_EmployeeID,EMPLOYEE_EMPLOYEE_DepartmentID,EMPLOYEE_EMPLOYEE_ManagerID,EMPLOYEE_EMPLOYEETERRITORIES_TerritoryID,EMPLOYEE_EMPLOYEE_Emp_Fname,EMPLOYEE_EMPLOYEE_Emp_Lname,EMPLOYEE_EMPLOYEE_Street,EMPLOYEE_EMPLOYEE_City,EMPLOYEE_EMPLOYEE_State,EMPLOYEE_EMPLOYEE_Zip_Code,...,EMPLOYEE_BONUS_Bonus_Date,EMPLOYEE_BONUS_Bonus_Amount,EMPLOYEE_EMPLOYEES_Title,EMPLOYEE_EMPLOYEES_TitleOfCourtesy,EMPLOYEE_EMPLOYEES_HireDate,EMPLOYEE_EMPLOYEES_HomePhone,EMPLOYEE_EMPLOYEES_Extension,EMPLOYEE_EMPLOYEES_Photo,EMPLOYEE_EMPLOYEES_PhotoPath,EMPLOYEE_EMPLOYEES_Notes
0,,,,,,,,,,,...,,,Sales Representative,Ms.,1992-05-01,,5467,b'\x15\x1c/\x00\x02\x00\x00\x00\r\x00\x0e\x00\...,http://accweb/emmployees/davolio.bmp,Education includes a BA in psychology from Col...
1,,,,,,,,,,,...,,,Sales Representative,Ms.,1992-05-01,,5467,b'\x15\x1c/\x00\x02\x00\x00\x00\r\x00\x0e\x00\...,http://accweb/emmployees/davolio.bmp,Education includes a BA in psychology from Col...
2,,,,,,,,,,,...,,,"Vice President, Sales",Dr.,1992-08-14,,3457,b'\x15\x1c/\x00\x02\x00\x00\x00\r\x00\x0e\x00\...,http://accweb/emmployees/fuller.bmp,Andrew received his BTS commercial in 1974 and...
3,,,,,,,,,,,...,,,"Vice President, Sales",Dr.,1992-08-14,,3457,b'\x15\x1c/\x00\x02\x00\x00\x00\r\x00\x0e\x00\...,http://accweb/emmployees/fuller.bmp,Andrew received his BTS commercial in 1974 and...
4,,,,,,,,,,,...,,,"Vice President, Sales",Dr.,1992-08-14,,3457,b'\x15\x1c/\x00\x02\x00\x00\x00\r\x00\x0e\x00\...,http://accweb/emmployees/fuller.bmp,Andrew received his BTS commercial in 1974 and...


### BusinessEntities

In [464]:
# Build the business-entity frame from the AdventureWorks businessentity,
# businessentitycontact and contacttype frames. Both joins are outer joins,
# so rows without a match on either side are kept.
businessentities = pd.merge(
    adventureworks_person_businessentity,
    adventureworks_person_businessentitycontact,
    on='BusinessEntityID',
    how="outer",
    suffixes=('_person_businessentity', '_businessentitycontact'),
)

businessentities = pd.merge(
    businessentities,
    adventureworks_person_contacttype,
    on='ContactTypeID',
    how="outer",
    suffixes=('', '_contacttype'),
)

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(businessentities)

# map the source column names onto the warehouse column names
businessentities.rename(
    columns={
        'BusinessEntityID': 'BUSINESSENTITY_BUSINESSENTITY_BusinessEntityID',
        'PersonID': 'BUSINESSENTITY_BUSINESSENTITYCONTACT_PersonID',
        'ContactTypeID': 'BUSINESSENTITY_CONTACTTYPE_ContactTypeID',
        'Name': 'BUSINESSENTITY_CONTACTTYPE_Name',
    },
    inplace=True,
)

businessentities.head()

Unnamed: 0,BUSINESSENTITY_BUSINESSENTITY_BusinessEntityID,BUSINESSENTITY_BUSINESSENTITYCONTACT_PersonID,BUSINESSENTITY_CONTACTTYPE_ContactTypeID,BUSINESSENTITY_CONTACTTYPE_Name
0,,,1.0,Accounting Manager
1,1510.0,1509.0,2.0,Assistant Sales Agent
2,1518.0,1517.0,2.0,Assistant Sales Agent
3,1522.0,1521.0,2.0,Assistant Sales Agent
4,1528.0,1527.0,2.0,Assistant Sales Agent


### BusinessEntityAddresses

In [465]:
# Build the address frame from the AdventureWorks businessentityaddress,
# address and address_type frames. Both joins are outer joins, so rows
# without a match on either side are kept.
businessentityaddresses = pd.merge(
    adventureworks_person_businessentityaddress,
    adventureworks_person_address,
    on='AddressID',
    how="outer",
    suffixes=('', '_address'),
)

businessentityaddresses = pd.merge(
    businessentityaddresses,
    adventureworks_person_address_type,
    on='AddressTypeID',
    how="outer",
    suffixes=('', '_address_type'),
)

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(businessentityaddresses)

# map the source column names onto the warehouse column names
businessentityaddresses.rename(
    columns={
        'BusinessEntityID': 'BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID',
        'AddressID': 'BUSINESSENTITYADDRESS_ADDRESS_AddressID',
        'AddressTypeID': 'BUSINESSENTITYADDRESS_ADDRESSTYPE_AddressTypeID',
        'AddressLine1': 'BUSINESSENTITYADDRESS_ADDRESS_AddressLine1',
        'AddressLine2': 'BUSINESSENTITYADDRESS_ADDRESS_AddressLine2',
        'City': 'BUSINESSENTITYADDRESS_ADDRESS_City',
        'StateProvinceID': 'BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID',
        'PostalCode': 'BUSINESSENTITYADDRESS_ADDRESS_POSTALCODE',
        'SpatialLocation': 'BUSINESSENTITYADDRESS_ADDRESS_SpatialLocation',
        'Name': 'BUSINESSENTITYADDRESS_ADDRESSTYPE_Name',
    },
    inplace=True,
)

businessentityaddresses.head()

Unnamed: 0,BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID,BUSINESSENTITYADDRESS_ADDRESS_AddressID,BUSINESSENTITYADDRESS_ADDRESSTYPE_AddressTypeID,BUSINESSENTITYADDRESS_ADDRESS_AddressLine1,BUSINESSENTITYADDRESS_ADDRESS_AddressLine2,BUSINESSENTITYADDRESS_ADDRESS_City,BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID,BUSINESSENTITYADDRESS_ADDRESS_POSTALCODE,BUSINESSENTITYADDRESS_ADDRESS_SpatialLocation,BUSINESSENTITYADDRESS_ADDRESSTYPE_Name
0,,,1,,,,,,,Billing
1,12.0,1.0,2,1970 Napa Ct.,,Bothell,79.0,98011.0,POINT (-122.164644615406 47.7869921906598),Home
2,123.0,2.0,2,9833 Mt. Dias Blv.,,Bothell,79.0,98011.0,POINT (-122.250185528911 47.6867097047995),Home
3,285.0,3.0,2,7484 Roundtree Drive,,Bothell,79.0,98011.0,POINT (-122.274625789912 47.7631154083121),Home
4,251.0,4.0,2,9539 Glenside Dr,,Bothell,79.0,98011.0,POINT (-122.335726442416 47.7392386259644),Home


### People

In [466]:
# Build the people frame from the AdventureWorks person, personphone,
# phonenumbertype, emailaddress and password frames. Every step is an outer
# join, so rows without a match on either side are kept.
people = pd.merge(
    adventureworks_person_person,
    adventureworks_person_personphone,
    on='BusinessEntityID',
    how="outer",
    suffixes=('_person', '_personphone'),
)

people = pd.merge(
    people,
    adventureworks_person_phonenumbertype,
    on='PhoneNumberTypeID',
    how="outer",
    suffixes=('', '_phonenumbertype'),
)

people = pd.merge(
    people,
    adventureworks_person_emailaddress,
    on='BusinessEntityID',
    how="outer",
    suffixes=('', '_emailaddress'),
)

people = pd.merge(
    people,
    adventureworks_person_password,
    on='BusinessEntityID',
    how="outer",
    suffixes=('', '_password'),
)

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(people)

# map the source column names onto the warehouse column names
people.rename(
    columns={
        'BusinessEntityID': 'PERSON_PERSON_BusinessEntityID',
        'PersonType': 'PERSON_PERSON_PersonType',
        'NameStyle': 'PERSON_PERSON_NameStyle',
        'Title': 'PERSON_PERSON_Title',
        'FirstName': 'PERSON_PERSON_FirstName',
        'MiddleName': 'PERSON_PERSON_MiddleName',
        'LastName': 'PERSON_PERSON_LastName',
        'Suffix': 'PERSON_PERSON_Suffix',
        'EmailPromotion': 'PERSON_PERSON_EmailPromotion',
        'AdditionalContactInfo': 'PERSON_PERSON_AdditionalContactInfo',
        'Demographics': 'PERSON_PERSON_Demographics',
        'PhoneNumber': 'PERSON_PERSONPHONE_PhoneNumber',
        'PhoneNumberTypeID': 'PERSON_PHONENUMBERTYPE_PhoneNumberTypeID',
        'Name': 'PERSON_PHONENUMBERTYPE_Name',
        'EmailAddressID': 'PERSON_EMAILADDRESS_EmailAddressID',
        'EmailAddress': 'PERSON_EMAILADDRESS_EmailAddress',
        'PasswordHash': 'PERSON_PASSWORD_PasswordHash',
        'PasswordSalt': 'PERSON_PASSWORD_PasswordSalt',
    },
    inplace=True,
)

people.head()

Unnamed: 0,PERSON_PERSON_BusinessEntityID,PERSON_PERSON_PersonType,PERSON_PERSON_NameStyle,PERSON_PERSON_Title,PERSON_PERSON_FirstName,PERSON_PERSON_MiddleName,PERSON_PERSON_LastName,PERSON_PERSON_Suffix,PERSON_PERSON_EmailPromotion,PERSON_PERSON_AdditionalContactInfo,PERSON_PERSON_Demographics,PERSON_PERSONPHONE_PhoneNumber,PERSON_PHONENUMBERTYPE_PhoneNumberTypeID,PERSON_PHONENUMBERTYPE_Name,PERSON_EMAILADDRESS_EmailAddressID,PERSON_EMAILADDRESS_EmailAddress,PERSON_PASSWORD_PasswordHash,PERSON_PASSWORD_PasswordSalt
0,1,EM,False,,Ken,J,Sánchez,,0,,"<IndividualSurvey xmlns=""http://schemas.micros...",697-555-0142,1,Cell,1,ken0@adventure-works.com,pbFwXWE99vobT6g+vPWFy93NtUU/orrIWafF01hccfM=,bE3XiWw=
1,2,EM,False,,Terri,Lee,Duffy,,1,,"<IndividualSurvey xmlns=""http://schemas.micros...",819-555-0175,3,Work,2,terri0@adventure-works.com,bawRVNrZQYQ05qF05Gz6VLilnviZmrqBReTTAGAudm0=,EjJaC3U=
2,3,EM,False,,Roberto,,Tamburello,,0,,"<IndividualSurvey xmlns=""http://schemas.micros...",212-555-0187,1,Cell,3,roberto0@adventure-works.com,8BUXrZfDqO1IyHCWOYzYmqN1IhTUn3CJMpdx/UCQ3iY=,wbPZqMw=
3,4,EM,False,,Rob,,Walters,,0,,"<IndividualSurvey xmlns=""http://schemas.micros...",612-555-0100,1,Cell,4,rob0@adventure-works.com,SjLXpiarHSlz+6AG+H+4QpB/IPRzras/+9q/5Wr7tf8=,PwSunQU=
4,5,EM,False,Ms.,Gail,A,Erickson,,0,,"<IndividualSurvey xmlns=""http://schemas.micros...",849-555-0139,1,Cell,5,gail0@adventure-works.com,8FYdAiY6gWuBsgjCFdg0UibtsqOcWHf9TyaHIP7+paA=,qYhZRiM=


### Products (TODO CHANGE)

In [467]:
# Align the aenc product column names with the naming used by the other
# product sources; the frame is renamed in place.
aenc_product.rename(
    columns={
        'id': 'ProductID',
        'name': 'ProductName',
        'description': 'Description',
        'prod_size': 'ProdSize',
        'color': 'Color',
        'quantity': 'Quantity',
        'unit_price': 'UnitPrice',
    },
    inplace=True,
)

aenc_product.head()

Unnamed: 0,ProductID,ProductName,Description,ProdSize,Color,Quantity,UnitPrice,picture_name,Category


In [468]:
# Stack every AdventureWorks product-related frame row-wise into one frame.
# This is a concat (not a join): the rows of each source are appended and
# the index is renumbered.
adventureworks_combined_products = pd.concat(
    [
        adventureworks_production_product,
        adventureworks_production_productcategory,
        adventureworks_production_productsubcategory,
        adventureworks_production_productdescription,
        adventureworks_production_productdocument,
        adventureworks_production_productmodel,
        adventureworks_production_productmodelillustration,
        adventureworks_production_productmodelproductdescriptionculture,
        adventureworks_production_productphoto,
        adventureworks_production_productproductphoto,
    ],
    ignore_index=True,
)

adventureworks_combined_products.head()

Unnamed: 0,ProductID,Name,ProductNumber,MakeFlag,FinishedGoodsFlag,Color,SafetyStockLevel,ReorderPoint,StandardCost,ListPrice,...,CatalogDescription,Instructions,IllustrationID,CultureID,ProductPhotoID,ThumbNailPhotoHexString,ThumbNailPhotoFileName,LargePhotoHexString,LargePhotoFileName,Primary
0,1.0,Adjustable Race,AR-5381,False,False,,1000.0,750.0,0.0,0.0,...,,,,,,,,,,
1,2.0,Bearing Ball,BA-8327,False,False,,1000.0,750.0,0.0,0.0,...,,,,,,,,,,
2,3.0,BB Ball Bearing,BE-2349,True,False,,800.0,600.0,0.0,0.0,...,,,,,,,,,,
3,4.0,Headset Ball Bearings,BE-2908,False,False,,800.0,600.0,0.0,0.0,...,,,,,,,,,,
4,316.0,Blade,BL-2036,True,False,,800.0,600.0,0.0,0.0,...,,,,,,,,,,


In [469]:
# Stack the Northwind, aenc and combined AdventureWorks product frames
# row-wise into a single products frame with a fresh index.
products = pd.concat(
    [
        northwind_products,
        aenc_product,
        adventureworks_combined_products,
    ],
    ignore_index=True,
)

# move the DocumentNodeString values into DocumentNode; pop removes the
# string column from the frame while handing back its values
products['DocumentNode'] = products.pop('DocumentNodeString')

# ProductID is cast to float; the original author notes this looks
# counterintuitive but is needed to avoid an error in Pandas
products['ProductID'] = products['ProductID'].astype(float)

# the DocumentNode column is dropped for now (marked by the author as
# something that still needs fixing later)
products.drop(columns=['DocumentNode'], inplace=True)


products.head()

  products = pd.concat([northwind_products, aenc_product, adventureworks_combined_products], ignore_index=True)


Unnamed: 0,ProductID,ProductName,SupplierID,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued,...,CatalogDescription,Instructions,IllustrationID,CultureID,ProductPhotoID,ThumbNailPhotoHexString,ThumbNailPhotoFileName,LargePhotoHexString,LargePhotoFileName,Primary
0,1.0,Chai,1.0,1.0,10 boxes x 20 bags,18.0,39.0,0.0,10.0,False,...,,,,,,,,,,
1,2.0,Chang,1.0,1.0,24 - 12 oz bottles,19.0,17.0,40.0,25.0,False,...,,,,,,,,,,
2,3.0,Aniseed Syrup,1.0,2.0,12 - 550 ml bottles,10.0,13.0,70.0,25.0,False,...,,,,,,,,,,
3,4.0,Chef Anton's Cajun Seasoning,2.0,2.0,48 - 6 oz jars,22.0,53.0,0.0,0.0,False,...,,,,,,,,,,
4,5.0,Chef Anton's Gumbo Mix,2.0,2.0,36 boxes,21.35,0.0,0.0,0.0,True,...,,,,,,,,,,


### JobCandidate

In [470]:
# jobcandidates is an alias of the AdventureWorks jobcandidate frame (no copy
# is made), so the in-place rename below also changes the source frame.
jobcandidates = adventureworks_humanresources_jobcandidate

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(jobcandidates)

# map the source column names onto the warehouse column names
jobcandidates.rename(
    columns={
        'JobCandidateID': 'JOBCANDIDATE_JOBCANDIDATE_JobCandidateID',
        'BusinessEntityID': 'JOBCANDIDATE_JOBCANDIDATE_BusinessEntityID',
        'Resume': 'JOBCANDIDATE_JOBCANDIDATE_Resume',
    },
    inplace=True,
)

jobcandidates.head()

Unnamed: 0,JOBCANDIDATE_JOBCANDIDATE_JobCandidateID,JOBCANDIDATE_JOBCANDIDATE_BusinessEntityID,JOBCANDIDATE_JOBCANDIDATE_Resume
0,1,,"<ns:Resume xmlns:ns=""http://schemas.microsoft...."
1,2,,"<ns:Resume xmlns:ns=""http://schemas.microsoft...."
2,3,,"<ns:Resume xmlns:ns=""http://schemas.microsoft...."
3,4,274.0,"<ns:Resume xmlns:ns=""http://schemas.microsoft...."
4,5,,"<ns:Resume xmlns:ns=""http://schemas.microsoft...."


### EmployeeDepartmentHistory

In [471]:
# employeedepartmenthistories is an alias of the AdventureWorks frame (no copy
# is made), so the in-place rename below also changes the source frame.
employeedepartmenthistories = adventureworks_humanresources_employeedepartmenthistory

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(employeedepartmenthistories)

# map the source column names onto the warehouse column names
employeedepartmenthistories.rename(
    columns={
        'BusinessEntityID': 'EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_BusinessEntityID',
        'DepartmentID': 'EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_DepartmentID',
        'ShiftID': 'EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_ShiftID',
        'StartDate': 'EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_StartDate',
        'EndDate': 'EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_EndDate',
    },
    inplace=True,
)

employeedepartmenthistories.head()

Unnamed: 0,EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_BusinessEntityID,EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_DepartmentID,EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_ShiftID,EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_StartDate,EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_EndDate
0,1,16,1,2009-01-14,
1,2,1,1,2008-01-31,
2,3,1,1,2007-11-11,
3,4,1,1,2007-12-05,2010-05-30
4,4,2,1,2010-05-31,


### EmployeePayHistory

In [472]:
# employeepayhistories is an alias of the AdventureWorks frame (no copy is
# made), so the in-place rename below also changes the source frame.
employeepayhistories = adventureworks_humanresources_employeepayhistory

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(employeepayhistories)

# map the source column names onto the warehouse column names
employeepayhistories.rename(
    columns={
        'BusinessEntityID': 'EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_BusinessEntityID',
        'RateChangeDate': 'EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_RateChangeDate',
        'Rate': 'EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_Rate',
        'PayFrequency': 'EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_PayFrequency',
    },
    inplace=True,
)

employeepayhistories.head()

Unnamed: 0,EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_BusinessEntityID,EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_RateChangeDate,EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_Rate,EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_PayFrequency
0,1,2009-01-14,125.5,2
1,2,2008-01-31,63.4615,2
2,3,2007-11-11,43.2692,2
3,4,2007-12-05,8.62,2
4,4,2010-05-31,23.72,2


### Shift

In [473]:
# shifts is an alias of the AdventureWorks shift frame (no copy is made),
# so the in-place rename below also changes the source frame.
shifts = adventureworks_humanresources_shift

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(shifts)

# map the source column names onto the warehouse column names
shifts.rename(
    columns={
        'ShiftID': 'SHIFT_SHIFT_ShiftID',
        'Name': 'SHIFT_SHIFT_Name',
        'StartTime': 'SHIFT_SHIFT_StartTime',
        'EndTime': 'SHIFT_SHIFT_EndTime',
    },
    inplace=True,
)

shifts.head()

Unnamed: 0,SHIFT_SHIFT_ShiftID,SHIFT_SHIFT_Name,SHIFT_SHIFT_StartTime,SHIFT_SHIFT_EndTime
0,1,Day,07:00:00.0000000,15:00:00.0000000
1,2,Evening,15:00:00.0000000,23:00:00.0000000
2,3,Night,23:00:00.0000000,07:00:00.0000000


### SalesPerson

In [474]:
# salespeople is an alias of the AdventureWorks salesperson frame (no copy is
# made), so the in-place rename below also changes the source frame.
salespeople = adventureworks_sales_salesperson

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(salespeople)

# map the source column names onto the warehouse column names
salespeople.rename(
    columns={
        'BusinessEntityID': 'SALESPERSON_SALESPERSON_BusinessEntityID',
        'TerritoryID': 'SALESPERSON_SALESPERSON_TerritoryID',
        'SalesQuota': 'SALESPERSON_SALESPERSON_SalesQuota',
        'Bonus': 'SALESPERSON_SALESPERSON_Bonus',
        'CommissionPct': 'SALESPERSON_SALESPERSON_CommissionPct',
        'SalesYTD': 'SALESPERSON_SALESPERSON_SalesYTD',
        'SalesLastYear': 'SALESPERSON_SALESPERSON_SalesLastYear',
    },
    inplace=True,
)

salespeople.head()

Unnamed: 0,SALESPERSON_SALESPERSON_BusinessEntityID,SALESPERSON_SALESPERSON_TerritoryID,SALESPERSON_SALESPERSON_SalesQuota,SALESPERSON_SALESPERSON_Bonus,SALESPERSON_SALESPERSON_CommissionPct,SALESPERSON_SALESPERSON_SalesYTD,SALESPERSON_SALESPERSON_SalesLastYear
0,274,,,0.0,0.0,559697.6,0.0
1,275,2.0,300000.0,4100.0,0.012,3763178.0,1750406.0
2,276,4.0,250000.0,2000.0,0.015,4251369.0,1439156.0
3,277,3.0,250000.0,2500.0,0.015,3189418.0,1997186.0
4,278,6.0,250000.0,500.0,0.01,1453719.0,1620277.0


### ProductVendor

In [475]:
# productvendors is an alias of the AdventureWorks productvendor frame (no
# copy is made), so the in-place rename below also changes the source frame.
productvendors = adventureworks_purchasing_productvendor

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(productvendors)

# map the source column names onto the warehouse column names
productvendors.rename(
    columns={
        'ProductID': 'PRODUCTVENDOR_PRODUCTVENDOR_ProductID',
        'BusinessEntityID': 'PRODUCTVENDOR_PRODUCTVENDOR_BusinessEntityID',
        'AverageLeadTime': 'PRODUCTVENDOR_PRODUCTVENDOR_AverageLeadTime',
        'StandardPrice': 'PRODUCTVENDOR_PRODUCTVENDOR_StandardPrice',
        'LastReceiptCost': 'PRODUCTVENDOR_PRODUCTVENDOR_LastReceiptCost',
        'LastReceiptDate': 'PRODUCTVENDOR_PRODUCTVENDOR_LastReceiptDate',
        'MinOrderQty': 'PRODUCTVENDOR_PRODUCTVENDOR_MinOrderQty',
        'MaxOrderQty': 'PRODUCTVENDOR_PRODUCTVENDOR_MaxOrderQty',
        'OnOrderQty': 'PRODUCTVENDOR_PRODUCTVENDOR_OnOrderQty',
        'UnitMeasureCode': 'PRODUCTVENDOR_PRODUCTVENDOR_UnitMeasureCode',
    },
    inplace=True,
)

productvendors.head()

Unnamed: 0,PRODUCTVENDOR_PRODUCTVENDOR_ProductID,PRODUCTVENDOR_PRODUCTVENDOR_BusinessEntityID,PRODUCTVENDOR_PRODUCTVENDOR_AverageLeadTime,PRODUCTVENDOR_PRODUCTVENDOR_StandardPrice,PRODUCTVENDOR_PRODUCTVENDOR_LastReceiptCost,PRODUCTVENDOR_PRODUCTVENDOR_LastReceiptDate,PRODUCTVENDOR_PRODUCTVENDOR_MinOrderQty,PRODUCTVENDOR_PRODUCTVENDOR_MaxOrderQty,PRODUCTVENDOR_PRODUCTVENDOR_OnOrderQty,PRODUCTVENDOR_PRODUCTVENDOR_UnitMeasureCode
0,1,1580,17,47.87,50.2635,2011-08-29,1,5,3.0,CS
1,2,1688,19,39.92,41.916,2011-08-29,1,5,3.0,CTN
2,4,1650,17,54.31,57.0255,2011-08-29,1,5,,CTN
3,317,1578,19,28.17,29.5785,2011-08-29,100,1000,300.0,EA
4,317,1678,17,25.77,27.0585,2011-08-25,100,1000,,EA


### CustomerCustomerDemo

In [476]:
# customercustomerdemos is an alias of the Northwind frame (no copy is made),
# so the in-place rename below also changes the source frame.
customercustomerdemos = northwind_customer_customer_demo

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(customercustomerdemos)

# map the source column names onto the warehouse column names
customercustomerdemos.rename(
    columns={
        'CustomerID': 'CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_CustomerID',
        'CustomerTypeID': 'CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_CustomerTypeID',
    },
    inplace=True,
)

customercustomerdemos.head()

Unnamed: 0,CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_CustomerID,CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_CustomerTypeID


### CustomerDemographics

In [477]:
# customerdemographics is an alias of the Northwind frame (no copy is made),
# so the in-place rename below also changes the source frame.
customerdemographics = northwind_customer_demographics

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(customerdemographics)

# map the source column names onto the warehouse column names
customerdemographics.rename(
    columns={
        'CustomerTypeID': 'CUSTOMERDEMOGRAPHIC_CUSTOMERDEMOGRAPHICS_CustomerTypeID',
        'CustomerDesc': 'CUSTOMERDEMOGRAPHIC_CUSTOMERDEMOGRAPHICS_CustomerDesc',
    },
    inplace=True,
)

customerdemographics.head()

Unnamed: 0,CUSTOMERDEMOGRAPHIC_CUSTOMERDEMOGRAPHICS_CustomerTypeID,CUSTOMERDEMOGRAPHIC_CUSTOMERDEMOGRAPHICS_CustomerDesc


### SalesTerritoryHistory

In [478]:
# salesterritoryhistories is an alias of the AdventureWorks frame (no copy is
# made), so the in-place rename below also changes the source frame.
salesterritoryhistories = adventureworks_sales_salesterritoryhistory

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(salesterritoryhistories)

# map the source column names onto the warehouse column names
salesterritoryhistories.rename(
    columns={
        'BusinessEntityID': 'SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_BusinessEntityID',
        'TerritoryID': 'SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_TerritoryID',
        'StartDate': 'SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_StartDate',
        'EndDate': 'SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_EndDate',
    },
    inplace=True,
)

salesterritoryhistories.head()

Unnamed: 0,SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_BusinessEntityID,SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_TerritoryID,SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_StartDate,SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_EndDate
0,275,2,2011-05-31,2012-11-29
1,275,3,2012-11-30,NaT
2,276,4,2011-05-31,NaT
3,277,3,2011-05-31,2012-11-29
4,277,2,2012-11-30,NaT


### ProductListPriceHistory

In [479]:
# productlistpricehistories is an alias of the AdventureWorks frame (no copy
# is made), so the in-place rename below also changes the source frame.
productlistpricehistories = adventureworks_production_productlistpricehistory

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(productlistpricehistories)

# map the source column names onto the warehouse column names
productlistpricehistories.rename(
    columns={
        'ProductID': 'PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_ProductID',
        'StartDate': 'PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_StartDate',
        'EndDate': 'PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_EndDate',
        'ListPrice': 'PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_ListPrice',
    },
    inplace=True,
)

productlistpricehistories.head()

Unnamed: 0,PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_ProductID,PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_StartDate,PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_EndDate,PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_ListPrice
0,707,2011-05-31,2012-05-29,33.6442
1,707,2012-05-30,2013-05-29,33.6442
2,707,2013-05-30,NaT,34.99
3,708,2011-05-31,2012-05-29,33.6442
4,708,2012-05-30,2013-05-29,33.6442


### ProductCostHistory

In [480]:
# productcosthistories is an alias of the AdventureWorks frame (no copy is
# made), so the in-place rename below also changes the source frame.
productcosthistories = adventureworks_production_productcosthistory

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(productcosthistories)

# map the source column names onto the warehouse column names
productcosthistories.rename(
    columns={
        'ProductID': 'PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_ProductID',
        'StartDate': 'PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_StartDate',
        'EndDate': 'PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_EndDate',
        'StandardCost': 'PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_StandardCost',
    },
    inplace=True,
)

productcosthistories.head()

Unnamed: 0,PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_ProductID,PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_StartDate,PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_EndDate,PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_StandardCost
0,707,2011-05-31,2012-05-29,12.0278
1,707,2012-05-30,2013-05-29,13.8782
2,707,2013-05-30,NaT,13.0863
3,708,2011-05-31,2012-05-29,12.0278
4,708,2012-05-30,2013-05-29,13.8782


### ShoppingCartItem

In [481]:
# shoppingcartitems is an alias of the AdventureWorks frame (no copy is made),
# so the in-place rename below also changes the source frame.
shoppingcartitems = adventureworks_sales_shoppingcartitem

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(shoppingcartitems)

# map the source column names onto the warehouse column names
shoppingcartitems.rename(
    columns={
        'ShoppingCartItemID': 'SHOPPINGCARTITEM_SHOPPINGCARTITEM_ShoppingCartItemID',
        'ShoppingCartID': 'SHOPPINGCARTITEM_SHOPPINGCARTITEM_ShoppingCartID',
        'Quantity': 'SHOPPINGCARTITEM_SHOPPINGCARTITEM_Quantity',
        'ProductID': 'SHOPPINGCARTITEM_SHOPPINGCARTITEM_ProductID',
        'DateCreated': 'SHOPPINGCARTITEM_SHOPPINGCARTITEM_DateCreated',
    },
    inplace=True,
)

shoppingcartitems.head()

Unnamed: 0,SHOPPINGCARTITEM_SHOPPINGCARTITEM_ShoppingCartItemID,SHOPPINGCARTITEM_SHOPPINGCARTITEM_ShoppingCartID,SHOPPINGCARTITEM_SHOPPINGCARTITEM_Quantity,SHOPPINGCARTITEM_SHOPPINGCARTITEM_ProductID,SHOPPINGCARTITEM_SHOPPINGCARTITEM_DateCreated
0,2,14951,3,862,2013-11-09 17:54:07.603
1,4,20621,4,881,2013-11-09 17:54:07.603
2,5,20621,7,874,2013-11-09 17:54:07.603


### SalesPersonQuotaHistory

In [482]:
# salespersonquotahistories is an alias of the AdventureWorks frame (no copy
# is made), so the in-place rename below also changes the source frame.
salespersonquotahistories = adventureworks_sales_salespersonquotahistory

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(salespersonquotahistories)

# map the source column names onto the warehouse column names
salespersonquotahistories.rename(
    columns={
        'BusinessEntityID': 'SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_BusinessEntityID',
        'QuotaDate': 'SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_QuotaDate',
        'SalesQuota': 'SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_SalesQuota',
    },
    inplace=True,
)

salespersonquotahistories.head()

Unnamed: 0,SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_BusinessEntityID,SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_QuotaDate,SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_SalesQuota
0,274,2011-05-31,28000.0
1,274,2011-08-31,7000.0
2,274,2011-12-01,91000.0
3,274,2012-02-29,140000.0
4,274,2012-05-30,70000.0


### Customer

In [483]:
# Only the first name, last name and state are taken from the aenc customers.
aenc_customer = aenc_customer[['fname', 'lname', 'state']]

# The Northwind CustomerID is discarded before combining. errors='ignore'
# keeps this cell re-runnable: on a second run the column is already gone
# and a plain drop would raise KeyError.
northwind_customers = northwind_customers.drop(columns='CustomerID', errors='ignore')

# Stack the three customer sources row-wise into one frame with a fresh index;
# columns that a source does not have are left empty for its rows.
customers = pd.concat(
    [northwind_customers, aenc_customer, adventureworks_sales_customer],
    ignore_index=True,
)

# Map the source column names onto the warehouse column names.
# Keys that are not present in the frame are ignored by rename.
customers.rename(columns={
    'CustomerID': 'CUSTOMER_CUSTOMERS_ID',
    'CompanyName': 'CUSTOMER_CUSTOMERS_CompanyName',
    'ContactName': 'CUSTOMER_CUSTOMERS_ContactName',
    'ContactTitle': 'CUSTOMER_CUSTOMERS_ContactTitle',
    'Address': 'CUSTOMER_CUSTOMERS_Address',
    'City': 'CUSTOMER_CUSTOMERS_City',
    'Region': 'CUSTOMER_CUSTOMERS_Region',
    'PostalCode': 'CUSTOMER_CUSTOMERS_PostalCode',
    'Country': 'CUSTOMER_CUSTOMERS_Country',
    'Phone': 'CUSTOMER_CUSTOMERS_Phone',
    'Fax': 'CUSTOMER_CUSTOMERS_Fax',
    'fname': 'CUSTOMER_CUSTOMER_Fname',
    'lname': 'CUSTOMER_CUSTOMER_Lname',
    'state': 'CUSTOMER_CUSTOMER_State',
    'PersonID': 'CUSTOMER_CUSTOMER_PersonID',
    'StoreID': 'CUSTOMER_CUSTOMER_StoreID',
    'TerritoryID': 'CUSTOMER_CUSTOMER_TerritoryID',
    'AccountNumber': 'CUSTOMER_CUSTOMER_AccountNumber'
}, inplace=True)

# remove the modified date and rowguid columns via the shared helper
drop_modified_date_rowguid(customers)

customers.head()

Unnamed: 0,CUSTOMER_CUSTOMERS_CompanyName,CUSTOMER_CUSTOMERS_ContactName,CUSTOMER_CUSTOMERS_ContactTitle,CUSTOMER_CUSTOMERS_Address,CUSTOMER_CUSTOMERS_City,CUSTOMER_CUSTOMERS_Region,CUSTOMER_CUSTOMERS_PostalCode,CUSTOMER_CUSTOMERS_Country,CUSTOMER_CUSTOMERS_Phone,CUSTOMER_CUSTOMERS_Fax,CUSTOMER_CUSTOMER_Fname,CUSTOMER_CUSTOMER_Lname,CUSTOMER_CUSTOMER_State,CUSTOMER_CUSTOMERS_ID,CUSTOMER_CUSTOMER_PersonID,CUSTOMER_CUSTOMER_StoreID,CUSTOMER_CUSTOMER_TerritoryID,CUSTOMER_CUSTOMER_AccountNumber
0,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,,12209,Germany,030-0074321,030-0076545,,,,,,,,
1,Ana Trujillo Emparedados y helados,Ana Trujillo,Owner,Avda. de la Constitución 2222,México D.F.,,05021,Mexico,(5) 555-4729,(5) 555-3745,,,,,,,,
2,Antonio Moreno Taquería,Antonio Moreno,Owner,Mataderos 2312,México D.F.,,05023,Mexico,(5) 555-3932,,,,,,,,,
3,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,,WA1 1DP,UK,(171) 555-7788,(171) 555-6750,,,,,,,,
4,Berglunds snabbköp,Christina Berglund,Order Administrator,Berguvsvägen 8,Luleå,,S-958 22,Sweden,0921-12 34 65,0921-12 34 67,,,,,,,,


### SpecialOffer

In [484]:
# Join each special offer to its products. This is an inner join (the pandas
# default, spelled out here), so only SpecialOfferIDs present in both frames
# are kept.
specialoffers = pd.merge(
    adventureworks_sales_specialoffer,
    adventureworks_sales_specialofferproduct,
    how="inner",
    on="SpecialOfferID",
)

# remove the rowguid and modified date columns via the shared helper
drop_modified_date_rowguid(specialoffers)

# map the source column names onto the column names used in the database
specialoffers = specialoffers.rename(
    columns={
        'SpecialOfferID': 'SPECIALOFFER_SPECIALOFFER_ID',
        'ProductID': 'SPECIALOFFER_SPECIALOFFERPRODUCT_ProductID',
        'Description': 'SPECIALOFFER_SPECIALOFFER_Description',
        'DiscountPct': 'SPECIALOFFER_SPECIALOFFER_DiscountPCT',
        'Type': 'SPECIALOFFER_SPECIALOFFER_Type',
        'Category': 'SPECIALOFFER_SPECIALOFFER_Category',
        'StartDate': 'SPECIALOFFER_SPECIALOFFER_StartDate',
        'EndDate': 'SPECIALOFFER_SPECIALOFFER_EndDate',
        'MinQty': 'SPECIALOFFER_SPECIALOFFER_MinQty',
        'MaxQty': 'SPECIALOFFER_SPECIALOFFER_MaxQty',
    },
)

specialoffers

Unnamed: 0,SPECIALOFFER_SPECIALOFFER_ID,SPECIALOFFER_SPECIALOFFER_Description,SPECIALOFFER_SPECIALOFFER_DiscountPCT,SPECIALOFFER_SPECIALOFFER_Type,SPECIALOFFER_SPECIALOFFER_Category,SPECIALOFFER_SPECIALOFFER_StartDate,SPECIALOFFER_SPECIALOFFER_EndDate,SPECIALOFFER_SPECIALOFFER_MinQty,SPECIALOFFER_SPECIALOFFER_MaxQty,SPECIALOFFER_SPECIALOFFERPRODUCT_ProductID
0,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,680
1,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,706
2,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,707
3,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,708
4,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,709
...,...,...,...,...,...,...,...,...,...,...
533,16,Mountain-500 Silver Clearance Sale,0.4,Discontinued Product,Reseller,2014-03-31,2014-05-30,0,,984
534,16,Mountain-500 Silver Clearance Sale,0.4,Discontinued Product,Reseller,2014-03-31,2014-05-30,0,,985
535,16,Mountain-500 Silver Clearance Sale,0.4,Discontinued Product,Reseller,2014-03-31,2014-05-30,0,,986
536,16,Mountain-500 Silver Clearance Sale,0.4,Discontinued Product,Reseller,2014-03-31,2014-05-30,0,,987


### CreditCard

In [485]:
# Join each credit card to the person that holds it. pd.merge defaults to an
# inner join, so only CreditCardIDs present in both frames are kept.
creditcards = pd.merge(adventureworks_sales_creditcard, adventureworks_sales_personcreditcard, on="CreditCardID")

# remove the rowguid and modified date columns via the shared helper
drop_modified_date_rowguid(creditcards)

# map the source column names onto the column names used in the database
creditcards = creditcards.rename(columns={
    'CreditCardID': 'CREDITCARD_CREDITCARD_ID',
    'CardType': 'CREDITCARD_CREDITCARD_CardType',
    # fixed: the target name previously ended in a stray space, which produced
    # a column literally named 'CREDITCARD_CREDITCARD_CardNumber '
    'CardNumber': 'CREDITCARD_CREDITCARD_CardNumber',
    'ExpMonth': 'CREDITCARD_CREDITCARD_ExpMonth',
    'ExpYear': 'CREDITCARD_CREDITCARD_ExpYear',
    'BusinessEntityID': 'CREDITCARD_PERSONCREDITCARD_BusinessEntityID'
})

creditcards.head()

Unnamed: 0,CREDITCARD_CREDITCARD_ID,CREDITCARD_CREDITCARD_CardType,CREDITCARD_CREDITCARD_CardNumber,CREDITCARD_CREDITCARD_ExpMonth,CREDITCARD_CREDITCARD_ExpYear,CREDITCARD_PERSONCREDITCARD_BusinessEntityID
0,1,SuperiorCard,33332664695310,11,2006,4955
1,2,Distinguish,55552127249722,8,2005,13222
2,3,ColonialVoice,77778344838353,7,2005,7082
3,4,ColonialVoice,77774915718248,7,2006,9347
4,5,Vista,11114404600042,4,2005,11277


### Supplier

In [486]:
# Suppliers come only from Northwind. rename is not in-place here, so
# `suppliers` is a new frame and northwind_suppliers keeps its column names.
suppliers = northwind_suppliers.rename(
    columns=dict(
        SupplierID='SUPPLIER_SUPPLIERS_SupplierID',
        CompanyName='SUPPLIER_SUPPLIERS_CompanyName',
        ContactName='SUPPLIER_SUPPLIERS_ContactName',
        ContactTitle='SUPPLIER_SUPPLIERS_ContactTitle',
        Address='SUPPLIER_SUPPLIERS_Address',
        City='SUPPLIER_SUPPLIERS_City',
        Region='SUPPLIER_SUPPLIERS_Region',
        PostalCode='SUPPLIER_SUPPLIERS_PostalCode',
        Country='SUPPLIER_SUPPLIERS_Country',
        Phone='SUPPLIER_SUPPLIERS_Phone',
        Fax='SUPPLIER_SUPPLIERS_Fax',
        HomePage='SUPPLIER_SUPPLIERS_HomePage',
    ),
)

suppliers.head()

Unnamed: 0,SUPPLIER_SUPPLIERS_SupplierID,SUPPLIER_SUPPLIERS_CompanyName,SUPPLIER_SUPPLIERS_ContactName,SUPPLIER_SUPPLIERS_ContactTitle,SUPPLIER_SUPPLIERS_Address,SUPPLIER_SUPPLIERS_City,SUPPLIER_SUPPLIERS_Region,SUPPLIER_SUPPLIERS_PostalCode,SUPPLIER_SUPPLIERS_Country,SUPPLIER_SUPPLIERS_Phone,SUPPLIER_SUPPLIERS_Fax,SUPPLIER_SUPPLIERS_HomePage
0,1,Exotic Liquids,Charlotte Cooper,Purchasing Manager,49 Gilbert St.,London,,EC1 4SD,UK,(171) 555-2222,,
1,2,New Orleans Cajun Delights,Shelley Burke,Order Administrator,P.O. Box 78934,New Orleans,LA,70117,USA,(100) 555-4822,,#CAJUN.HTM#
2,3,Grandma Kelly's Homestead,Regina Murphy,Sales Representative,707 Oxford Rd.,Ann Arbor,MI,48104,USA,(313) 555-5735,(313) 555-3349,
3,4,Tokyo Traders,Yoshi Nagase,Marketing Manager,9-8 Sekimai Musashino-shi,Tokyo,,100,Japan,(03) 3555-5011,,
4,5,Cooperativa de Quesos 'Las Cabras',Antonio del Valle Saavedra,Export Administrator,Calle del Rosal 4,Oviedo,Asturias,33007,Spain,(98) 598 76 54,,


### Currency

In [487]:
# Join each currency to the country/region that uses it. pd.merge defaults to
# an inner join, so only CurrencyCodes present in both frames are kept.
currencies = pd.merge(adventureworks_sales_currency, adventureworks_sales_countryregioncurrency, on="CurrencyCode")

# Append the currency rate rows below the currency rows. This is a row-wise
# concat, not a join: currency rows have empty rate columns and vice versa.
currencies = pd.concat([currencies, adventureworks_sales_currencyrate], ignore_index=True)

# remove the modified date columns via the shared helper
drop_modified_date_rowguid(currencies)

# map the source column names onto the warehouse column names
currencies = currencies.rename(columns={
    'CurrencyCode': 'CURRENCY_CURRENCY_CurrencyCode',
    'Name': 'CURRENCY_CURRENCY_Name',
    'CountryRegionCode': 'CURRENCY_COUNTRYREGIONCURRENCY_CountryRegionCode',
    'CurrencyRateID': 'CURRENCY_CURRENCYRATE_CurrencyRateID',
    'CurrencyRateDate': 'CURRENCY_CURRENCYRATE_CurrencyRateDate',
    'FromCurrencyCode': 'CURRENCY_CURRENCYRATE_FromCurrencyCode',
    'ToCurrencyCode': 'CURRENCY_CURRENCYRATE_ToCurrencyCode',
    # fixed: the target name previously ended in a stray space, which produced
    # a column literally named 'CURRENCY_CURRENCYRATE_AverageRate '
    'AverageRate': 'CURRENCY_CURRENCYRATE_AverageRate',
    'EndOfDayRate': 'CURRENCY_CURRENCYRATE_EndOfDayRate',
})

currencies.head()

Unnamed: 0,CURRENCY_CURRENCY_CurrencyCode,CURRENCY_CURRENCY_Name,CURRENCY_COUNTRYREGIONCURRENCY_CountryRegionCode,CURRENCY_CURRENCYRATE_CurrencyRateID,CURRENCY_CURRENCYRATE_CurrencyRateDate,CURRENCY_CURRENCYRATE_FromCurrencyCode,CURRENCY_CURRENCYRATE_ToCurrencyCode,CURRENCY_CURRENCYRATE_AverageRate,CURRENCY_CURRENCYRATE_EndOfDayRate
0,AED,Emirati Dirham,AE,,NaT,,,,
1,ARS,Argentine Peso,AR,,NaT,,,,
2,ATS,Shilling,AT,,NaT,,,,
3,AUD,Australian Dollar,AU,,NaT,,,,
4,BBD,Barbados Dollar,BB,,NaT,,,,


### Territory

In [488]:
# AdventureWorks: country/region -> state/province -> sales tax rate.
# Both steps are inner joins (the pandas default).
# NOTE(review): an inner join on StateProvinceID keeps only states that have a
# tax-rate row - confirm that dropping the others is intended.
adventureworks_combined_territory = adventureworks_person_countryregion.merge(
    adventureworks_person_stateprovince,
    on="CountryRegionCode",
    suffixes=('_pcr', '_sts'),
).merge(
    adventureworks_sales_salestaxrate,
    on="StateProvinceID",
    suffixes=('_st', '_sst'),
)

# Northwind: region joined to its territories
nw_combined_territory = northwind_region.merge(northwind_territories, on="RegionID")

# Aenc: region joined to its states
aenc_combined_regionstate = aenc_region.merge(aenc_state, on="region")

# Concatenate the three per-source frames row-wise; columns that a source does
# not have are filled with NaN for that source's rows.
territories = pd.concat(
    [nw_combined_territory, aenc_combined_regionstate, adventureworks_combined_territory],
    ignore_index=True,
)

# Drop the rowguid / modified-date bookkeeping columns (helper defined earlier)
drop_modified_date_rowguid(territories)

# Rename to the warehouse column names
territories = territories.rename(columns={
    # columns coming from the Northwind frame
    'RegionID': 'TERRITORY_REGION_RegionID',
    'RegionDescription': 'TERRITORY_REGION_RegionDescription',
    'TerritoryID': 'TERRITORY_TERRITORIES_TerritoryID',
    'TerritoryDescription': 'TERRITORY_TERRITORIES_TerritoryDescription',
    # columns coming from the Aenc frame
    'region': 'TERRITORY_REGION_Region',
    'state_id': 'TERRITORY_STATE_StateID',
    'state_name': 'TERRITORY_STATE_StateName',
    'state_capital': 'TERRITORY_STATE_StateCapital',
    'country': 'TERRITORY_STATE_Country',
    # columns coming from the AdventureWorks frame ('_pcr' / '_sts' are the
    # suffixes given to the two clashing Name columns in the first merge)
    'CountryRegionCode': 'TERRITORY_COUNTRYREGION_CountryRegionCode',
    'Name_pcr': 'TERRITORY_COUNTRYREGION_Name',
    'StateProvinceID': 'TERRITORY_STATEPROVINCE_StateProvinceID',
    'StateProvinceCode': 'TERRITORY_STATEPROVINCE_StateProvinceCode',
    'IsOnlyStateProvinceFlag': 'TERRITORY_STATEPROVINCE_IsOnlyStateProvinceFlag',
    'Name_sts': 'TERRITORY_STATEPROVINCE_Name',
    'SalesTaxRateID': 'TERRITORY_SALESTAXRATE_SalesTaxRateID',
    'TaxType': 'TERRITORY_SALESTAXRATE_TaxType',
    'TaxRate': 'TERRITORY_SALESTAXRATE_TaxRate',
    'Name': 'TERRITORY_SALESTAXRATE_Name',
})

territories.head()

Unnamed: 0,TERRITORY_REGION_RegionID,TERRITORY_REGION_RegionDescription,TERRITORY_TERRITORIES_TerritoryID,TERRITORY_TERRITORIES_TerritoryDescription,TERRITORY_REGION_Region,TERRITORY_STATE_StateID,TERRITORY_STATE_StateName,TERRITORY_STATE_StateCapital,TERRITORY_STATE_Country,TERRITORY_COUNTRYREGION_CountryRegionCode,TERRITORY_COUNTRYREGION_Name,TERRITORY_STATEPROVINCE_StateProvinceID,TERRITORY_STATEPROVINCE_StateProvinceCode,TERRITORY_STATEPROVINCE_IsOnlyStateProvinceFlag,TERRITORY_STATEPROVINCE_Name,TERRITORY_SALESTAXRATE_SalesTaxRateID,TERRITORY_SALESTAXRATE_TaxType,TERRITORY_SALESTAXRATE_TaxRate,TERRITORY_SALESTAXRATE_Name
0,1.0,Eastern ...,1581,Westboro ...,,,,,,,,,,,,,,,
1,1.0,Eastern ...,1730,Bedford ...,,,,,,,,,,,,,,,
2,1.0,Eastern ...,1833,Georgetow ...,,,,,,,,,,,,,,,
3,1.0,Eastern ...,2116,Boston ...,,,,,,,,,,,,,,,
4,1.0,Eastern ...,2139,Cambridge ...,,,,,,,,,,,,,,,


### OrderHeader

In [489]:
# Order detail -> order header -> header/sales-reason link -> sales reason.
# All three are inner joins (the pandas default), so an order line appears once
# per sales reason attached to its order (see the repeated lines in the preview).
# NOTE(review): orders without any sales reason are dropped by the second join -
# confirm that this is intended.
orderheaders = adventureworks_sales_salesorderdetail.merge(
    adventureworks_sales_salesorderheader,
    on="SalesOrderID",
    suffixes=('_sod', '_soh'),
).merge(
    adventureworks_sales_salesorderhearerrsaleseason,
    on="SalesOrderID",
).merge(
    adventureworks_sales_salesreason,
    on="SalesReasonID",
)

# Drop the rowguid / modified-date bookkeeping columns (helper defined earlier)
drop_modified_date_rowguid(orderheaders)

# Rename to the warehouse column names. Most targets are simply
# "<prefix><source column>"; the two address columns are listed explicitly
# because their target names drop the trailing "ID".
orderheaders = orderheaders.rename(columns={
    **{
        column: f'ORDERHEADER_SALESORDERDETAIL_{column}'
        for column in (
            'SalesOrderID',
            'SalesOrderDetailID',
            'CarrierTrackingNumber',
            'OrderQty',
            'ProductID',
            'SpecialOfferID',
            'UnitPrice',
            'UnitPriceDiscount',
            'LineTotal',
        )
    },
    **{
        column: f'ORDERHEADER_SALESORDERHEADER_{column}'
        for column in (
            'RevisionNumber',
            'OrderDate',
            'DueDate',
            'ShipDate',
            'Status',
            'OnlineOrderFlag',
            'SalesOrderNumber',
            'PurchaseOrderNumber',
            'AccountNumber',
            'CustomerID',
            'SalesPersonID',
            'TerritoryID',
            'ShipMethodID',
            'CreditCardID',
            'CreditCardApprovalCode',
            'CurrencyRateID',
            'SubTotal',
            'TaxAmt',
            'Freight',
            'TotalDue',
            'Comment',
        )
    },
    'BillToAddressID': 'ORDERHEADER_SALESORDERHEADER_BillToAddress',
    'ShipToAddressID': 'ORDERHEADER_SALESORDERHEADER_ShipToAddress',
    **{
        column: f'ORDERHEADER_SALESREASON_{column}'
        for column in ('SalesReasonID', 'Name', 'ReasonType')
    },
})

orderheaders.head()

Unnamed: 0,ORDERHEADER_SALESORDERDETAIL_SalesOrderID,ORDERHEADER_SALESORDERDETAIL_SalesOrderDetailID,ORDERHEADER_SALESORDERDETAIL_CarrierTrackingNumber,ORDERHEADER_SALESORDERDETAIL_OrderQty,ORDERHEADER_SALESORDERDETAIL_ProductID,ORDERHEADER_SALESORDERDETAIL_SpecialOfferID,ORDERHEADER_SALESORDERDETAIL_UnitPrice,ORDERHEADER_SALESORDERDETAIL_UnitPriceDiscount,ORDERHEADER_SALESORDERDETAIL_LineTotal,ORDERHEADER_SALESORDERHEADER_RevisionNumber,...,ORDERHEADER_SALESORDERHEADER_CreditCardApprovalCode,ORDERHEADER_SALESORDERHEADER_CurrencyRateID,ORDERHEADER_SALESORDERHEADER_SubTotal,ORDERHEADER_SALESORDERHEADER_TaxAmt,ORDERHEADER_SALESORDERHEADER_Freight,ORDERHEADER_SALESORDERHEADER_TotalDue,ORDERHEADER_SALESORDERHEADER_Comment,ORDERHEADER_SALESREASON_SalesReasonID,ORDERHEADER_SALESREASON_Name,ORDERHEADER_SALESREASON_ReasonType
0,43697,353,,1,749,1,3578.27,0.0,3578.27,8,...,530200Vi22686,4.0,3578.27,286.2616,89.4568,3953.9884,,5,Manufacturer,Other
1,43697,353,,1,749,1,3578.27,0.0,3578.27,8,...,530200Vi22686,4.0,3578.27,286.2616,89.4568,3953.9884,,9,Quality,Other
2,43702,358,,1,750,1,3578.27,0.0,3578.27,8,...,1230194Vi41919,,3578.27,286.2616,89.4568,3953.9884,,5,Manufacturer,Other
3,43702,358,,1,750,1,3578.27,0.0,3578.27,8,...,1230194Vi41919,,3578.27,286.2616,89.4568,3953.9884,,9,Quality,Other
4,43703,359,,1,749,1,3578.27,0.0,3578.27,8,...,1234632Vi48985,15.0,3578.27,286.2616,89.4568,3953.9884,,5,Manufacturer,Other


## Loading the data into the UnitedOutdoors datawarehouse

### Departments

In [490]:
# SQL column types for the Department load, keyed by warehouse column name
departments_dtypes = {
    'DEPARTMENT_DEPARTMENT_DeptID': Integer,
    'DEPARTMENT_DEPARTMENT_DeptName': String(100),
    'DEPARTMENT_DEPARTMENT_GroupName': String(100),
    'DEPARTMENT_DEPARTMENT_DeptHeadID': Integer,
    'DEPARTMENT_source_database': String(100),
}

# TODO dept_head_id needs to refer to an employee
# Until then DeptHeadID is handed over with an empty key mapping. The returned
# mapping is reused further down as the DepartmentID lookup for
# EmployeeDepartmentHistory.
departments_nk_sk_dict = prepare_and_insert_return_sk(
    departments,
    departments_dtypes,
    'Department',
    united_outdoors_engine,
    'DEPARTMENT_DEPARTMENT_DeptID',
    {'DEPARTMENT_DEPARTMENT_DeptHeadID': {}},
)

Replacing natural keys with surrogate keys for column: DEPARTMENT_DEPARTMENT_DeptHeadID
Inserting data into table: Department with chunk size: 400


### Employee

In [491]:
# SQL column types for the Employee load, keyed by warehouse column name.
# Key order is kept exactly as it was.
employees_dtypes = {
    # identifiers and references
    'EMPLOYEE_EMPLOYEE_EmployeeID': Integer,
    'EMPLOYEE_EMPLOYEE_DepartmentID': Integer,
    'EMPLOYEE_EMPLOYEE_ManagerID': Integer,
    'EMPLOYEE_EMPLOYEETERRITORIES_TerritoryID': Integer,
    # EMPLOYEE_EMPLOYEE_* attributes
    'EMPLOYEE_EMPLOYEE_Emp_Fname': NVARCHAR(255),
    'EMPLOYEE_EMPLOYEE_Emp_Lname': NVARCHAR(255),
    'EMPLOYEE_EMPLOYEE_Street': NVARCHAR(150),
    'EMPLOYEE_EMPLOYEE_City': NVARCHAR(100),
    # NOTE(review): CUSTOMER_CUSTOMER_State is declared CHAR(2) in the Customer
    # load - confirm that one character is really enough here.
    'EMPLOYEE_EMPLOYEE_State': CHAR(1),
    'EMPLOYEE_EMPLOYEE_Zip_Code': CHAR(5),
    # NOTE(review): every other phone column in this notebook is a string type -
    # confirm that an integer phone number is intended.
    'EMPLOYEE_EMPLOYEE_Phone': Integer,
    'EMPLOYEE_EMPLOYEE_Status': CHAR(1),
    'EMPLOYEE_EMPLOYEE_SS_Number': Integer,
    'EMPLOYEE_EMPLOYEE_Salary': Integer,
    'EMPLOYEE_EMPLOYEE_Start_Date': DATE,
    'EMPLOYEE_EMPLOYEE_Termination': DATE,
    'EMPLOYEE_EMPLOYEE_Birth_Date': DATE,
    'EMPLOYEE_EMPLOYEE_Bene_Health_Ins': CHAR(1),
    'EMPLOYEE_EMPLOYEE_Bene_Life_Ins': CHAR(1),
    'EMPLOYEE_EMPLOYEE_Bene_Day_Care': CHAR(1),
    'EMPLOYEE_EMPLOYEE_Sex': CHAR(1),
    # EMPLOYEE_BONUS_* attributes
    'EMPLOYEE_BONUS_Bonus_Date': DATE,
    'EMPLOYEE_BONUS_Bonus_Amount': Integer,
    # EMPLOYEE_EMPLOYEES_* attributes
    'EMPLOYEE_EMPLOYEES_Title': NVARCHAR(50),
    'EMPLOYEE_EMPLOYEES_TitleOfCourtesy': NVARCHAR(50),
    'EMPLOYEE_EMPLOYEES_HireDate': DATE,
    'EMPLOYEE_EMPLOYEES_HomePhone': NVARCHAR(20),
    'EMPLOYEE_EMPLOYEES_Extension': Integer,
    'EMPLOYEE_EMPLOYEES_Photo': String,
    'EMPLOYEE_EMPLOYEES_PhotoPath': NVARCHAR(255),
    'EMPLOYEE_EMPLOYEES_Notes': String,
}

# No key mappings are passed for this table
prepare_and_insert(
    employees,
    employees_dtypes,
    'Employee',
    united_outdoors_engine,
)

Inserting data into table: Employee with chunk size: 64


### BusinessEntities

In [492]:
# SQL column types for the BusinessEntity load, keyed by warehouse column name
businessentities_dtypes = {
    'BUSINESSENTITY_BUSINESSENTITY_BusinessEntityID': Integer,
    'BUSINESSENTITY_BUSINESSENTITYCONTACT_PersonID': Integer,
    'BUSINESSENTITY_CONTACTTYPE_ContactTypeID': Integer,
    'BUSINESSENTITY_CONTACTTYPE_Name': String(100),
}

# The returned mapping is the BusinessEntityID lookup that the later loads
# (Person, JobCandidate, SalesPerson, ...) pass as their key mapping.
businessentities_nk_sk_dict = prepare_and_insert_return_sk(
    businessentities,
    businessentities_dtypes,
    'BusinessEntity',
    united_outdoors_engine,
    'BUSINESSENTITY_BUSINESSENTITY_BusinessEntityID',
)

Inserting data into table: BusinessEntity with chunk size: 500


### People

In [493]:
# SQL column types for the Person load, keyed by warehouse column name
people_dtypes = {
    # PERSON_PERSON_* attributes
    'PERSON_PERSON_BusinessEntityID': Integer,
    'PERSON_PERSON_PersonType': String(2),
    'PERSON_PERSON_NameStyle': BIT,
    'PERSON_PERSON_Title': String(100),
    'PERSON_PERSON_FirstName': String(100),
    'PERSON_PERSON_MiddleName': String(100),
    'PERSON_PERSON_LastName': String(100),
    'PERSON_PERSON_Suffix': String(100),
    'PERSON_PERSON_EmailPromotion': Integer,
    'PERSON_PERSON_AdditionalContactInfo': XML,
    'PERSON_PERSON_Demographics': XML,
    # phone
    'PERSON_PERSONPHONE_PhoneNumber': String(100),
    'PERSON_PHONENUMBERTYPE_PhoneNumberTypeID': Integer,
    'PERSON_PHONENUMBERTYPE_Name': String(100),
    # e-mail
    'PERSON_EMAILADDRESS_EmailAddressID': Integer,
    'PERSON_EMAILADDRESS_EmailAddress': String(100),
    # password
    'PERSON_PASSWORD_PasswordHash': LargeBinary,
    'PERSON_PASSWORD_PasswordSalt': LargeBinary,
}

# The two password columns are declared LargeBinary above, so every str value
# is UTF-8 encoded to bytes first; values that are not str are left untouched.
for binary_column in ('PERSON_PASSWORD_PasswordHash', 'PERSON_PASSWORD_PasswordSalt'):
    people[binary_column] = people[binary_column].apply(
        lambda value: value.encode('utf-8') if isinstance(value, str) else value
    )

# BusinessEntityID is translated through the BusinessEntity key mapping
prepare_and_insert(
    people,
    people_dtypes,
    'Person',
    united_outdoors_engine,
    {'PERSON_PERSON_BusinessEntityID': businessentities_nk_sk_dict},
)

Replacing natural keys with surrogate keys for column: PERSON_PERSON_BusinessEntityID
Inserting data into table: Person with chunk size: 111


### Updating the BusinessEntity table
Replaces the natural keys in the PersonID column with the corresponding surrogate keys.

In [494]:
# PersonID holds business-entity keys, so it is translated with the same
# mapping that the BusinessEntity load returned.
prepare_and_update(
    'BusinessEntity',
    united_outdoors_engine,
    {'BUSINESSENTITY_BUSINESSENTITYCONTACT_PersonID': businessentities_nk_sk_dict},
)

Updating data in table: BusinessEntity


### BusinessEntityAddresses

In [495]:
# SQL column types for the BusinessEntityAddress load, keyed by warehouse column name
businessentityaddresses_dtypes = {
    'BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID': Integer,
    # address type
    'BUSINESSENTITYADDRESS_ADDRESSTYPE_AddressTypeID': Integer,
    'BUSINESSENTITYADDRESS_ADDRESSTYPE_Name': String(100),
    # address
    'BUSINESSENTITYADDRESS_ADDRESS_AddressID': Integer,
    'BUSINESSENTITYADDRESS_ADDRESS_AddressLine1': String(100),
    'BUSINESSENTITYADDRESS_ADDRESS_AddressLine2': String(100),
    'BUSINESSENTITYADDRESS_ADDRESS_City': String(100),
    'BUSINESSENTITYADDRESS_ADDRESS_POSTALCODE': String(100),
    'BUSINESSENTITYADDRESS_ADDRESS_SpatialLocation': VARCHAR,
    'BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID': Integer,
}

# TODO the StateProvinceID needs to refer to sk of Territory
# Until then StateProvinceID is handed over with an empty key mapping.
prepare_and_insert(
    businessentityaddresses,
    businessentityaddresses_dtypes,
    'BusinessEntityAddress',
    united_outdoors_engine,
    {
        'BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID': businessentities_nk_sk_dict,
        'BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID': {},
    },
)

Replacing natural keys with surrogate keys for column: BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID
Replacing natural keys with surrogate keys for column: BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID
Inserting data into table: BusinessEntityAddress with chunk size: 200


### Products (TODO CHANGE)

In [496]:
# SQL column types for the Product load. Unlike the other loads in this
# notebook, the keys here are the raw source column names without a
# TABLE_SOURCETABLE_ prefix (the section heading marks this cell "TODO CHANGE").
products_dtypes = {
    'ProductID': Integer,
    'ProductName': NVARCHAR(50),
    'SupplierID': Integer,
    'CategoryID': Integer,
    'QuantityPerUnit': NVARCHAR(30),
    'UnitPrice': MONEY,
    'UnitsInStock': Integer,
    'UnitsOnOrder': Integer,
    'ReorderLevel': Integer,
    'Discontinued': BIT,
    'Description': NVARCHAR,
    'ProdSize': NVARCHAR(50),
    'Color': NVARCHAR(15),
    'Quantity': Integer,
    'picture_name': NVARCHAR(50),
    'Category': NVARCHAR(20),
    'Name': NVARCHAR(50),
    'ProductNumber': NVARCHAR(25),
    'MakeFlag': BIT,
    'FinishedGoodsFlag': BIT,
    'SafetyStockLevel': Integer,
    'ReorderPoint': Integer,
    'StandardCost': DECIMAL(8,4),
    'ListPrice': MONEY,
    'Size': NVARCHAR(5),
    'SizeUnitMeasureCode': CHAR(2),
    'WeightUnitMeasureCode': NVARCHAR(3),
    'Weight': DECIMAL(8,2),
    'DaysToManufacture': Integer,
    'ProductLine': CHAR(1),
    'Class': CHAR(1),
    'Style': CHAR(1),
    'ProductSubcategoryID': Integer,
    'ProductModelID': Integer,
    'SellStartDate': DATE,
    'SellEndDate': DATE,
    'DiscontinuedDate': DATE,
    'ModifiedDate': DATE,
    'ProductCategoryID': Integer,
    'ProductDescriptionID': Integer,
    'CatalogDescription': XML,
    'Instructions': XML,
    'IllustrationID': Integer,
    'CultureID': NVARCHAR(10),
    'ProductPhotoID': Integer,
    'ThumbNailPhotoHexString': String,
    'ThumbnailPhotoFileName': NVARCHAR(50),
    'LargePhotoHexString': String,
    'LargePhotoFileName': NVARCHAR(50),
    'Primary': BIT
}

# NOTE(review): the recorded run of this cell ended with a PendingRollbackError,
# so this insert did not complete - the cause still has to be investigated.
prepare_and_insert(products, products_dtypes, 'Product', united_outdoors_engine)


Inserting data into table: Product with chunk size: 39


PendingRollbackError: Can't reconnect until invalid transaction is rolled back.  Please rollback() fully before proceeding (Background on this error at: https://sqlalche.me/e/20/8s2b)

### JobCandidate

In [497]:
# SQL column types for the JobCandidate load; every warehouse column name is
# the source column name behind the JOBCANDIDATE_JOBCANDIDATE_ prefix.
jobcandidates_dtypes = {
    f'JOBCANDIDATE_JOBCANDIDATE_{column}': sql_type
    for column, sql_type in {
        'JobCandidateID': Integer,
        'BusinessEntityID': Integer,
        'Resume': XML,
    }.items()
}

# BusinessEntityID is translated through the BusinessEntity key mapping
prepare_and_insert(
    jobcandidates,
    jobcandidates_dtypes,
    'JobCandidate',
    united_outdoors_engine,
    {'JOBCANDIDATE_JOBCANDIDATE_BusinessEntityID': businessentities_nk_sk_dict},
)

Replacing natural keys with surrogate keys for column: JOBCANDIDATE_JOBCANDIDATE_BusinessEntityID
Inserting data into table: JobCandidate with chunk size: 666


### EmployeeDepartmentHistory

In [498]:
# SQL column types for the EmployeeDepartmentHistory load; every warehouse
# column name is the source column name behind the
# EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_ prefix.
employeedepartmenthistories_dtypes = {
    f'EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_{column}': sql_type
    for column, sql_type in {
        'BusinessEntityID': Integer,
        'DepartmentID': Integer,
        'ShiftID': Integer,
        'StartDate': DATE,
        'EndDate': DATE,
    }.items()
}

# BusinessEntityID and DepartmentID use the mappings returned by the earlier
# loads; ShiftID is handed over with an empty mapping.
prepare_and_insert(
    employeedepartmenthistories,
    employeedepartmenthistories_dtypes,
    'EmployeeDepartmentHistory',
    united_outdoors_engine,
    {
        'EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_BusinessEntityID': businessentities_nk_sk_dict,
        'EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_DepartmentID': departments_nk_sk_dict,
        'EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_ShiftID': {},
    },
)

Replacing natural keys with surrogate keys for column: EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_BusinessEntityID
Replacing natural keys with surrogate keys for column: EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_DepartmentID
Replacing natural keys with surrogate keys for column: EMPLOYEEDEPARTMENTHISTORY_EMPLOYEEDEPARTMENTHISTORY_ShiftID
Inserting data into table: EmployeeDepartmentHistory with chunk size: 400


### EmployeePayHistory

In [499]:
# SQL column types for the EmployeePayHistory load; every warehouse column name
# is the source column name behind the EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_ prefix.
employeepayhistories_dtypes = {
    f'EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_{column}': sql_type
    for column, sql_type in {
        'BusinessEntityID': Integer,
        'RateChangeDate': DATE,
        'Rate': MONEY,
        'PayFrequency': Integer,
    }.items()
}

# BusinessEntityID is translated through the BusinessEntity key mapping
prepare_and_insert(
    employeepayhistories,
    employeepayhistories_dtypes,
    'EmployeePayHistory',
    united_outdoors_engine,
    {'EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_BusinessEntityID': businessentities_nk_sk_dict},
)

Replacing natural keys with surrogate keys for column: EMPLOYEEPAYHISTORY_EMPLOYEEPAYHISTORY_BusinessEntityID
Inserting data into table: EmployeePayHistory with chunk size: 500


### Shift

In [500]:
# SQL column types for the Shift load; every warehouse column name is the
# source column name behind the SHIFT_SHIFT_ prefix.
shifts_dtypes = {
    f'SHIFT_SHIFT_{column}': sql_type
    for column, sql_type in {
        'ShiftID': Integer,
        'Name': String(100),
        'StartTime': TIME,
        'EndTime': TIME,
    }.items()
}

# No key mappings are passed for this table
prepare_and_insert(
    shifts,
    shifts_dtypes,
    'Shift',
    united_outdoors_engine,
)

Inserting data into table: Shift with chunk size: 500


### SalesPerson

In [501]:
# SQL column types for the SalesPerson load; every warehouse column name is the
# source column name behind the SALESPERSON_SALESPERSON_ prefix.
salespeople_dtypes = {
    f'SALESPERSON_SALESPERSON_{column}': sql_type
    for column, sql_type in {
        'BusinessEntityID': Integer,
        'TerritoryID': Integer,
        'SalesQuota': MONEY,
        'Bonus': MONEY,
        'CommissionPct': DECIMAL(8,4),
        'SalesYTD': MONEY,
        'SalesLastYear': MONEY,
    }.items()
}

# BusinessEntityID uses the BusinessEntity key mapping; TerritoryID is handed
# over with an empty mapping.
prepare_and_insert(
    salespeople,
    salespeople_dtypes,
    'SalesPerson',
    united_outdoors_engine,
    {
        'SALESPERSON_SALESPERSON_BusinessEntityID': businessentities_nk_sk_dict,
        'SALESPERSON_SALESPERSON_TerritoryID': {},
    },
)

Replacing natural keys with surrogate keys for column: SALESPERSON_SALESPERSON_BusinessEntityID
Replacing natural keys with surrogate keys for column: SALESPERSON_SALESPERSON_TerritoryID
Inserting data into table: SalesPerson with chunk size: 285


### ProductVendor

In [503]:
# SQL column types for the ProductVendor load; every warehouse column name is
# the source column name behind the PRODUCTVENDOR_PRODUCTVENDOR_ prefix.
productvendors_dtypes = {
    f'PRODUCTVENDOR_PRODUCTVENDOR_{column}': sql_type
    for column, sql_type in {
        'ProductID': Integer,
        'BusinessEntityID': Integer,
        'AverageLeadTime': Integer,
        'StandardPrice': MONEY,
        'LastReceiptCost': MONEY,
        'LastReceiptDate': DATE,
        'MinOrderQty': Integer,
        'MaxOrderQty': Integer,
        'OnOrderQty': Integer,
        'UnitMeasureCode': CHAR(3),
    }.items()
}

# BusinessEntityID uses the BusinessEntity key mapping; ProductID and
# UnitMeasureCode are handed over with empty mappings.
prepare_and_insert(
    productvendors,
    productvendors_dtypes,
    'ProductVendor',
    united_outdoors_engine,
    {
        'PRODUCTVENDOR_PRODUCTVENDOR_ProductID': {},
        'PRODUCTVENDOR_PRODUCTVENDOR_BusinessEntityID': businessentities_nk_sk_dict,
        'PRODUCTVENDOR_PRODUCTVENDOR_UnitMeasureCode': {},
    },
)

Replacing natural keys with surrogate keys for column: PRODUCTVENDOR_PRODUCTVENDOR_ProductID
Replacing natural keys with surrogate keys for column: PRODUCTVENDOR_PRODUCTVENDOR_BusinessEntityID
Replacing natural keys with surrogate keys for column: PRODUCTVENDOR_PRODUCTVENDOR_UnitMeasureCode
Inserting data into table: ProductVendor with chunk size: 200


### CustomerDemographics

In [504]:
# SQL column types for the CustomerDemographic load; every warehouse column
# name is the source column name behind the
# CUSTOMERDEMOGRAPHIC_CUSTOMERDEMOGRAPHICS_ prefix.
customerdemographics_dtypes = {
    f'CUSTOMERDEMOGRAPHIC_CUSTOMERDEMOGRAPHICS_{column}': sql_type
    for column, sql_type in {
        'CustomerTypeID': NVARCHAR(10),
        'CustomerDesc': String(100),
    }.items()
}

# The returned mapping is the CustomerTypeID lookup used by the
# CustomerCustomerDemo load.
customerdemographics_nk_sk_dict = prepare_and_insert_return_sk(
    customerdemographics,
    customerdemographics_dtypes,
    'CustomerDemographic',
    united_outdoors_engine,
    'CUSTOMERDEMOGRAPHIC_CUSTOMERDEMOGRAPHICS_CustomerTypeID',
)

Inserting data into table: CustomerDemographic with chunk size: 1000


### CustomerCustomerDemo

In [505]:
# SQL column types for the CustomerCustomerDemo load; every warehouse column
# name is the source column name behind the
# CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_ prefix.
customercustomerdemos_dtypes = {
    f'CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_{column}': sql_type
    for column, sql_type in {
        'CustomerID': NVARCHAR(10),
        'CustomerTypeID': NVARCHAR(10),
    }.items()
}

# TODO how to handle fk? the nk is nvarchar, not an integer

# CustomerTypeID uses the CustomerDemographic key mapping; CustomerID is handed
# over with an empty mapping.
prepare_and_insert(
    customercustomerdemos,
    customercustomerdemos_dtypes,
    'CustomerCustomerDemo',
    united_outdoors_engine,
    {
        'CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_CustomerTypeID': customerdemographics_nk_sk_dict,
        'CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_CustomerID': {},
    },
)

Replacing natural keys with surrogate keys for column: CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_CustomerTypeID
Replacing natural keys with surrogate keys for column: CUSTOMERCUSTOMERDEMO_CUSTOMERCUSTOMERDEMO_CustomerID
Inserting data into table: CustomerCustomerDemo with chunk size: 1000


### SalesTerritoryHistory

In [506]:
# SQL column types for the SalesTerritoryHistory load; every warehouse column
# name is the source column name behind the
# SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_ prefix.
salesterritoryhistories_dtypes = {
    f'SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_{column}': sql_type
    for column, sql_type in {
        'BusinessEntityID': Integer,
        'TerritoryID': Integer,
        'StartDate': DATE,
        'EndDate': DATE,
    }.items()
}

# BusinessEntityID uses the BusinessEntity key mapping; TerritoryID is handed
# over with an empty mapping.
prepare_and_insert(
    salesterritoryhistories,
    salesterritoryhistories_dtypes,
    'SalesTerritoryHistory',
    united_outdoors_engine,
    {
        'SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_BusinessEntityID': businessentities_nk_sk_dict,
        'SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_TerritoryID': {},
    },
)

Replacing natural keys with surrogate keys for column: SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_BusinessEntityID
Replacing natural keys with surrogate keys for column: SALESTERRITORYHISTORY_SALESTERRITORYHISTORY_TerritoryID
Inserting data into table: SalesTerritoryHistory with chunk size: 500


### ProductListPriceHistory

In [507]:
# SQL column types for the ProductListPriceHistory load; every warehouse column
# name is the source column name behind the
# PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_ prefix.
productlistpricehistories_dtypes = {
    f'PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_{column}': sql_type
    for column, sql_type in {
        'ProductID': Integer,
        'StartDate': DATE,
        'EndDate': DATE,
        'ListPrice': MONEY,
    }.items()
}

# ProductID is handed over with an empty key mapping
prepare_and_insert(
    productlistpricehistories,
    productlistpricehistories_dtypes,
    'ProductListPriceHistory',
    united_outdoors_engine,
    {'PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_ProductID': {}},
)

Replacing natural keys with surrogate keys for column: PRODUCTLISTPRICEHISTORY_PRODUCTLISTPRICEHISTORY_ProductID
Inserting data into table: ProductListPriceHistory with chunk size: 500


### ProductCostHistory

In [508]:
# SQL column types for the ProductCostHistory load; every warehouse column name
# is the source column name behind the PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_ prefix.
productcosthistories_dtypes = {
    f'PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_{column}': sql_type
    for column, sql_type in {
        'ProductID': Integer,
        'StartDate': DATE,
        'EndDate': DATE,
        'StandardCost': MONEY,
    }.items()
}

# ProductID is handed over with an empty key mapping
prepare_and_insert(
    productcosthistories,
    productcosthistories_dtypes,
    'ProductCostHistory',
    united_outdoors_engine,
    {'PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_ProductID': {}},
)

Replacing natural keys with surrogate keys for column: PRODUCTCOSTHISTORY_PRODUCTCOSTHISTORY_ProductID
Inserting data into table: ProductCostHistory with chunk size: 500


### ShoppingCartItem

In [509]:
# SQL column types for the ShoppingCartItem load; every warehouse column name
# is the source column name behind the SHOPPINGCARTITEM_SHOPPINGCARTITEM_ prefix.
shoppingcartitems_dtypes = {
    f'SHOPPINGCARTITEM_SHOPPINGCARTITEM_{column}': sql_type
    for column, sql_type in {
        'ShoppingCartItemID': Integer,
        'ShoppingCartID': Integer,
        'ProductID': Integer,
        'Quantity': Integer,
        'DateCreated': DATE,
    }.items()
}

# ProductID and ShoppingCartID are both handed over with empty key mappings
prepare_and_insert(
    shoppingcartitems,
    shoppingcartitems_dtypes,
    'ShoppingCartItem',
    united_outdoors_engine,
    {
        'SHOPPINGCARTITEM_SHOPPINGCARTITEM_ProductID': {},
        'SHOPPINGCARTITEM_SHOPPINGCARTITEM_ShoppingCartID': {},
    },
)

Replacing natural keys with surrogate keys for column: SHOPPINGCARTITEM_SHOPPINGCARTITEM_ProductID
Replacing natural keys with surrogate keys for column: SHOPPINGCARTITEM_SHOPPINGCARTITEM_ShoppingCartID
Inserting data into table: ShoppingCartItem with chunk size: 400


### SalesPersonQuotaHistory

In [510]:
# SQL column types for the SalesPersonQuotaHistory load; every warehouse column
# name is the source column name behind the
# SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_ prefix.
salespersonquotahistories_dtypes = {
    f'SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_{column}': sql_type
    for column, sql_type in {
        'BusinessEntityID': Integer,
        'QuotaDate': DATE,
        'SalesQuota': MONEY,
    }.items()
}

# BusinessEntityID is translated through the BusinessEntity key mapping
prepare_and_insert(
    salespersonquotahistories,
    salespersonquotahistories_dtypes,
    'SalesPersonQuotaHistory',
    united_outdoors_engine,
    {'SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_BusinessEntityID': businessentities_nk_sk_dict},
)

Replacing natural keys with surrogate keys for column: SALESPERSONQUOTAHISTORY_SALESPERSONQUOTAHISTORY_BusinessEntityID
Inserting data into table: SalesPersonQuotaHistory with chunk size: 666


### Customers

In [511]:
# SQL column types for the Customer load, keyed by warehouse column name.
# Two key families are mixed here (CUSTOMER_CUSTOMERS_* and CUSTOMER_CUSTOMER_*),
# so the names are spelled out in full.
customers_dtypes = {
    # identifiers and references
    'CUSTOMER_CUSTOMERS_ID': Integer,
    'CUSTOMER_CUSTOMER_PersonID': Integer,
    'CUSTOMER_CUSTOMER_StoreID': Integer,
    'CUSTOMER_CUSTOMER_TerritoryID': Integer,
    'CUSTOMER_CUSTOMER_AccountNumber': CHAR(10),
    # company / contact details
    'CUSTOMER_CUSTOMERS_CompanyName': NVARCHAR(100),
    'CUSTOMER_CUSTOMERS_ContactName': NVARCHAR(255),
    'CUSTOMER_CUSTOMERS_ContactTitle': NVARCHAR(100),
    'CUSTOMER_CUSTOMERS_Address': NVARCHAR(255),
    'CUSTOMER_CUSTOMERS_City': NVARCHAR(100),
    'CUSTOMER_CUSTOMERS_Region': NVARCHAR(50),
    'CUSTOMER_CUSTOMERS_PostalCode': NVARCHAR(20),
    'CUSTOMER_CUSTOMERS_Country': NVARCHAR(150),
    'CUSTOMER_CUSTOMERS_Phone': NVARCHAR(24),
    'CUSTOMER_CUSTOMERS_Fax': NVARCHAR(24),
    # person name and state
    'CUSTOMER_CUSTOMER_Fname': NVARCHAR(255),
    'CUSTOMER_CUSTOMER_Lname': NVARCHAR(255),
    'CUSTOMER_CUSTOMER_State': CHAR(2),
}

# PersonID, StoreID and TerritoryID are all handed over with empty key mappings.
# NOTE(review): the recorded run of this cell printed "Exception during reset
# or similar" with a pyodbc connection-write error after the insert message -
# verify that the Customer rows were actually written.
customers_nk_sk_dict = prepare_and_insert_return_sk(
    customers,
    customers_dtypes,
    'Customer',
    united_outdoors_engine,
    'CUSTOMER_CUSTOMERS_ID',
    {
        'CUSTOMER_CUSTOMER_PersonID': {},
        'CUSTOMER_CUSTOMER_StoreID': {},
        'CUSTOMER_CUSTOMER_TerritoryID': {},
    },
)

Replacing natural keys with surrogate keys for column: CUSTOMER_CUSTOMER_PersonID
Replacing natural keys with surrogate keys for column: CUSTOMER_CUSTOMER_StoreID
Replacing natural keys with surrogate keys for column: CUSTOMER_CUSTOMER_TerritoryID
Inserting data into table: Customer with chunk size: 111


Exception during reset or similar
Traceback (most recent call last):
  File "c:\Users\yunyi\AppData\Local\pypoetry\Cache\virtualenvs\clodsire-2cKgtQrF-py3.12\Lib\site-packages\sqlalchemy\pool\base.py", line 986, in _finalize_fairy
    fairy._reset(
  File "c:\Users\yunyi\AppData\Local\pypoetry\Cache\virtualenvs\clodsire-2cKgtQrF-py3.12\Lib\site-packages\sqlalchemy\pool\base.py", line 1432, in _reset
    pool._dialect.do_rollback(self)
  File "c:\Users\yunyi\AppData\Local\pypoetry\Cache\virtualenvs\clodsire-2cKgtQrF-py3.12\Lib\site-packages\sqlalchemy\dialects\mssql\base.py", line 3117, in do_rollback
    super().do_rollback(dbapi_connection)
  File "c:\Users\yunyi\AppData\Local\pypoetry\Cache\virtualenvs\clodsire-2cKgtQrF-py3.12\Lib\site-packages\sqlalchemy\engine\default.py", line 698, in do_rollback
    dbapi_connection.rollback()
pyodbc.Error: ('01000', '[01000] [Microsoft][ODBC SQL Server Driver][DBMSLPCN]ConnectionWrite (WrapperWrite()). (233) (SQLEndTran); [01000] [Microsoft][ODB

### SpecialOffer

In [512]:
# SQL column types for the SpecialOffer load, keyed by warehouse column name
specialoffers_dtypes = {
    'SPECIALOFFER_SPECIALOFFER_ID': Integer,
    'SPECIALOFFER_SPECIALOFFER_Description': String,
    # A discount percentage is a fraction, so an Integer column type would lose
    # the value. DECIMAL(8,4) matches how SALESPERSON_SALESPERSON_CommissionPct
    # is declared in the SalesPerson load and still accepts whole numbers.
    # NOTE(review): presumably this column holds AdventureWorks'
    # SpecialOffer.DiscountPct - confirm against the frame built earlier.
    'SPECIALOFFER_SPECIALOFFER_DiscountPCT': DECIMAL(8,4),
    'SPECIALOFFER_SPECIALOFFER_Type': NVARCHAR(100),
    'SPECIALOFFER_SPECIALOFFER_Category': NVARCHAR(100),
    'SPECIALOFFER_SPECIALOFFER_StartDate': DATE,
    'SPECIALOFFER_SPECIALOFFER_EndDate': DATE,
    'SPECIALOFFER_SPECIALOFFER_MinQty': Integer,
    'SPECIALOFFER_SPECIALOFFER_MaxQty': Integer,
    'SPECIALOFFER_SPECIALOFFERPRODUCT_ProductID': Integer
}

# Insert and keep the key mapping that the helper returns for
# SPECIALOFFER_SPECIALOFFER_ID.
specialoffer_nk_sk_dict = prepare_and_insert_return_sk(specialoffers, specialoffers_dtypes, 'SpecialOffer', united_outdoors_engine, 'SPECIALOFFER_SPECIALOFFER_ID')

Inserting data into table: SpecialOffer with chunk size: 200


### CreditCard

In [513]:
# SQL column types for the CreditCard load, keyed by warehouse column name
creditcards_dtypes = {
    # CREDITCARD_CREDITCARD_* attributes
    'CREDITCARD_CREDITCARD_ID': Integer,
    'CREDITCARD_CREDITCARD_CardType': NVARCHAR(100),
    'CREDITCARD_CREDITCARD_CardNumber': BigInteger,
    'CREDITCARD_CREDITCARD_ExpMonth': Integer,
    'CREDITCARD_CREDITCARD_ExpYear': Integer,
    # link to the card holder
    'CREDITCARD_PERSONCREDITCARD_BusinessEntityID': Integer,
}

# Insert and keep the key mapping that the helper returns for
# CREDITCARD_CREDITCARD_ID.
creditcards_nk_sk_dict = prepare_and_insert_return_sk(
    creditcards,
    creditcards_dtypes,
    'CreditCard',
    united_outdoors_engine,
    'CREDITCARD_CREDITCARD_ID',
)

Inserting data into table: CreditCard with chunk size: 333


### Supplier

In [514]:
# SQL column types for the Supplier load; every warehouse column name is the
# source column name behind the SUPPLIER_SUPPLIERS_ prefix.
suppliers_dtypes = {
    f'SUPPLIER_SUPPLIERS_{column}': sql_type
    for column, sql_type in {
        'SupplierID': Integer,
        'CompanyName': NVARCHAR(100),
        'ContactName': NVARCHAR(255),
        'ContactTitle': NVARCHAR(100),
        'Address': NVARCHAR(150),
        'City': NVARCHAR(50),
        'Region': NVARCHAR(50),
        'PostalCode': NVARCHAR(20),
        'Country': NVARCHAR(100),
        'Phone': VARCHAR(20),
        'Fax': NVARCHAR(30),
        'HomePage': NVARCHAR(255),
    }.items()
}

# No key mappings are passed for this table
prepare_and_insert(
    suppliers,
    suppliers_dtypes,
    'Supplier',
    united_outdoors_engine,
)

Inserting data into table: Supplier with chunk size: 166


### Currency

In [515]:
# SQL column types for the Currency load, keyed by warehouse column name
currencies_dtypes = {
    'CURRENCY_CURRENCY_CurrencyCode': NVARCHAR(10),
    'CURRENCY_CURRENCY_Name': NVARCHAR(100),
    'CURRENCY_COUNTRYREGIONCURRENCY_CountryRegionCode': NVARCHAR(10),
    'CURRENCY_CURRENCYRATE_CurrencyRateID': Integer,
    'CURRENCY_CURRENCYRATE_CurrencyRateDate': DATE,
    'CURRENCY_CURRENCYRATE_FromCurrencyCode': NVARCHAR(10),
    'CURRENCY_CURRENCYRATE_ToCurrencyCode': NVARCHAR(10),
    # Exchange rates are fractional, so an Integer column type would drop the
    # decimals. MONEY is the type this notebook already uses for monetary
    # amounts and still accepts whole numbers.
    'CURRENCY_CURRENCYRATE_AverageRate': MONEY,
    'CURRENCY_CURRENCYRATE_EndOfDayRate': MONEY
}

# CurrencyRateID and the two currency-code columns are handed over with empty
# key mappings.
prepare_and_insert(currencies, currencies_dtypes, 'Currency', united_outdoors_engine, {'CURRENCY_CURRENCYRATE_CurrencyRateID': {}, 'CURRENCY_CURRENCYRATE_FromCurrencyCode': {}, 'CURRENCY_CURRENCYRATE_ToCurrencyCode': {}})

Replacing natural keys with surrogate keys for column: CURRENCY_CURRENCYRATE_CurrencyRateID
Replacing natural keys with surrogate keys for column: CURRENCY_CURRENCYRATE_FromCurrencyCode
Replacing natural keys with surrogate keys for column: CURRENCY_CURRENCYRATE_ToCurrencyCode
Inserting data into table: Currency with chunk size: 222


### Territory

In [516]:
# SQLAlchemy column types for the Territory load, keyed by the column names
# expected in `territories` (presumably a DataFrame built earlier - confirm).
territories_dtypes = {
    # REGION / TERRITORIES columns
    # NOTE(review): the two description columns use String without a length,
    # unlike the bounded NVARCHAR(n) used everywhere else - confirm intended.
    'TERRITORY_REGION_RegionID': Integer,
    'TERRITORY_REGION_RegionDescription': String,
    'TERRITORY_TERRITORIES_TerritoryID': Integer,
    'TERRITORY_TERRITORIES_TerritoryDescription': String,
    'TERRITORY_REGION_Region': NVARCHAR(50),
    # STATE columns
    'TERRITORY_STATE_StateID': NVARCHAR(10),
    'TERRITORY_STATE_StateName': NVARCHAR(100),
    'TERRITORY_STATE_StateCapital': NVARCHAR(50),
    'TERRITORY_STATE_Country': NVARCHAR(50),
    # COUNTRYREGION columns
    'TERRITORY_COUNTRYREGION_CountryRegionCode': NVARCHAR(10),
    'TERRITORY_COUNTRYREGION_Name': NVARCHAR(100),
    # STATEPROVINCE columns
    'TERRITORY_STATEPROVINCE_StateProvinceID': Integer,
    'TERRITORY_STATEPROVINCE_StateProvinceCode': NVARCHAR(10),
    'TERRITORY_STATEPROVINCE_IsOnlyStateProvinceFlag': CHAR(1),
    'TERRITORY_STATEPROVINCE_Name': NVARCHAR(100),
    # SALESTAXRATE columns
    'TERRITORY_SALESTAXRATE_SalesTaxRateID': Integer,
    'TERRITORY_SALESTAXRATE_TaxType': NVARCHAR(50),
    'TERRITORY_SALESTAXRATE_TaxRate': DECIMAL(8,2),
    'TERRITORY_SALESTAXRATE_Name': NVARCHAR(150)
}

# Columns whose natural keys are replaced by surrogate keys during the load.
# An empty dict is passed for each column; how prepare_and_insert resolves an
# empty mapping is defined elsewhere in the notebook.
territory_key_columns = {
    'TERRITORY_TERRITORIES_TerritoryID': {},
    'TERRITORY_STATE_StateID': {},
    'TERRITORY_COUNTRYREGION_CountryRegionCode': {},
    'TERRITORY_STATEPROVINCE_StateProvinceID': {},
    'TERRITORY_SALESTAXRATE_SalesTaxRateID': {},
}

prepare_and_insert(
    territories,
    territories_dtypes,
    'Territory',
    united_outdoors_engine,
    territory_key_columns,
)

Replacing natural keys with surrogate keys for column: TERRITORY_TERRITORIES_TerritoryID
Replacing natural keys with surrogate keys for column: TERRITORY_STATE_StateID
Replacing natural keys with surrogate keys for column: TERRITORY_COUNTRYREGION_CountryRegionCode
Replacing natural keys with surrogate keys for column: TERRITORY_STATEPROVINCE_StateProvinceID
Replacing natural keys with surrogate keys for column: TERRITORY_SALESTAXRATE_SalesTaxRateID
Inserting data into table: Territory with chunk size: 105


### OrderHeader

In [517]:
# SQLAlchemy column types for the OrderHeader load, keyed by the column names
# expected in `orderheaders` (presumably a DataFrame built earlier - confirm).
orderheaders_dtypes = {
    # SALESORDERDETAIL columns (order lines)
    'ORDERHEADER_SALESORDERDETAIL_SalesOrderID': Integer,
    'ORDERHEADER_SALESORDERDETAIL_SalesOrderDetailID': Integer,
    'ORDERHEADER_SALESORDERDETAIL_CarrierTrackingNumber': NVARCHAR(25),
    'ORDERHEADER_SALESORDERDETAIL_OrderQty': Integer,
    'ORDERHEADER_SALESORDERDETAIL_ProductID': Integer,
    'ORDERHEADER_SALESORDERDETAIL_SpecialOfferID': Integer,
    'ORDERHEADER_SALESORDERDETAIL_UnitPrice': DECIMAL(8,2),
    'ORDERHEADER_SALESORDERDETAIL_UnitPriceDiscount': DECIMAL(8,2),
    # NOTE(review): LineTotal is typed as an unbounded NVARCHAR although the
    # neighbouring amounts are DECIMAL - confirm this is intentional.
    'ORDERHEADER_SALESORDERDETAIL_LineTotal': NVARCHAR,
    # SALESORDERHEADER columns: revision, dates and status
    'ORDERHEADER_SALESORDERHEADER_RevisionNumber': Integer,
    'ORDERHEADER_SALESORDERHEADER_OrderDate': DATE,
    'ORDERHEADER_SALESORDERHEADER_DueDate': DATE,
    'ORDERHEADER_SALESORDERHEADER_ShipDate': DATE,
    'ORDERHEADER_SALESORDERHEADER_Status': CHAR(1),
    'ORDERHEADER_SALESORDERHEADER_OnlineOrderFlag': BIT,
    # SALESORDERHEADER columns: document and account numbers
    'ORDERHEADER_SALESORDERHEADER_SalesOrderNumber': VARCHAR(100),
    'ORDERHEADER_SALESORDERHEADER_PurchaseOrderNumber': VARCHAR(25),
    'ORDERHEADER_SALESORDERHEADER_AccountNumber': NVARCHAR(15),
    # SALESORDERHEADER columns: references to other entities
    'ORDERHEADER_SALESORDERHEADER_CustomerID': Integer,
    'ORDERHEADER_SALESORDERHEADER_SalesPersonID': Integer,
    'ORDERHEADER_SALESORDERHEADER_TerritoryID': Integer,
    'ORDERHEADER_SALESORDERHEADER_BillToAddress': Integer,
    'ORDERHEADER_SALESORDERHEADER_ShipToAddress': Integer,
    'ORDERHEADER_SALESORDERHEADER_ShipMethodID': Integer,
    'ORDERHEADER_SALESORDERHEADER_CreditCardID': Integer,
    'ORDERHEADER_SALESORDERHEADER_CreditCardApprovalCode': NVARCHAR(15),
    'ORDERHEADER_SALESORDERHEADER_CurrencyRateID': Integer,
    # SALESORDERHEADER columns: monetary totals and comment
    'ORDERHEADER_SALESORDERHEADER_SubTotal': DECIMAL(11,2),
    'ORDERHEADER_SALESORDERHEADER_TaxAmt': DECIMAL(11,2),
    'ORDERHEADER_SALESORDERHEADER_Freight': DECIMAL(11,2),
    'ORDERHEADER_SALESORDERHEADER_TotalDue': DECIMAL(11,2),
    'ORDERHEADER_SALESORDERHEADER_Comment': NVARCHAR(128),
    # SALESORDERHEADERSALESREASON / SALESREASON columns
    'ORDERHEADER_SALESORDERHEADERSALESREASON_SalesOrderID': Integer,
    'ORDERHEADER_SALESREASON_SalesReasonID': Integer,
    'ORDERHEADER_SALESREASON_Name': NVARCHAR(100),
    'ORDERHEADER_SALESREASON_ReasonType': NVARCHAR(100),
}

# Columns whose natural keys are replaced by surrogate keys during the load.
# SpecialOfferID and CreditCardID reuse existing natural-key/surrogate-key
# dictionaries (creditcards_nk_sk_dict is the value returned by the CreditCard
# load; specialoffer_nk_sk_dict presumably comes from the SpecialOffer load).
# The other two columns pass an empty dict; how prepare_and_insert resolves an
# empty mapping is defined elsewhere in the notebook.
orderheader_key_columns = {
    'ORDERHEADER_SALESORDERHEADER_CustomerID': {},
    'ORDERHEADER_SALESORDERDETAIL_SpecialOfferID': specialoffer_nk_sk_dict,
    'ORDERHEADER_SALESORDERHEADER_CreditCardID': creditcards_nk_sk_dict,
    'ORDERHEADER_SALESORDERHEADER_CurrencyRateID': {},
}

prepare_and_insert(
    orderheaders,
    orderheaders_dtypes,
    'OrderHeader',
    united_outdoors_engine,
    orderheader_key_columns,
)

Replacing natural keys with surrogate keys for column: ORDERHEADER_SALESORDERHEADER_CustomerID
Replacing natural keys with surrogate keys for column: ORDERHEADER_SALESORDERDETAIL_SpecialOfferID
Replacing natural keys with surrogate keys for column: ORDERHEADER_SALESORDERHEADER_CreditCardID
Replacing natural keys with surrogate keys for column: ORDERHEADER_SALESORDERHEADER_CurrencyRateID
Inserting data into table: OrderHeader with chunk size: 57


## Constraints
Altering the tables to add the (foreign key) constraints.

In [518]:
# Load the constraint script (run only after all tables have been filled)
constraints_script_path = 'sql/UnitedOutdoors_constraints.sql'
with open(constraints_script_path) as constraints_file:
    sql_script = constraints_file.read()

# Split the script into its batches and run them against UnitedOutdoors
split_and_execute_sql_script(sql_script, united_outdoors_engine)

## Closing connections

In [519]:
# Release the connection pool of every engine used by this notebook.
# Each engine is disposed inside its own try block so that an error on one
# engine does not prevent the remaining engines from being disposed.
for open_engine in (
    united_outdoors_engine,
    northwind_engine,
    aenc_engine,
    adventureworks_engine,
):
    # create_connection returns None in place of the engine when the initial
    # connection attempt failed (presumably how these engines were obtained);
    # there is nothing to dispose in that case.
    if open_engine is None:
        continue
    try:
        open_engine.dispose()
    except OperationalError as e:
        print(f'Error: {e}')

## Time elapsed

In [520]:
# Take the closing timestamp and report the wall-clock duration since
# start_time was recorded at the top of the notebook.
end_time = time.time()
elapsed_seconds = end_time - start_time

print(f'Time elapsed: {elapsed_seconds} seconds')

Time elapsed: 1635.9290704727173 seconds
