# United outdoors datawarehouse

## Imports

In [421]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, BigInteger, Integer, String, LargeBinary, VARCHAR, NVARCHAR, DECIMAL, CHAR, \
    DATE
from sqlalchemy.dialects.mssql import BIT, XML, MONEY
from sqlalchemy.exc import OperationalError
from urllib import parse
import re
import time

In [422]:
start_time = time.time()

## Database connection details

In [423]:
DB = {
    'servername' : '(local)\\SQLEXPRESS',
    'united_outdoors_database' : 'UnitedOutdoors',
    'northwind_database' : 'Northwind',
    'aenc_database' : 'Aenc',
    'adventureworks_database' : 'AdventureWorks2019',
    'master' : 'master'
}

In [424]:
def create_connection(servername, database):
    params = parse.quote_plus(f'DRIVER={{SQL Server}};SERVER={servername};DATABASE={database};Trusted_Connection=yes')
    engine = create_engine(f'mssql+pyodbc:///?odbc_connect={params}', use_setinputsizes=False, connect_args={'options': '-c search_path=dbo'}, fast_executemany=True) # setinputsizes needs to be turned off for sql server, idk why but gives errors otherwise
    try:
        establish_conn = engine.connect()
        print(f'Connection to {database} database successful')
        return establish_conn, engine
    except OperationalError as e:
        print(f'Error: {e}')
        return None, None

In [425]:
def split_and_execute_sql_script(script, execute_engine):
    # creating a connection
    connection = execute_engine.connect()
    
    # splitting the script into the database creation and the rest
    commands = re.split(r'GO\n', script)
    # removing all \bGO\b from the commands
    commands = [re.sub(r'\bGO\b', '', command) for command in commands]
    
    # Execute the commands
    for command in commands:
        command = command.strip()
        # Skip if the command is empty or 'GO'
        if not command or command.upper() == 'GO':
            continue
        try:
            connection.connection.execute(command)
            connection.connection.commit()
            #print(f'Command executed: {command}')
        except OperationalError as e:
            print(f'Error: {e} at command: {command}')

In [426]:
def bulk_insert(df, dtypes, table_name, engine):
    # calculating chunk size
    chunk_size = (2000 // len(df.columns))  # 2100 is the maximum number of parameters in a query, -100 for safety

    print(f'Inserting data into table: {table_name} with chunk size: {chunk_size}')
    df.to_sql(name=table_name, schema='dbo', con=engine, if_exists='append', index=False, dtype=dtypes, method='multi', chunksize= chunk_size)

In [427]:
def prepare(dataframe, nk_sk_dict=None):
    # replacing the natural keys with the surrogate keys
    if nk_sk_dict:
        for column in nk_sk_dict:
            # Check for duplicate keys
            if len(nk_sk_dict[column]) != len(set(nk_sk_dict[column])):
                raise ValueError(f'Duplicate keys found in nk_sk_dict for column: {column}')
            else:
                print(f'Replacing natural keys with surrogate keys for column: {column}')
                for natural_key in nk_sk_dict[column]:
                    dataframe[column] = dataframe[column].replace(natural_key, nk_sk_dict[column][natural_key])
    
    # replace empty values with None
    dataframe = dataframe.where(pd.notnull(dataframe), None)
    dataframe = dataframe.replace({np.nan: None})
    
    # stripping all columns with string data
    dataframe = dataframe.map(lambda x: x.strip() if isinstance(x, str) else x)
    # replacing all empty strings with None
    dataframe = dataframe.replace(r'^\s*$', None, regex=True)

In [428]:
def prepare_and_insert(dataframe, dtypes,table_name, nk_sk_dict=None):
    """
    Prepares the dataframe for insertion into the database and inserts it into the database.
        @param dataframe: The dataframe to be inserted into the database
        @param dtypes: The data types of the columns in the dataframe
        @param table_name: The name of the table in the database
        @param nk_sk_dict: A 3d dictionary containing the natural keys and their corresponding surrogate keys, per column (so nk_sk_dict[column][natural_key] = surrogate_key)
    """
       
    prepare(dataframe, nk_sk_dict)
    
    # adding the data to the database
    bulk_insert(dataframe, dtypes, table_name, united_outdoors_engine)

In [429]:
def prepare_and_insert_return_sk(dataframe, dtypes, table_name, natural_key_column, nk_sk_dict=None):
    """
    Prepares the dataframe for insertion into the database and inserts it into the database.
        @param dataframe: The dataframe to be inserted into the database
        @param dtypes: The data types of the columns in the dataframe
        @param table_name: The name of the table in the database
        @param natural_key_column: The name of the column containing the natural keys
        @param nk_sk_dict: A 3d dictionary containing the natural keys and their corresponding surrogate keys, per column (so nk_sk_dict[column][natural_key] = surrogate_key)
        @return: A dictionary containing the natural keys and their corresponding surrogate keys
    """
    
    prepare(dataframe, nk_sk_dict)
    
    # adding the data to the database
    bulk_insert(dataframe, dtypes, table_name, united_outdoors_engine)
    
    # making a cleaned dictionary without None/nan values in the natural_key_column
    mask = dataframe[natural_key_column].notnull()
    filtered_dataframe = dataframe[mask]
    
    # getting the natural keys and their corresponding surrogate keys
    # TODO FIX THIS, relying on the dataframe index is not a good idea
    nk_sk_dict = dict(zip(filtered_dataframe[natural_key_column], dataframe.index))
    
    # adding +1 to the index to get the surrogate key
    nk_sk_dict = {k: v + 1 for k, v in nk_sk_dict.items()}
    
    return nk_sk_dict

In [430]:
def prepare_and_update(table_name, update_engine, nk_sk_dict=None):   
    # creating a connection
    connection = update_engine.connect()

    # Start a transaction
    trans = connection.begin()

    try:
        # adding the data to the database
        print(f'Updating data in table: {table_name}')
        for column in nk_sk_dict:
            # Prepare a batch update statement
            update_stmts = []
            for natural_key in nk_sk_dict[column]:
                update_stmts.append(f'UPDATE {table_name} SET {column} = {nk_sk_dict[column][natural_key]} WHERE {column} = {int(natural_key)}')

                # If the number of parameters reaches the limit, execute the batch update and clear the list
                if len(update_stmts) * 2 >= 2000:  # Each update statement has 2 parameters, cause of the Column and Where clause
                    connection.connection.execute(";".join(update_stmts))
                    update_stmts = []

            # Execute the remaining update statements
            if update_stmts:
                connection.connection.execute(";".join(update_stmts))

        # Commit the transaction
        trans.commit()
    except:
        # Rollback the transaction in case of error
        trans.rollback()
        raise
    finally:
        # Close the connection
        connection.close()

In [431]:
def drop_modified_date_rowguid(dataframe):
    # dropping all columns with 'rowguid' in their name
    columns_to_drop_mr = dataframe.filter(like='rowguid').columns
    
    # dropping all columns with 'ModifiedDate' in their name
    columns_to_drop_mr = columns_to_drop_mr.append(dataframe.filter(like='ModifiedDate').columns)
    
    # dropping the columns
    dataframe.drop(columns=columns_to_drop_mr, inplace=True)

## Create the UnitedOutdoors datawarehouse

In [432]:
_, creation_engine = create_connection(DB["servername"], DB["master"])

# Open the SQL script file and read its contents
with open('sql/UnitedOutdoors_creation.sql', 'r') as file:
    sql_script = file.read()

split_and_execute_sql_script(sql_script, creation_engine)

creation_engine.dispose()

Connection to master database successful


## Connecting to the UnitedOutdoors datawarehouse

In [433]:
_ , united_outdoors_engine = create_connection(DB["servername"], DB["united_outdoors_database"])

Connection to UnitedOutdoors database successful


## Loading the data from the source databases

### Northwind database

#### Connection

In [434]:
northwind_conn, northwind_engine = create_connection(DB["servername"], DB["northwind_database"])

Connection to Northwind database successful


#### Loading data

In [435]:
# Load the data from the source database
northwind_categories = pd.read_sql('SELECT * FROM Categories', northwind_conn)
northwind_customer_customer_demo = pd.read_sql('SELECT * FROM CustomerCustomerDemo', northwind_conn)
northwind_customer_demographics = pd.read_sql('SELECT * FROM CustomerDemographics', northwind_conn)
northwind_customers = pd.read_sql('SELECT * FROM Customers', northwind_conn)
northwind_employees = pd.read_sql('SELECT * FROM Employees', northwind_conn)
northwind_employee_territories = pd.read_sql('SELECT * FROM EmployeeTerritories', northwind_conn)
northwind_order_details = pd.read_sql('SELECT * FROM [Order Details]', northwind_conn)
northwind_orders = pd.read_sql('SELECT * FROM Orders', northwind_conn)
northwind_products = pd.read_sql('SELECT * FROM Products', northwind_conn)
northwind_region = pd.read_sql('SELECT * FROM Region', northwind_conn)
northwind_shippers = pd.read_sql('SELECT * FROM Shippers', northwind_conn)
northwind_suppliers = pd.read_sql('SELECT * FROM Suppliers', northwind_conn)
northwind_territories = pd.read_sql('SELECT * FROM Territories', northwind_conn)

northwind_conn.close()

### Aenc database

#### Connection

In [436]:
aenc_conn , aenc_engine = create_connection(DB["servername"], DB["aenc_database"])

Connection to Aenc database successful


#### Loading data

In [437]:
aenc_bonus = pd.read_sql('SELECT * FROM Bonus', aenc_conn)
aenc_customer = pd.read_sql('SELECT * FROM Customer', aenc_conn)
aenc_department = pd.read_sql('SELECT * FROM Department', aenc_conn)
aenc_employee = pd.read_sql('SELECT * FROM Employee', aenc_conn)
aenc_product = pd.read_sql('SELECT * FROM Product', aenc_conn)
aenc_region = pd.read_sql('SELECT * FROM Region', aenc_conn)
aenc_sales_order = pd.read_sql('SELECT * FROM SalesOrder', aenc_conn)
aenc_sales_order_item = pd.read_sql('SELECT * FROM SalesOrderItem', aenc_conn)
aenc_state = pd.read_sql('SELECT * FROM State', aenc_conn)

aenc_conn.close()

### AdventureWorks database

#### Connection

In [438]:
adventureworks_conn, adventureworks_engine = create_connection(DB["servername"], DB["adventureworks_database"])

Connection to AdventureWorks2019 database successful


#### Loading data

In [439]:
adventureworks_humanresources_department = pd.read_sql('SELECT * FROM HumanResources.Department', adventureworks_conn)
adventureworks_humanresources_employee = pd.read_sql('SELECT * FROM HumanResources.Employee', adventureworks_conn)
adventureworks_humanresources_employeedepartmenthistory = pd.read_sql('SELECT * FROM HumanResources.EmployeeDepartmentHistory', adventureworks_conn)
adventureworks_humanresources_employeepayhistory = pd.read_sql('SELECT * FROM HumanResources.EmployeePayHistory', adventureworks_conn)
adventureworks_humanresources_jobcandidate = pd.read_sql('SELECT * FROM HumanResources.JobCandidate', adventureworks_conn)
adventureworks_humanresources_shift = pd.read_sql('SELECT * FROM HumanResources.Shift', adventureworks_conn)

In [440]:
adventureworks_person_address = pd.read_sql('SELECT AddressID, AddressLine1, AddressLine2, City, StateProvinceID, PostalCode, CAST(SpatialLocation AS VARCHAR(MAX)) AS SpatialLocation,rowguid, ModifiedDate   FROM Person.Address', adventureworks_conn)
adventureworks_person_address_type = pd.read_sql('SELECT * FROM Person.AddressType', adventureworks_conn)
adventureworks_person_businessentity = pd.read_sql('SELECT * FROM Person.BusinessEntity', adventureworks_conn)
adventureworks_person_businessentityaddress = pd.read_sql('SELECT * FROM Person.BusinessEntityAddress', adventureworks_conn)
adventureworks_person_businessentitycontact = pd.read_sql('SELECT * FROM Person.BusinessEntityContact', adventureworks_conn)
adventureworks_person_contacttype = pd.read_sql('SELECT * FROM Person.ContactType', adventureworks_conn)
adventureworks_person_countryregion = pd.read_sql('SELECT * FROM Person.CountryRegion', adventureworks_conn)
adventureworks_person_emailaddress = pd.read_sql('SELECT * FROM Person.EmailAddress', adventureworks_conn)
adventureworks_person_password = pd.read_sql('SELECT * FROM Person.Password', adventureworks_conn)
adventureworks_person_person = pd.read_sql('SELECT * FROM Person.Person', adventureworks_conn)
adventureworks_person_personphone = pd.read_sql('SELECT * FROM Person.PersonPhone', adventureworks_conn)
adventureworks_person_phonenumbertype = pd.read_sql('SELECT * FROM Person.PhoneNumberType', adventureworks_conn)
adventureworks_person_stateprovince = pd.read_sql('SELECT * FROM Person.StateProvince', adventureworks_conn)

In [441]:
adventureworks_production_bill_of_materials = pd.read_sql('SELECT * FROM Production.BillOfMaterials', adventureworks_conn)
adventureworks_production_culture = pd.read_sql('SELECT * FROM Production.Culture', adventureworks_conn)
adventureworks_production_document = pd.read_sql('SELECT * FROM Production.Document', adventureworks_conn)
adventureworks_production_illustration = pd.read_sql('SELECT * FROM Production.Illustration', adventureworks_conn)
adventureworks_production_location = pd.read_sql('SELECT * FROM Production.Location', adventureworks_conn)
adventureworks_production_product = pd.read_sql('SELECT * FROM Production.Product', adventureworks_conn)
adventureworks_production_productcategory = pd.read_sql('SELECT * FROM Production.ProductCategory', adventureworks_conn)
adventureworks_production_productcosthistory = pd.read_sql('SELECT * FROM Production.ProductCostHistory', adventureworks_conn)
adventureworks_production_productdescription = pd.read_sql('SELECT * FROM Production.ProductDescription', adventureworks_conn)
adventureworks_production_productdocument = pd.read_sql('SELECT * , CAST(DocumentNode AS VARCHAR(MAX)) AS DocumentNodeString  FROM Production.ProductDocument', adventureworks_conn)
adventureworks_production_productinventory = pd.read_sql('SELECT * FROM Production.ProductInventory', adventureworks_conn)
adventureworks_production_productlistpricehistory = pd.read_sql('SELECT * FROM Production.ProductListPriceHistory', adventureworks_conn)
adventureworks_production_productmodel = pd.read_sql('SELECT * FROM Production.ProductModel', adventureworks_conn)
adventureworks_production_productmodelillustration = pd.read_sql('SELECT * FROM Production.ProductModelIllustration', adventureworks_conn)
adventureworks_production_productmodelproductdescriptionculture = pd.read_sql('SELECT * FROM Production.ProductModelProductDescriptionCulture', adventureworks_conn)
adventureworks_production_productphoto = pd.read_sql('SELECT ProductPhotoID, CONVERT(VARCHAR(MAX),ThumbNailPhoto, 1) as ThumbNailPhotoHexString, ThumbNailPhotoFileName, CONVERT(VARCHAR(MAX), LargePhoto, 1) as LargePhotoHexString, LargePhotoFileName, ModifiedDate FROM Production.ProductPhoto', adventureworks_conn)
adventureworks_production_productproductphoto = pd.read_sql('SELECT * FROM Production.ProductProductPhoto', adventureworks_conn)
adventureworks_production_productreview = pd.read_sql('SELECT * FROM Production.ProductReview', adventureworks_conn)
adventureworks_production_productsubcategory = pd.read_sql('SELECT * FROM Production.ProductSubcategory', adventureworks_conn)
adventureworks_production_scrapreason = pd.read_sql('SELECT * FROM Production.ScrapReason', adventureworks_conn)
adventureworks_production_transactionhistory = pd.read_sql('SELECT * FROM Production.TransactionHistory', adventureworks_conn)
adventureworks_production_transactionhistoryarchive = pd.read_sql('SELECT * FROM Production.TransactionHistoryArchive', adventureworks_conn)
adventureworks_production_unitmeasure = pd.read_sql('SELECT * FROM Production.UnitMeasure', adventureworks_conn)
adventureworks_production_workorder = pd.read_sql('SELECT * FROM Production.WorkOrder', adventureworks_conn)
adventureworks_production_workorderrouting = pd.read_sql('SELECT * FROM Production.WorkOrderRouting', adventureworks_conn)

In [442]:
adventureworks_purchasing_productvendor = pd.read_sql('SELECT * FROM Purchasing.ProductVendor', adventureworks_conn)
adventureworks_purchasing_purchaseorderdetail = pd.read_sql('SELECT * FROM Purchasing.PurchaseOrderDetail', adventureworks_conn)
adventureworks_purchasing_purchaseorderheader = pd.read_sql('SELECT * FROM Purchasing.PurchaseOrderHeader', adventureworks_conn)
adventureworks_purchasing_shipmethod = pd.read_sql('SELECT * FROM Purchasing.ShipMethod', adventureworks_conn)
adventureworks_purchasing_vendor = pd.read_sql('SELECT * FROM Purchasing.Vendor', adventureworks_conn)

In [443]:
adventureworks_sales_countryregioncurrency = pd.read_sql('SELECT * FROM Sales.CountryRegionCurrency', adventureworks_conn)
adventureworks_sales_creditcard = pd.read_sql('SELECT * FROM Sales.CreditCard', adventureworks_conn)
adventureworks_sales_currency = pd.read_sql('SELECT * FROM Sales.Currency', adventureworks_conn)
adventureworks_sales_currencyrate = pd.read_sql('SELECT * FROM Sales.CurrencyRate', adventureworks_conn)
adventureworks_sales_customer = pd.read_sql('SELECT * FROM Sales.Customer', adventureworks_conn)
adventureworks_sales_personcreditcard = pd.read_sql('SELECT * FROM Sales.PersonCreditCard', adventureworks_conn)
adventureworks_sales_salesorderdetail = pd.read_sql('SELECT * FROM Sales.SalesOrderDetail', adventureworks_conn)
adventureworks_sales_salesorderheader = pd.read_sql('SELECT * FROM Sales.SalesOrderHeader', adventureworks_conn)
adventureworks_sales_salesorderhearerrsaleseason = pd.read_sql('SELECT * FROM Sales.SalesOrderHeaderSalesReason', adventureworks_conn)
adventureworks_sales_salesperson = pd.read_sql('SELECT * FROM Sales.SalesPerson', adventureworks_conn)
adventureworks_sales_salespersonquotahistory = pd.read_sql('SELECT * FROM Sales.SalesPersonQuotaHistory', adventureworks_conn)
adventureworks_sales_salesreason = pd.read_sql('SELECT * FROM Sales.SalesReason', adventureworks_conn)
adventureworks_sales_salestaxrate = pd.read_sql('SELECT * FROM Sales.SalesTaxRate', adventureworks_conn)
adventureworks_sales_salesterritory = pd.read_sql('SELECT * FROM Sales.SalesTerritory', adventureworks_conn)
adventureworks_sales_salesterritoryhistory = pd.read_sql('SELECT * FROM Sales.SalesTerritoryHistory', adventureworks_conn)
adventureworks_sales_shoppingcartitem = pd.read_sql('SELECT * FROM Sales.ShoppingCartItem', adventureworks_conn)
adventureworks_sales_specialoffer = pd.read_sql('SELECT * FROM Sales.SpecialOffer', adventureworks_conn)
adventureworks_sales_specialofferproduct = pd.read_sql('SELECT * FROM Sales.SpecialOfferProduct', adventureworks_conn)
adventureworks_sales_store = pd.read_sql('SELECT * FROM Sales.Store', adventureworks_conn)

adventureworks_conn.close()

## Combining the data
ORDER MATTERS, CAUSE SURROGATE KEYS
SK STILL NEEDS TO BE DONE

### Departments
Combining aenc and adventureworks department data

In [444]:
# adding DEPARTMENT_source_database columns to the dataframes
aenc_department['DEPARTMENT_source_database'] = 'aenc'
adventureworks_humanresources_department['DEPARTMENT_source_database'] = 'adventureworks'

# combining all department data
departments = pd.concat([aenc_department, adventureworks_humanresources_department], ignore_index=True)

# combining name and department name columns to create a name column
departments['DEPARTMENT_DEPARTMENT_DeptName'] = departments['Name'].combine_first(departments['dept_name'])
# combining dept_id and DepartmentID columns
departments['DEPARTMENT_DEPARTMENT_DeptID'] = departments['dept_id'].combine_first(departments['DepartmentID'])

# dropping the redundant columns
drop_modified_date_rowguid(departments)
departments.drop(columns=['dept_id', 'Name', 'dept_name', 'DepartmentID'], inplace=True)

# renaming the remaining columns
departments.rename(columns={'dept_head_id': 'DEPARTMENT_DEPARTMENT_DeptHeadID', 'GroupName': 'DEPARTMENT_DEPARTMENT_GroupName'}, inplace=True)

departments.head()

  departments['DEPARTMENT_DEPARTMENT_DeptID'] = departments['dept_id'].combine_first(departments['DepartmentID'])


Unnamed: 0,DEPARTMENT_DEPARTMENT_DeptHeadID,DEPARTMENT_source_database,DEPARTMENT_DEPARTMENT_GroupName,DEPARTMENT_DEPARTMENT_DeptName,DEPARTMENT_DEPARTMENT_DeptID
0,,adventureworks,Research and Development,Engineering,1.0
1,,adventureworks,Research and Development,Tool Design,2.0
2,,adventureworks,Sales and Marketing,Sales,3.0
3,,adventureworks,Sales and Marketing,Marketing,4.0
4,,adventureworks,Inventory Management,Purchasing,5.0


### BusinessEntities

In [445]:
# Combining the adventureworks Person.BusinessEntity, BusinessEntityContact and ContactType data
businessentities= pd.merge(adventureworks_person_businessentity, adventureworks_person_businessentitycontact, left_on='BusinessEntityID', right_on='BusinessEntityID', suffixes=('_person_businessentity', '_businessentitycontact'), how="outer")

businessentities = pd.merge(businessentities, adventureworks_person_contacttype, left_on='ContactTypeID', right_on='ContactTypeID', suffixes=('', '_contacttype'), how="outer")

# dropping the modified date and rowguid columns
drop_modified_date_rowguid(businessentities)

# renaming the columns
businessentities.rename(columns={'BusinessEntityID': 'BUSINESSENTITY_BUSINESSENTITY_BusinessEntityID', 'PersonID': 'BUSINESSENTITY_BUSINESSENTITYCONTACT_PersonID', 'ContactTypeID': 'BUSINESSENTITY_CONTACTTYPE_ContactTypeID', 'Name' : 'BUSINESSENTITY_CONTACTTYPE_Name'}, inplace=True)

businessentities.head()

Unnamed: 0,BUSINESSENTITY_BUSINESSENTITY_BusinessEntityID,BUSINESSENTITY_BUSINESSENTITYCONTACT_PersonID,BUSINESSENTITY_CONTACTTYPE_ContactTypeID,BUSINESSENTITY_CONTACTTYPE_Name
0,,,1.0,Accounting Manager
1,1510.0,1509.0,2.0,Assistant Sales Agent
2,1518.0,1517.0,2.0,Assistant Sales Agent
3,1522.0,1521.0,2.0,Assistant Sales Agent
4,1528.0,1527.0,2.0,Assistant Sales Agent


### BusinessEntityAddresses

In [446]:
# Combining the adventureworks Person.BusinessEntityAddress, Address and AddressType data
businessentityaddresses = pd.merge(adventureworks_person_businessentityaddress, adventureworks_person_address, left_on='AddressID', right_on='AddressID', suffixes=('', '_address'), how="outer")

businessentityaddresses = pd.merge(businessentityaddresses, adventureworks_person_address_type, left_on='AddressTypeID', right_on='AddressTypeID', suffixes=('', '_address_type'), how="outer")


# dropping the modified date and rowguid columns
drop_modified_date_rowguid(businessentityaddresses)

# renaming the columns
businessentityaddresses.rename(columns={'BusinessEntityID': 'BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID', 'AddressID': 'BUSINESSENTITYADDRESS_ADDRESS_AddressID', 'AddressTypeID': 'BUSINESSENTITYADDRESS_ADDRESSTYPE_AddressTypeID', 'AddressLine1' : 'BUSINESSENTITYADDRESS_ADDRESS_AddressLine1', 'AddressLine2' : 'BUSINESSENTITYADDRESS_ADDRESS_AddressLine2', 'City' : 'BUSINESSENTITYADDRESS_ADDRESS_City', 'StateProvinceID' : 'BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID', 'PostalCode' : 'BUSINESSENTITYADDRESS_ADDRESS_POSTALCODE', 'SpatialLocation' : 'BUSINESSENTITYADDRESS_ADDRESS_SpatialLocation', 'Name' : 'BUSINESSENTITYADDRESS_ADDRESSTYPE_Name'}, inplace=True)

businessentityaddresses.head()

Unnamed: 0,BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID,BUSINESSENTITYADDRESS_ADDRESS_AddressID,BUSINESSENTITYADDRESS_ADDRESSTYPE_AddressTypeID,BUSINESSENTITYADDRESS_ADDRESS_AddressLine1,BUSINESSENTITYADDRESS_ADDRESS_AddressLine2,BUSINESSENTITYADDRESS_ADDRESS_City,BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID,BUSINESSENTITYADDRESS_ADDRESS_POSTALCODE,BUSINESSENTITYADDRESS_ADDRESS_SpatialLocation,BUSINESSENTITYADDRESS_ADDRESSTYPE_Name
0,,,1,,,,,,,Billing
1,12.0,1.0,2,1970 Napa Ct.,,Bothell,79.0,98011.0,POINT (-122.164644615406 47.7869921906598),Home
2,123.0,2.0,2,9833 Mt. Dias Blv.,,Bothell,79.0,98011.0,POINT (-122.250185528911 47.6867097047995),Home
3,285.0,3.0,2,7484 Roundtree Drive,,Bothell,79.0,98011.0,POINT (-122.274625789912 47.7631154083121),Home
4,251.0,4.0,2,9539 Glenside Dr,,Bothell,79.0,98011.0,POINT (-122.335726442416 47.7392386259644),Home


### People

In [447]:
# combining person data from adventureworks HumanResources.Person, PersonPhone, PhoneNumberType, EmailAddress and Password
people = pd.merge(adventureworks_person_person, adventureworks_person_personphone, left_on='BusinessEntityID', right_on='BusinessEntityID', suffixes= ('_person', '_personphone'), how="outer")

people = pd.merge(people, adventureworks_person_phonenumbertype, left_on='PhoneNumberTypeID', right_on='PhoneNumberTypeID', suffixes=('', '_phonenumbertype'), how="outer")

people = pd.merge(people, adventureworks_person_emailaddress, left_on='BusinessEntityID', right_on='BusinessEntityID', suffixes=('','_emailaddress'), how="outer")

people = pd.merge(people, adventureworks_person_password, left_on='BusinessEntityID', right_on='BusinessEntityID', suffixes=('','_password'), how="outer")

# dropping the modified date and rowguid columns
drop_modified_date_rowguid(people)

# renaming the columns
people.rename(columns={'BusinessEntityID': 'PERSON_PERSON_BusinessEntityID', 'PersonType': 'PERSON_PERSON_PersonType', 'NameStyle': 'PERSON_PERSON_NameStyle', 'Title': 'PERSON_PERSON_Title', 'FirstName': 'PERSON_PERSON_FirstName', 'MiddleName' : 'PERSON_PERSON_MiddleName', 'LastName' : 'PERSON_PERSON_LastName', 'Suffix': 'PERSON_PERSON_Suffix', 'EmailPromotion' : 'PERSON_PERSON_EmailPromotion', 'AdditionalContactInfo' : 'PERSON_PERSON_AdditionalContactInfo', 'Demographics' : 'PERSON_PERSON_Demographics', 'PhoneNumber' : 'PERSON_PERSONPHONE_PhoneNumber', 'PhoneNumberTypeID' : 'PERSON_PHONENUMBERTYPE_PhoneNumberTypeID', 'Name' : 'PERSON_PHONENUMBERTYPE_Name', 'EmailAddressID': 'PERSON_EMAILADDRESS_EmailAddressID', 'EmailAddress' : 'PERSON_EMAILADDRESS_EmailAddress', 'PasswordHash' : 'PERSON_PASSWORD_PasswordHash', 'PasswordSalt' : 'PERSON_PASSWORD_PasswordSalt'}, inplace=True)

people.head()

Unnamed: 0,PERSON_PERSON_BusinessEntityID,PERSON_PERSON_PersonType,PERSON_PERSON_NameStyle,PERSON_PERSON_Title,PERSON_PERSON_FirstName,PERSON_PERSON_MiddleName,PERSON_PERSON_LastName,PERSON_PERSON_Suffix,PERSON_PERSON_EmailPromotion,PERSON_PERSON_AdditionalContactInfo,PERSON_PERSON_Demographics,PERSON_PERSONPHONE_PhoneNumber,PERSON_PHONENUMBERTYPE_PhoneNumberTypeID,PERSON_PHONENUMBERTYPE_Name,PERSON_EMAILADDRESS_EmailAddressID,PERSON_EMAILADDRESS_EmailAddress,PERSON_PASSWORD_PasswordHash,PERSON_PASSWORD_PasswordSalt
0,1,EM,False,,Ken,J,Sánchez,,0,,"<IndividualSurvey xmlns=""http://schemas.micros...",697-555-0142,1,Cell,1,ken0@adventure-works.com,pbFwXWE99vobT6g+vPWFy93NtUU/orrIWafF01hccfM=,bE3XiWw=
1,2,EM,False,,Terri,Lee,Duffy,,1,,"<IndividualSurvey xmlns=""http://schemas.micros...",819-555-0175,3,Work,2,terri0@adventure-works.com,bawRVNrZQYQ05qF05Gz6VLilnviZmrqBReTTAGAudm0=,EjJaC3U=
2,3,EM,False,,Roberto,,Tamburello,,0,,"<IndividualSurvey xmlns=""http://schemas.micros...",212-555-0187,1,Cell,3,roberto0@adventure-works.com,8BUXrZfDqO1IyHCWOYzYmqN1IhTUn3CJMpdx/UCQ3iY=,wbPZqMw=
3,4,EM,False,,Rob,,Walters,,0,,"<IndividualSurvey xmlns=""http://schemas.micros...",612-555-0100,1,Cell,4,rob0@adventure-works.com,SjLXpiarHSlz+6AG+H+4QpB/IPRzras/+9q/5Wr7tf8=,PwSunQU=
4,5,EM,False,Ms.,Gail,A,Erickson,,0,,"<IndividualSurvey xmlns=""http://schemas.micros...",849-555-0139,1,Cell,5,gail0@adventure-works.com,8FYdAiY6gWuBsgjCFdg0UibtsqOcWHf9TyaHIP7+paA=,qYhZRiM=


### Products (TODO CHANGE)

In [448]:
# renaming aenc product columns to match the other product data
aenc_product.rename(columns={'id': 'ProductID', 'name' : 'ProductName', 'description':'Description' , 'prod_size' : 'ProdSize', 'color': 'Color', 'quantity': 'Quantity','unit_price' : 'UnitPrice'}, inplace=True)

aenc_product.head()

Unnamed: 0,ProductID,ProductName,Description,ProdSize,Color,Quantity,UnitPrice,picture_name,Category


In [449]:
# combining all adventureworks product data
adventureworks_combined_products = pd.concat([adventureworks_production_product, adventureworks_production_productcategory, adventureworks_production_productsubcategory, adventureworks_production_productdescription, adventureworks_production_productdocument, adventureworks_production_productmodel, adventureworks_production_productmodelillustration, adventureworks_production_productmodelproductdescriptionculture, adventureworks_production_productphoto, adventureworks_production_productproductphoto  ], ignore_index=True)

adventureworks_combined_products.head()

Unnamed: 0,ProductID,Name,ProductNumber,MakeFlag,FinishedGoodsFlag,Color,SafetyStockLevel,ReorderPoint,StandardCost,ListPrice,...,CatalogDescription,Instructions,IllustrationID,CultureID,ProductPhotoID,ThumbNailPhotoHexString,ThumbNailPhotoFileName,LargePhotoHexString,LargePhotoFileName,Primary
0,1.0,Adjustable Race,AR-5381,False,False,,1000.0,750.0,0.0,0.0,...,,,,,,,,,,
1,2.0,Bearing Ball,BA-8327,False,False,,1000.0,750.0,0.0,0.0,...,,,,,,,,,,
2,3.0,BB Ball Bearing,BE-2349,True,False,,800.0,600.0,0.0,0.0,...,,,,,,,,,,
3,4.0,Headset Ball Bearings,BE-2908,False,False,,800.0,600.0,0.0,0.0,...,,,,,,,,,,
4,316.0,Blade,BL-2036,True,False,,800.0,600.0,0.0,0.0,...,,,,,,,,,,


In [450]:
# Combining all product data
products = pd.concat([northwind_products, aenc_product, adventureworks_combined_products], ignore_index=True)

# replacing the document node with documentnodestring
products['DocumentNode'] = products['DocumentNodeString']
products.drop(columns=['DocumentNodeString'], inplace=True)

# applying the data types to the columns
products['ProductID'] = products['ProductID'].astype(float) # casting to float seems counterintuitive but it is necessary to avoid an error in Pandas

# dropping documentnode column (need to fix later)
products.drop(columns=['DocumentNode'], inplace=True)


products.head()

  products = pd.concat([northwind_products, aenc_product, adventureworks_combined_products], ignore_index=True)


Unnamed: 0,ProductID,ProductName,SupplierID,CategoryID,QuantityPerUnit,UnitPrice,UnitsInStock,UnitsOnOrder,ReorderLevel,Discontinued,...,CatalogDescription,Instructions,IllustrationID,CultureID,ProductPhotoID,ThumbNailPhotoHexString,ThumbNailPhotoFileName,LargePhotoHexString,LargePhotoFileName,Primary
0,1.0,Chai,1.0,1.0,10 boxes x 20 bags,18.0,39.0,0.0,10.0,False,...,,,,,,,,,,
1,2.0,Chang,1.0,1.0,24 - 12 oz bottles,19.0,17.0,40.0,25.0,False,...,,,,,,,,,,
2,3.0,Aniseed Syrup,1.0,2.0,12 - 550 ml bottles,10.0,13.0,70.0,25.0,False,...,,,,,,,,,,
3,4.0,Chef Anton's Cajun Seasoning,2.0,2.0,48 - 6 oz jars,22.0,53.0,0.0,0.0,False,...,,,,,,,,,,
4,5.0,Chef Anton's Gumbo Mix,2.0,2.0,36 boxes,21.35,0.0,0.0,0.0,True,...,,,,,,,,,,


### Regions (TODO CHANGE)

In [451]:
# Combining all region data
regions = pd.concat([northwind_region, aenc_region, adventureworks_person_stateprovince], ignore_index=True)

# combining regiondescription and region columns to create a name column (one is always None)
regions['RegionName'] = regions['RegionDescription'].combine_first(regions['region'])

# dropping the other columns
regions.drop(columns=['RegionDescription', 'region'], inplace=True)


regions.head()

Unnamed: 0,RegionID,StateProvinceID,StateProvinceCode,CountryRegionCode,IsOnlyStateProvinceFlag,Name,TerritoryID,rowguid,ModifiedDate,RegionName
0,1.0,,,,,,,,NaT,Eastern ...
1,2.0,,,,,,,,NaT,Western ...
2,3.0,,,,,,,,NaT,Northern ...
3,4.0,,,,,,,,NaT,Southern ...
4,,1.0,AB,CA,False,Alberta,6.0,298C2880-AB1C-4982-A5AD-A36EB4BA0D34,2014-02-08 10:17:21.587,


### Customers (TODO CHANGE)

In [452]:
# combining northwind customer data
northwind_combined_customer = pd.concat([northwind_customers, northwind_customer_customer_demo, northwind_customer_demographics], ignore_index=True)

# renaming northwind customer columns to match the other customer data
northwind_combined_customer.rename(columns={'PostalCode' : 'Zip'}, inplace=True)

northwind_combined_customer.head()

Unnamed: 0,CustomerID,CompanyName,ContactName,ContactTitle,Address,City,Region,Zip,Country,Phone,Fax,CustomerTypeID,CustomerDesc
0,ALFKI,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,,12209,Germany,030-0074321,030-0076545,,
1,ANATR,Ana Trujillo Emparedados y helados,Ana Trujillo,Owner,Avda. de la Constitución 2222,México D.F.,,05021,Mexico,(5) 555-4729,(5) 555-3745,,
2,ANTON,Antonio Moreno Taquería,Antonio Moreno,Owner,Mataderos 2312,México D.F.,,05023,Mexico,(5) 555-3932,,,
3,AROUT,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,,WA1 1DP,UK,(171) 555-7788,(171) 555-6750,,
4,BERGS,Berglunds snabbköp,Christina Berglund,Order Administrator,Berguvsvägen 8,Luleå,,S-958 22,Sweden,0921-12 34 65,0921-12 34 67,,


In [453]:
# combining all adventureworks customer data
adventureworks_combined_customers = pd.merge(adventureworks_sales_customer, adventureworks_person_person, left_on='PersonID', right_on='BusinessEntityID', how='outer', suffixes=('_sales_customer', '_person'))

adventureworks_combined_customers = pd.merge(adventureworks_combined_customers, adventureworks_person_businessentityaddress, left_on='BusinessEntityID', right_on='BusinessEntityID', suffixes=('', '_businessentityaddress'))

adventureworks_combined_customers = pd.merge(adventureworks_combined_customers, adventureworks_person_address, left_on='AddressID', right_on='AddressID', how='left', suffixes=('', '_address'))

adventureworks_combined_customers = pd.merge(adventureworks_combined_customers, adventureworks_person_address_type, left_on='AddressTypeID', right_on='AddressTypeID', how='left', suffixes=('', '_address_type'))

adventureworks_combined_customers = pd.merge(adventureworks_combined_customers, adventureworks_person_stateprovince, left_on=['StateProvinceID', 'TerritoryID'], right_on=['StateProvinceID', 'TerritoryID'], suffixes=('', '_stateprovince'))

adventureworks_combined_customers = pd.merge(adventureworks_combined_customers, adventureworks_person_countryregion, left_on='CountryRegionCode', right_on='CountryRegionCode', suffixes=('', '_countryregion'))

# combining fist, middle and last name columns to create a contact name column
adventureworks_combined_customers['ContactName'] = adventureworks_combined_customers['FirstName'] + ' ' + adventureworks_combined_customers['MiddleName'] + ' ' + adventureworks_combined_customers['LastName']
adventureworks_combined_customers.drop(columns=['FirstName', 'MiddleName', 'LastName'], inplace=True)

# Remove any double spaces caused by missing middle names
adventureworks_combined_customers['ContactName'] = adventureworks_combined_customers['ContactName'].str.replace('  ', ' ')

# Get all columns that contain 'rowguid' in their name
columns_to_drop = adventureworks_combined_customers.filter(like='rowguid').columns

# Drop these columns
adventureworks_combined_customers.drop(columns=columns_to_drop, inplace=True)

# only keeping the most recent modified date from the two tables
adventureworks_combined_customers['ModifiedDate'] = adventureworks_combined_customers[['ModifiedDate_sales_customer', 'ModifiedDate_person', 'ModifiedDate', 'ModifiedDate_address', 'ModifiedDate_address_type', 'ModifiedDate_stateprovince', 'ModifiedDate_countryregion']].max(axis=1)

# dropping the other modified date columns
adventureworks_combined_customers.drop(columns=['ModifiedDate_sales_customer', 'ModifiedDate_person', 'ModifiedDate', 'ModifiedDate_address', 'ModifiedDate_address_type', 'ModifiedDate_stateprovince', 'ModifiedDate_countryregion'], inplace=True)

# combining PersonID and BusinessEntityID columns
adventureworks_combined_customers['PersonID'] = adventureworks_combined_customers['PersonID'].combine_first(adventureworks_combined_customers['BusinessEntityID'])
adventureworks_combined_customers.drop(columns=['BusinessEntityID'], inplace=True)

# renaming columns to match the other customer data
adventureworks_combined_customers.rename(columns={'AddressLine1' : 'Address', 'PostalCode' : 'Zip', 'Name' : 'AddressType', 'Name_stateprovince' : 'StateProvince', 'Name_countryregion': 'CountryRegion' }, inplace=True)

adventureworks_combined_customers.head()

Unnamed: 0,CustomerID,PersonID,StoreID,TerritoryID,AccountNumber,PersonType,NameStyle,Title,Suffix,EmailPromotion,...,StateProvinceID,Zip,SpatialLocation,AddressType,StateProvinceCode,CountryRegionCode,IsOnlyStateProvinceFlag,StateProvince,CountryRegion,ContactName
0,11377.0,1699.0,,8.0,AW00011377,IN,False,Mr.,,1.0,...,53,42651,POINT (7.11082410683939 51.2015555665827),Home,NW,DE,False,Nordrhein-Westfalen,Germany,David R. Robinett
1,11913.0,1700.0,,9.0,AW00011913,IN,False,Ms.,,0.0,...,77,3198,POINT (145.141451560879 -38.0612939642931),Home,VIC,AU,False,Victoria,Australia,Rebecca A. Robinson
2,11952.0,1701.0,,9.0,AW00011952,IN,False,Ms.,,2.0,...,77,3220,POINT (144.201620782255 -38.1464342680786),Home,VIC,AU,False,Victoria,Australia,Dorothy B. Robinson
3,20164.0,1702.0,,10.0,AW00020164,IN,False,Ms.,,0.0,...,14,LA1 1LN,POINT (-2.80326155845985 54.1184442932464),Home,ENG,GB,True,England,United Kingdom,Carol Ann F. Rockne
4,20211.0,1703.0,,9.0,AW00020211,IN,False,Mr.,,0.0,...,64,4169,POINT (152.9802503342 -27.4802117164592),Home,QLD,AU,False,Queensland,Australia,Scott M. Rodgers


In [454]:
# renaming aenc customer columns to match the other customer data
aenc_customer.rename(columns={'id': 'CustomerID', 'address' : 'Address', 'city':'City' , 'state' : 'State', 'zip' : 'Zip', 'phone': 'Phone', 'company_name' : 'CompanyName'}, inplace=True)

# combining fname and lname columns to create a contact name column
aenc_customer['ContactName'] = aenc_customer['fname'] + ' ' + aenc_customer['lname']
aenc_customer.drop(columns=['fname', 'lname'], inplace=True)

aenc_customer.head()

Unnamed: 0,CustomerID,Address,City,State,Zip,Phone,CompanyName,ContactName


In [455]:
# Combining all customer data
customers = pd.concat([northwind_combined_customer, aenc_customer, adventureworks_combined_customers], ignore_index=True)

customers.head()

Unnamed: 0,CustomerID,CompanyName,ContactName,ContactTitle,Address,City,Region,Zip,Country,Phone,...,AddressTypeID,AddressLine2,StateProvinceID,SpatialLocation,AddressType,StateProvinceCode,CountryRegionCode,IsOnlyStateProvinceFlag,StateProvince,CountryRegion
0,ALFKI,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,,12209,Germany,030-0074321,...,,,,,,,,,,
1,ANATR,Ana Trujillo Emparedados y helados,Ana Trujillo,Owner,Avda. de la Constitución 2222,México D.F.,,05021,Mexico,(5) 555-4729,...,,,,,,,,,,
2,ANTON,Antonio Moreno Taquería,Antonio Moreno,Owner,Mataderos 2312,México D.F.,,05023,Mexico,(5) 555-3932,...,,,,,,,,,,
3,AROUT,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,,WA1 1DP,UK,(171) 555-7788,...,,,,,,,,,,
4,BERGS,Berglunds snabbköp,Christina Berglund,Order Administrator,Berguvsvägen 8,Luleå,,S-958 22,Sweden,0921-12 34 65,...,,,,,,,,,,


### SpecialOffer

In [456]:
# merge specialoffer with specialofferproduct
specialoffers = pd.merge(adventureworks_sales_specialoffer, adventureworks_sales_specialofferproduct, on="SpecialOfferID")

# drop rowguid and modifieddate
drop_modified_date_rowguid(specialoffers)

# rename to correct columns like in database
specialoffers = specialoffers.rename(columns={
    'SpecialOfferID': 'SPECIALOFFER_SPECIALOFFER_ID',
    'ProductID': 'SPECIALOFFER_SPECIALOFFERPRODUCT_ProductID',
    'Description': 'SPECIALOFFER_SPECIALOFFER_Description',
    'DiscountPct': 'SPECIALOFFER_SPECIALOFFER_DiscountPCT',
    'Type': 'SPECIALOFFER_SPECIALOFFER_Type',
    'Category': 'SPECIALOFFER_SPECIALOFFER_Category',
    'StartDate': 'SPECIALOFFER_SPECIALOFFER_StartDate',
    'EndDate': 'SPECIALOFFER_SPECIALOFFER_EndDate',
    'MinQty': 'SPECIALOFFER_SPECIALOFFER_MinQty',
    'MaxQty': 'SPECIALOFFER_SPECIALOFFER_MaxQty'    
})

specialoffers

Unnamed: 0,SPECIALOFFER_SPECIALOFFER_ID,SPECIALOFFER_SPECIALOFFER_Description,SPECIALOFFER_SPECIALOFFER_DiscountPCT,SPECIALOFFER_SPECIALOFFER_Type,SPECIALOFFER_SPECIALOFFER_Category,SPECIALOFFER_SPECIALOFFER_StartDate,SPECIALOFFER_SPECIALOFFER_EndDate,SPECIALOFFER_SPECIALOFFER_MinQty,SPECIALOFFER_SPECIALOFFER_MaxQty,SPECIALOFFER_SPECIALOFFERPRODUCT_ProductID
0,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,680
1,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,706
2,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,707
3,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,708
4,1,No Discount,0.0,No Discount,No Discount,2011-05-01,2014-11-30,0,,709
...,...,...,...,...,...,...,...,...,...,...
533,16,Mountain-500 Silver Clearance Sale,0.4,Discontinued Product,Reseller,2014-03-31,2014-05-30,0,,984
534,16,Mountain-500 Silver Clearance Sale,0.4,Discontinued Product,Reseller,2014-03-31,2014-05-30,0,,985
535,16,Mountain-500 Silver Clearance Sale,0.4,Discontinued Product,Reseller,2014-03-31,2014-05-30,0,,986
536,16,Mountain-500 Silver Clearance Sale,0.4,Discontinued Product,Reseller,2014-03-31,2014-05-30,0,,987


### CreditCard

In [457]:
# merge creditcard with personcreditcard
creditcards = pd.merge(adventureworks_sales_creditcard, adventureworks_sales_personcreditcard, on="CreditCardID")

# drop rowguid and modifieddate
drop_modified_date_rowguid(creditcards)

# rename to correct columns like in database
creditcards = creditcards.rename(columns={
    'CreditCardID': 'CREDITCARD_CREDITCARD_ID',
    'CardType': 'CREDITCARD_CREDITCARD_CardType',
    'CardNumber': 'CREDITCARD_CREDITCARD_CardNumber ',
    'ExpMonth': 'CREDITCARD_CREDITCARD_ExpMonth',
    'ExpYear': 'CREDITCARD_CREDITCARD_ExpYear',
    'BusinessEntityID': 'CREDITCARD_PERSONCREDITCARD_BusinessEntityID'    
})

creditcards

Unnamed: 0,CREDITCARD_CREDITCARD_ID,CREDITCARD_CREDITCARD_CardType,CREDITCARD_CREDITCARD_CardNumber,CREDITCARD_CREDITCARD_ExpMonth,CREDITCARD_CREDITCARD_ExpYear,CREDITCARD_PERSONCREDITCARD_BusinessEntityID
0,1,SuperiorCard,33332664695310,11,2006,4955
1,2,Distinguish,55552127249722,8,2005,13222
2,3,ColonialVoice,77778344838353,7,2005,7082
3,4,ColonialVoice,77774915718248,7,2006,9347
4,5,Vista,11114404600042,4,2005,11277
...,...,...,...,...,...,...
19113,19233,SuperiorCard,33335458414079,1,2005,5116
19114,19234,Vista,11114074915665,1,2007,2734
19115,19235,ColonialVoice,77774511327745,1,2006,6402
19116,19236,Vista,11112645284978,10,2008,12007


### Supplier

In [458]:
# rename
suppliers = northwind_suppliers.rename(columns={
    'SupplierID': 'SUPPLIER_SUPPLIERS_SupplierID',
    'CompanyName': 'SUPPLIER_SUPPLIERS_CompanyName',
    'ContactName': 'SUPPLIER_SUPPLIERS_ContactName',
    'ContactTitle': 'SUPPLIER_SUPPLIERS_ContactTitle',
    'Address': 'SUPPLIER_SUPPLIERS_Address',
    'City': 'SUPPLIER_SUPPLIERS_City',
    'Region': 'SUPPLIER_SUPPLIERS_Region',
    'PostalCode': 'SUPPLIER_SUPPLIERS_PostalCode',
    'Country': 'SUPPLIER_SUPPLIERS_Country',
    'Phone': 'SUPPLIER_SUPPLIERS_Phone',
    'Fax': 'SUPPLIER_SUPPLIERS_Fax',
    'HomePage': 'SUPPLIER_SUPPLIERS_HomePage'
})

suppliers

Unnamed: 0,SUPPLIER_SUPPLIERS_SupplierID,SUPPLIER_SUPPLIERS_CompanyName,SUPPLIER_SUPPLIERS_ContactName,SUPPLIER_SUPPLIERS_ContactTitle,SUPPLIER_SUPPLIERS_Address,SUPPLIER_SUPPLIERS_City,SUPPLIER_SUPPLIERS_Region,SUPPLIER_SUPPLIERS_PostalCode,SUPPLIER_SUPPLIERS_Country,SUPPLIER_SUPPLIERS_Phone,SUPPLIER_SUPPLIERS_Fax,SUPPLIER_SUPPLIERS_HomePage
0,1,Exotic Liquids,Charlotte Cooper,Purchasing Manager,49 Gilbert St.,London,,EC1 4SD,UK,(171) 555-2222,,
1,2,New Orleans Cajun Delights,Shelley Burke,Order Administrator,P.O. Box 78934,New Orleans,LA,70117,USA,(100) 555-4822,,#CAJUN.HTM#
2,3,Grandma Kelly's Homestead,Regina Murphy,Sales Representative,707 Oxford Rd.,Ann Arbor,MI,48104,USA,(313) 555-5735,(313) 555-3349,
3,4,Tokyo Traders,Yoshi Nagase,Marketing Manager,9-8 Sekimai Musashino-shi,Tokyo,,100,Japan,(03) 3555-5011,,
4,5,Cooperativa de Quesos 'Las Cabras',Antonio del Valle Saavedra,Export Administrator,Calle del Rosal 4,Oviedo,Asturias,33007,Spain,(98) 598 76 54,,
5,6,Mayumi's,Mayumi Ohno,Marketing Representative,92 Setsuko Chuo-ku,Osaka,,545,Japan,(06) 431-7877,,Mayumi's (on the World Wide Web)#http://www.mi...
6,7,"Pavlova, Ltd.",Ian Devling,Marketing Manager,74 Rose St. Moonie Ponds,Melbourne,Victoria,3058,Australia,(03) 444-2343,(03) 444-6588,
7,8,"Specialty Biscuits, Ltd.",Peter Wilson,Sales Representative,29 King's Way,Manchester,,M14 GSD,UK,(161) 555-4448,,
8,9,PB Knäckebröd AB,Lars Peterson,Sales Agent,Kaloadagatan 13,Göteborg,,S-345 67,Sweden,031-987 65 43,031-987 65 91,
9,10,Refrescos Americanas LTDA,Carlos Diaz,Marketing Manager,Av. das Americanas 12.890,Sao Paulo,,5442,Brazil,(11) 555 4640,,


### Currency

In [459]:
# combine currency and countryregioncurrency
currencies = pd.merge(adventureworks_sales_currency, adventureworks_sales_countryregioncurrency, on="CurrencyCode")

# now combine with currencyrate
currencies = pd.concat([currencies, adventureworks_sales_currencyrate], ignore_index=True)

#drop the modifieddate
drop_modified_date_rowguid(currencies)

#rename 
currencies = currencies.rename(columns={
    'CurrencyCode': 'CURRENCY_CURRENCY_CurrencyCode',
    'Name': 'CURRENCY_CURRENCY_Name',
    'CountryRegionCode': 'CURRENCY_COUNTRYREGIONCURRENCY_CountryRegionCode',
    'CurrencyRateID': 'CURRENCY_CURRENCYRATE_CurrencyRateID',
    'CurrencyRateDate': 'CURRENCY_CURRENCYRATE_CurrencyRateDate',
    'FromCurrencyCode': 'CURRENCY_CURRENCYRATE_FromCurrencyCode',
    'ToCurrencyCode': 'CURRENCY_CURRENCYRATE_ToCurrencyCode',
    'AverageRate': 'CURRENCY_CURRENCYRATE_AverageRate ',
    'EndOfDayRate': 'CURRENCY_CURRENCYRATE_EndOfDayRate',
})

currencies

Unnamed: 0,CURRENCY_CURRENCY_CurrencyCode,CURRENCY_CURRENCY_Name,CURRENCY_COUNTRYREGIONCURRENCY_CountryRegionCode,CURRENCY_CURRENCYRATE_CurrencyRateID,CURRENCY_CURRENCYRATE_CurrencyRateDate,CURRENCY_CURRENCYRATE_FromCurrencyCode,CURRENCY_CURRENCYRATE_ToCurrencyCode,CURRENCY_CURRENCYRATE_AverageRate,CURRENCY_CURRENCYRATE_EndOfDayRate
0,AED,Emirati Dirham,AE,,NaT,,,,
1,ARS,Argentine Peso,AR,,NaT,,,,
2,ATS,Shilling,AT,,NaT,,,,
3,AUD,Australian Dollar,AU,,NaT,,,,
4,BBD,Barbados Dollar,BB,,NaT,,,,
...,...,...,...,...,...,...,...,...,...
13636,,,,13528.0,2014-05-27,USD,USD,1.0,1.0
13637,,,,13529.0,2014-05-28,USD,USD,1.0,1.0
13638,,,,13530.0,2014-05-29,USD,USD,1.0,1.0
13639,,,,13531.0,2014-05-30,USD,USD,1.0,1.0


### Territory

In [460]:
# combine sales territory first
adventureworks_combined_territory = pd.merge(adventureworks_person_countryregion, adventureworks_person_stateprovince, on="CountryRegionCode", suffixes=('_pcr', '_sts'))

# now merge with salestaxrate
adventureworks_combined_territory = pd.merge(adventureworks_combined_territory, adventureworks_sales_salestaxrate, on="StateProvinceID", suffixes=('_st', '_sst'))

# combine northwind region with territory
nw_combined_territory = pd.merge(northwind_region, northwind_territories, on="RegionID")

# do the same for aenc
aenc_combined_regionstate = pd.merge(aenc_region, aenc_state, on="region")

#concatinate the tables together
territories = pd.concat([nw_combined_territory, aenc_combined_regionstate, adventureworks_combined_territory], ignore_index=True)

# drop unneeded rowguid and modifieddate
drop_modified_date_rowguid(territories)

# rename columns
territories = territories.rename(columns={
    'RegionID': 'TERRITORY_REGION_RegionID',
    'RegionDescription': 'TERRITORY_REGION_RegionDescription',
    'TerritoryID': 'TERRITORY_TERRITORIES_TerritoryID',
    'TerritoryDescription': 'TERRITORY_TERRITORIES_TerritoryDescription',
    'region': 'TERRITORY_REGION_Region',
    'state_id': 'TERRITORY_STATE_StateID',
    'state_name': 'TERRITORY_STATE_StateName',
    'state_capital': 'TERRITORY_STATE_StateCapital',
    'country': 'TERRITORY_STATE_Country',
    'CountryRegionCode': 'TERRITORY_COUNTRYREGION_CountryRegionCode',
    'Name_pcr': 'TERRITORY_COUNTRYREGION_Name',
    'StateProvinceID': 'TERRITORY_STATEPROVINCE_StateProvinceID',
    'StateProvinceCode': 'TERRITORY_STATEPROVINCE_StateProvinceCode',
    'IsOnlyStateProvinceFlag': 'TERRITORY_STATEPROVINCE_IsOnlyStateProvinceFlag',
    'Name_sts': 'TERRITORY_STATEPROVINCE_Name',
    'SalesTaxRateID': 'TERRITORY_SALESTAXRATE_SalesTaxRateID',
    'TaxType': 'TERRITORY_SALESTAXRATE_TaxType',
    'TaxRate': 'TERRITORY_SALESTAXRATE_TaxRate',
    'Name': 'TERRITORY_SALESTAXRATE_Name'
})

territories

Unnamed: 0,TERRITORY_REGION_RegionID,TERRITORY_REGION_RegionDescription,TERRITORY_TERRITORIES_TerritoryID,TERRITORY_TERRITORIES_TerritoryDescription,TERRITORY_REGION_Region,TERRITORY_STATE_StateID,TERRITORY_STATE_StateName,TERRITORY_STATE_StateCapital,TERRITORY_STATE_Country,TERRITORY_COUNTRYREGION_CountryRegionCode,TERRITORY_COUNTRYREGION_Name,TERRITORY_STATEPROVINCE_StateProvinceID,TERRITORY_STATEPROVINCE_StateProvinceCode,TERRITORY_STATEPROVINCE_IsOnlyStateProvinceFlag,TERRITORY_STATEPROVINCE_Name,TERRITORY_SALESTAXRATE_SalesTaxRateID,TERRITORY_SALESTAXRATE_TaxType,TERRITORY_SALESTAXRATE_TaxRate,TERRITORY_SALESTAXRATE_Name
0,1.0,Eastern ...,01581,Westboro ...,,,,,,,,,,,,,,,
1,1.0,Eastern ...,01730,Bedford ...,,,,,,,,,,,,,,,
2,1.0,Eastern ...,01833,Georgetow ...,,,,,,,,,,,,,,,
3,1.0,Eastern ...,02116,Boston ...,,,,,,,,,,,,,,,
4,1.0,Eastern ...,02139,Cambridge ...,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,,,3,,,,,,,US,United States,36.0,MN,False,Minnesota,23.0,1.0,6.75,Minnesota State Sales Tax
78,,,5,,,,,,,US,United States,72.0,TN,False,Tennessee,24.0,1.0,7.25,Tennessese State Sales Tax
79,,,4,,,,,,,US,United States,73.0,TX,False,Texas,25.0,1.0,7.50,Texas State Sales Tax
80,,,1,,,,,,,US,United States,74.0,UT,False,Utah,26.0,1.0,5.00,Utah State Sales Tax


## Loading the data into the UnitedOutdoors datawarehouse

### Departments

In [461]:
departments_dtypes = {
    'DEPARTMENT_DEPARTMENT_DeptID': Integer,
    'DEPARTMENT_DEPARTMENT_DeptName': String(100),
    'DEPARTMENT_DEPARTMENT_GroupName': String(100),
    'DEPARTMENT_DEPARTMENT_DeptHeadID': Integer,
    'DEPARTMENT_source_database': String(100)
}

# TODO dept_head_id needs to refer to an employee
prepare_and_insert(departments, departments_dtypes, 'Department')

Inserting data into table: Department with chunk size: 400


### BusinessEntities

In [462]:
businessentities_dtypes = {
    'BUSINESSENTITY_BUSINESSENTITY_BusinessEntityID': Integer,
    'BUSINESSENTITY_BUSINESSENTITYCONTACT_PersonID': Integer,
    'BUSINESSENTITY_CONTACTTYPE_ContactTypeID': Integer,
    'BUSINESSENTITY_CONTACTTYPE_Name': String(100)
}

businessentities_nk_sk_dict = prepare_and_insert_return_sk(businessentities, businessentities_dtypes, 'BusinessEntity', 'BUSINESSENTITY_BUSINESSENTITY_BusinessEntityID')

Inserting data into table: BusinessEntity with chunk size: 500


### People

In [463]:
people_dtypes = {
    'PERSON_PERSON_BusinessEntityID': Integer,
    'PERSON_PERSON_PersonType': String(2),
    'PERSON_PERSON_NameStyle': BIT,
    'PERSON_PERSON_Title': String(100),
    'PERSON_PERSON_FirstName': String(100),
    'PERSON_PERSON_MiddleName': String(100),
    'PERSON_PERSON_LastName': String(100),
    'PERSON_PERSON_Suffix': String(100),
    'PERSON_PERSON_EmailPromotion': Integer,
    'PERSON_PERSON_AdditionalContactInfo': XML,
    'PERSON_PERSON_Demographics': XML,
    'PERSON_PERSONPHONE_PhoneNumber': String(100),
    'PERSON_PHONENUMBERTYPE_PhoneNumberTypeID': Integer,
    'PERSON_PHONENUMBERTYPE_Name': String(100),
    'PERSON_EMAILADDRESS_EmailAddressID': Integer,
    'PERSON_EMAILADDRESS_EmailAddress': String(100),
    'PERSON_PASSWORD_PasswordHash': LargeBinary,
    'PERSON_PASSWORD_PasswordSalt': LargeBinary
}

# Convert the 'PERSON_PASSWORD_PasswordHash' and 'PERSON_PASSWORD_PasswordSalt' columns to bytes
people['PERSON_PASSWORD_PasswordHash'] = people['PERSON_PASSWORD_PasswordHash'].apply(lambda x: x.encode('utf-8') if isinstance(x, str) else x)
people['PERSON_PASSWORD_PasswordSalt'] = people['PERSON_PASSWORD_PasswordSalt'].apply(lambda x: x.encode('utf-8') if isinstance(x, str) else x)

prepare_and_insert(people, people_dtypes, 'Person', { 'PERSON_PERSON_BusinessEntityID' : x})

NameError: name 'x' is not defined

### Updating the BusinessEntity table
replacing the natural keys with the surrogate keys for the PersonID column

In [None]:
prepare_and_update('BusinessEntity', united_outdoors_engine, { 'BUSINESSENTITY_BUSINESSENTITYCONTACT_PersonID' : businessentities_nk_sk_dict})

Updating data in table: BusinessEntity


### BusinessEntityAddresses

In [None]:
businessentityaddresses_dtypes = {
    'BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID': Integer,
    'BUSINESSENTITYADDRESS_ADDRESSTYPE_AddressTypeID': Integer,
    'BUSINESSENTITYADDRESS_ADDRESSTYPE_Name': String(100),
    'BUSINESSENTITYADDRESS_ADDRESS_AddressID': Integer,
    'BUSINESSENTITYADDRESS_ADDRESS_AddressLine1': String(100),
    'BUSINESSENTITYADDRESS_ADDRESS_AddressLine2': String(100),
    'BUSINESSENTITYADDRESS_ADDRESS_City': String(100),
    'BUSINESSENTITYADDRESS_ADDRESS_POSTALCODE': String(100),
    'BUSINESSENTITYADDRESS_ADDRESS_SpatialLocation': VARCHAR,
    'BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID': Integer
}

# TODO the StateProvinceID needs to refer to sk of Territory
prepare_and_insert(businessentityaddresses, businessentityaddresses_dtypes , 'BusinessEntityAddress', { 'BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID' : businessentities_nk_sk_dict, 'BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID' : {}})

Replacing natural keys with surrogate keys for column: BUSINESSENTITYADDRESS_BUSINESSENTITYADDRESS_BusinessEntityID
Replacing natural keys with surrogate keys for column: BUSINESSENTITYADDRESS_ADDRESS_StateProvinceID
Inserting data into table: BusinessEntityAddress with chunk size: 200


### Products (TODO CHANGE)

In [None]:
products_dtypes = {
    'ProductID': Integer,
    'ProductName': NVARCHAR(50),
    'SupplierID': Integer,
    'CategoryID': Integer,
    'QuantityPerUnit': NVARCHAR(30),
    'UnitPrice': MONEY,
    'UnitsInStock': Integer,
    'UnitsOnOrder': Integer,
    'ReorderLevel': Integer,
    'Discontinued': BIT,
    'Description': NVARCHAR,
    'ProdSize': NVARCHAR(50),
    'Color': NVARCHAR(15),
    'Quantity': Integer,
    'picture_name': NVARCHAR(50),
    'Category': NVARCHAR(20),
    'Name': NVARCHAR(50),
    'ProductNumber': NVARCHAR(25),
    'MakeFlag': BIT,
    'FinishedGoodsFlag': BIT,
    'SafetyStockLevel': Integer,
    'ReorderPoint': Integer,
    'StandardCost': DECIMAL(8,4),
    'ListPrice': MONEY,
    'Size': NVARCHAR(5),
    'SizeUnitMeasureCode': CHAR(2),
    'WeightUnitMeasureCode': NVARCHAR(3),
    'Weight': DECIMAL(8,2),
    'DaysToManufacture': Integer,
    'ProductLine': CHAR(1),
    'Class': CHAR(1),
    'Style': CHAR(1),
    'ProductSubcategoryID': Integer,
    'ProductModelID': Integer,
    'SellStartDate': DATE,
    'SellEndDate': DATE,
    'DiscontinuedDate': DATE,
    'ModifiedDate': DATE,
    'ProductCategoryID': Integer,
    'ProductDescriptionID': Integer,
    'CatalogDescription': XML,
    'Instructions': XML,
    'IllustrationID': Integer,
    'CultureID': NVARCHAR(10),
    'ProductPhotoID': Integer,
    'ThumbNailPhotoHexString': String,
    'ThumbnailPhotoFileName': NVARCHAR(50),
    'LargePhotoHexString': String,
    'LargePhotoFileName': NVARCHAR(50),
    'Primary': BIT
}

prepare_and_insert(products, products_dtypes, 'Product')


Inserting data into table: Product with chunk size: 39


### Regions (TODO CHANGE)

In [None]:
regions_dtypes = {
    'RegionID': Integer,
    'RegionName': VARCHAR(10),
    'StateProvinceID': Integer,
    'StateProvinceCode': VARCHAR(10),
    'CountryRegionCode': CHAR(2),
    'IsOnlyStateProvinceFlag': BIT,
    'Name': VARCHAR(50),
    'TerritoryID': Integer,
    'ModifiedDate': DATE
}

regions['StateProvinceCode'] = regions['StateProvinceCode'].astype(str)

prepare_and_insert(regions, regions_dtypes, 'Region')

Inserting data into table: Region with chunk size: 200


### Customers (TODO CHANGE)

In [None]:
customers_dtypes = {
    'CustomerID': NVARCHAR(10),
    'CompanyName': NVARCHAR(40),
    'ContactName': NVARCHAR(60),
    'ContactTitle': NVARCHAR(30),
    'Address': NVARCHAR(60),
    'City': NVARCHAR(30),
    'Region': VARCHAR(15),
    'Zip': VARCHAR(10),
    'Country': VARCHAR(15),
    'Phone': VARCHAR(24),
    'Fax': VARCHAR(24),
    'CustomerTypeID': VARCHAR,
    'CustomerDesc': VARCHAR(100),
    'State': CHAR(2),
    'PersonID': Integer,
    'StoreID': Integer,
    'TerritoryID': Integer,
    'AccountNumber': CHAR(10),
    'PersonType': VARCHAR(2),
    'NameStyle': BIT,
    'Title': VARCHAR(10),
    'Suffix': VARCHAR(10),
    'EmailPromotion': Integer,
    'AdditionalContactInfo': XML,
    'Demographics': XML,
    'AddressID': Integer,
    'AddressTypeID': Integer,
    'AddressLine2': VARCHAR(60),
    'StateProvinceID': Integer,
    'SpatialLocation': VARCHAR,  # SQLAlchemy does not support the GEOGRAPHY data type
    'AddressType': VARCHAR(50),
    'StateProvinceCode': VARCHAR(10),
    'CountryRegionCode': CHAR(2),
    'IsOnlyStateProvinceFlag': BIT,
    'StateProvince': VARCHAR(50),
    'CountryRegion': VARCHAR(50)
}
# applying the data types to the columns
customers['CustomerID'] = customers['CustomerID'].astype(str)

prepare_and_insert(customers, customers_dtypes, 'Customer')

Inserting data into table: Customer with chunk size: 55


ProgrammingError: (pyodbc.ProgrammingError) ('42S22', "[42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CustomerID'. (207) (SQLExecDirectW); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CompanyName'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'ContactName'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'ContactTitle'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Address'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'City'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Region'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Zip'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Country'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Phone'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Fax'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CustomerTypeID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CustomerDesc'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'State'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'PersonID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'StoreID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'TerritoryID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AccountNumber'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'PersonType'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'NameStyle'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Title'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Suffix'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'EmailPromotion'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AdditionalContactInfo'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Demographics'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AddressID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AddressTypeID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AddressLine2'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'StateProvinceID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'SpatialLocation'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AddressType'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'StateProvinceCode'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CountryRegionCode'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'IsOnlyStateProvinceFlag'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'StateProvince'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CountryRegion'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CustomerID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CompanyName'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'ContactName'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'ContactTitle'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Address'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'City'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Region'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Zip'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Country'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Phone'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Fax'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CustomerTypeID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CustomerDesc'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'State'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'PersonID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'StoreID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'TerritoryID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AccountNumber'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'PersonType'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'NameStyle'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Title'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Suffix'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'EmailPromotion'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AdditionalContactInfo'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'Demographics'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AddressID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AddressTypeID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AddressLine2'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'StateProvinceID'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'SpatialLocation'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'AddressType'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'StateProvinceCode'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CountryRegionCode'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'IsOnlyStateProvinceFlag'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'StateProvince'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name 'CountryRegion'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Statement(s) could not be prepared. (8180)")
[SQL: INSERT INTO dbo.[Customer] ([CustomerID], [CompanyName], [ContactName], [ContactTitle], [Address], [City], [Region], [Zip], [Country], [Phone], [Fax], [CustomerTypeID], [CustomerDesc], [State], [PersonID], [StoreID], [TerritoryID], [AccountNumber], [PersonType], [NameStyle], [Title], [Suffix], [EmailPromotion], [AdditionalContactInfo], [Demographics], [AddressID], [AddressTypeID], [AddressLine2], [StateProvinceID], [SpatialLocation], [AddressType], [StateProvinceCode], [CountryRegionCode], [IsOnlyStateProvinceFlag], [StateProvince], [CountryRegion]) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)]
[parameters: ('ALFKI', 'Alfreds Futterkiste', 'Maria Anders', 'Sales Representative', 'Obere Str. 57', 'Berlin', None, '12209', 'Germany', '030-0074321', '030-0076545', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'ANATR', 'Ana Trujillo Emparedados y helados', 'Ana Trujillo', 'Owner', 'Avda. de la Constitución 2222', 'México D.F.', None, '05021', 'Mexico', '(5) 555-4729', '(5) 555-3745', None, None, None ... 1880 parameters truncated ... None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'OLDWO', 'Old World Delicatessen', 'Rene Phillips', 'Sales Representative', '2743 Bering St.', 'Anchorage', 'AK', '99508', 'USA', '(907) 555-7584', '(907) 555-2880', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)]
(Background on this error at: https://sqlalche.me/e/20/f405)

### SpecialOffer

In [468]:
specialoffers_dtypes = {
    'SPECIALOFFER_SPECIALOFFER_ID': Integer,
    'SPECIALOFFER_SPECIALOFFER_Description': String,
    'SPECIALOFFER_SPECIALOFFER_DiscountPCT': Integer,
    'SPECIALOFFER_SPECIALOFFER_Type': NVARCHAR(100),
    'SPECIALOFFER_SPECIALOFFER_Category': NVARCHAR(100),
    'SPECIALOFFER_SPECIALOFFER_StartDate': DATE,
    'SPECIALOFFER_SPECIALOFFER_EndDate': DATE,
    'SPECIALOFFER_SPECIALOFFER_MinQty': Integer,
    'SPECIALOFFER_SPECIALOFFER_MaxQty': Integer,
    'SPECIALOFFER_SPECIALOFFERPRODUCT_ProductID': Integer
}

prepare_and_insert(specialoffers, specialoffers_dtypes, 'SpecialOffer')

Inserting data into table: SpecialOffer with chunk size: 200


### CreditCard

In [467]:
creditcards_dtypes = {
    'CREDITCARD_CREDITCARD_ID': Integer,
    'CREDITCARD_CREDITCARD_CardType': NVARCHAR(100),
    'CREDITCARD_CREDITCARD_CardNumber': BigInteger,
    'CREDITCARD_CREDITCARD_ExpMonth': Integer,
    'CREDITCARD_CREDITCARD_ExpYear': Integer,
    'CREDITCARD_PERSONCREDITCARD_BusinessEntityID': Integer
}

prepare_and_insert(creditcards, creditcards_dtypes, 'CreditCard')

Inserting data into table: CreditCard with chunk size: 333


### Supplier

In [466]:
suppliers_dtypes = {
    'SUPPLIER_SUPPLIERS_SupplierID': Integer,
    'SUPPLIER_SUPPLIERS_CompanyName': NVARCHAR(100),
    'SUPPLIER_SUPPLIERS_ContactName': NVARCHAR(255),
    'SUPPLIER_SUPPLIERS_ContactTitle': NVARCHAR(100),
    'SUPPLIER_SUPPLIERS_Address': NVARCHAR(150),
    'SUPPLIER_SUPPLIERS_City': NVARCHAR(50),
    'SUPPLIER_SUPPLIERS_Region': NVARCHAR(50),
    'SUPPLIER_SUPPLIERS_PostalCode': NVARCHAR(20),
    'SUPPLIER_SUPPLIERS_Country': NVARCHAR(100),
    'SUPPLIER_SUPPLIERS_Phone': VARCHAR(20),
    'SUPPLIER_SUPPLIERS_Fax': NVARCHAR(30),
    'SUPPLIER_SUPPLIERS_HomePage': NVARCHAR(255)
}

prepare_and_insert(suppliers, suppliers_dtypes, 'Supplier')

Inserting data into table: Supplier with chunk size: 166


### Currency

In [465]:
currencies_dtypes = {
    'CURRENCY_CURRENCY_CurrencyCode': NVARCHAR(10),
    'CURRENCY_CURRENCY_Name': NVARCHAR(100),
    'CURRENCY_COUNTRYREGIONCURRENCY_CountryRegionCode': NVARCHAR(10),
    'CURRENCY_CURRENCYRATE_CurrencyRateID': Integer,
    'CURRENCY_CURRENCYRATE_CurrencyRateDate': DATE,
    'CURRENCY_CURRENCYRATE_FromCurrencyCode': NVARCHAR(10),
    'CURRENCY_CURRENCYRATE_ToCurrencyCode': NVARCHAR(10),
    'CURRENCY_CURRENCYRATE_AverageRate': Integer,
    'CURRENCY_CURRENCYRATE_EndOfDayRate': Integer
}

prepare_and_insert(currencies, currencies_dtypes, 'Currency')

Inserting data into table: Currency with chunk size: 222


### Territory

In [464]:
territories_dtypes = {
    'TERRITORY_REGION_RegionID': Integer,
    'TERRITORY_REGION_RegionDescription': String,
    'TERRITORY_TERRITORIES_TerritoryID': Integer,
    'TERRITORY_TERRITORIES_TerritoryDescription': String,
    'TERRITORY_REGION_Region': NVARCHAR(50),
    'TERRITORY_STATE_StateID': NVARCHAR(10),
    'TERRITORY_STATE_StateName': NVARCHAR(100),
    'TERRITORY_STATE_StateCapital': NVARCHAR(50),
    'TERRITORY_STATE_Country': NVARCHAR(50),
    'TERRITORY_COUNTRYREGION_CountryRegionCode': NVARCHAR(10),
    'TERRITORY_COUNTRYREGION_Name': NVARCHAR(100),
    'TERRITORY_STATEPROVINCE_StateProvinceID': Integer,
    'TERRITORY_STATEPROVINCE_StateProvinceCode': NVARCHAR(10),
    'TERRITORY_STATEPROVINCE_IsOnlyStateProvinceFlag': CHAR(1),
    'TERRITORY_STATEPROVINCE_Name': NVARCHAR(100),
    'TERRITORY_SALESTAXRATE_SalesTaxRateID': Integer,
    'TERRITORY_SALESTAXRATE_TaxType': NVARCHAR(50),
    'TERRITORY_SALESTAXRATE_TaxRate': DECIMAL(8,2),
    'TERRITORY_SALESTAXRATE_Name': NVARCHAR(150)
}

prepare_and_insert(territories, territories_dtypes, 'Territory')

Inserting data into table: Territory with chunk size: 105


## Constraints
altering the tables to add the (foreign key) constraints

In [None]:
# opening the UnitedOutdoors_constraints.sql file
with open('sql/UnitedOutdoors_constraints.sql', 'r') as file:
    sql_script = file.read()

# Execute the script
split_and_execute_sql_script(sql_script, united_outdoors_engine)

# Closing connections

In [None]:
try:
    united_outdoors_engine.dispose()
    northwind_engine.dispose()
    aenc_engine.dispose()
    adventureworks_engine.dispose()
except OperationalError as e:
        print(f'Error: {e}')

# Time elapsed

In [None]:
end_time = time.time()

print(f'Time elapsed: {end_time - start_time} seconds')

Time elapsed: 405.8673257827759 seconds
