Empty de tabellen

In [1]:
import pyodbc

conn = pyodbc.connect(
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=MSI\\SQLEXPRESS;' 
    'DATABASE=sdm;' 
    'Trusted_Connection=yes;'
)

cursor = conn.cursor()

tables_query = """
SELECT DISTINCT OBJECT_NAME(parent_object_id) AS TableName
FROM sys.foreign_keys
"""

try:
    cursor.execute(tables_query)
    tables_with_constraints = [row[0] for row in cursor.fetchall()]

    for table in tables_with_constraints:
        print(f"  Disabling constraints for {table}...")
        cursor.execute(f"ALTER TABLE [{table}] NOCHECK CONSTRAINT ALL")
        conn.commit()

    print("All constraints disabled")
except Exception as e:
    print(f"Error disabling constraints: {str(e)}")
    # Continue anyway - we'll still try to import data


cursor.execute("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'")
all_tables = [row[0] for row in cursor.fetchall()]


for table_name in all_tables:
    print(f" Legen van tabel: {table_name}")
    try:
        cursor.execute(f"DELETE FROM [{table_name}];")
        conn.commit()
    except Exception as e:
        print(f"Error deleting from {table_name}: {str(e)}")


try:
    # Re-enable constraints for each table individually
    for table in tables_with_constraints:
        print(f"  Re-enabling constraints for {table}...")
        cursor.execute(f"ALTER TABLE [{table}] WITH CHECK CHECK CONSTRAINT ALL")
        conn.commit()

    print("All constraints re-enabled successfully")
except Exception as e:
    print(f"Error re-enabling constraints: {str(e)}")

print(" Alle tabellen in de database 'sdm' zijn geleegd.")

cursor.close()
conn.close()

  Disabling constraints for country...
  Disabling constraints for order_details...
  Disabling constraints for order_header...
  Disabling constraints for product...
  Disabling constraints for product_type...
  Disabling constraints for retailer...
  Disabling constraints for retailer_contact...
  Disabling constraints for retailer_headquarters...
  Disabling constraints for retailer_site...
  Disabling constraints for returned_item...
  Disabling constraints for sales_branch...
  Disabling constraints for sales_demographic...
  Disabling constraints for sales_staff...
  Disabling constraints for training...
All constraints disabled
 Legen van tabel: age_group
 Legen van tabel: country
 Legen van tabel: course
 Legen van tabel: order_details
 Legen van tabel: order_header
 Legen van tabel: order_method
 Legen van tabel: product
 Legen van tabel: product_line
 Legen van tabel: product_type
 Legen van tabel: retailer
 Legen van tabel: retailer_contact
 Legen van tabel: retailer_headqua

# begin import van data

In [3]:
import pandas as pd
import sqlite3
import pyodbc
import os
import numpy as np

# Database paths
db_paths = {
    'sales': 'go_sales_train.sqlite',
    'crm': 'go_crm_train.sqlite',
    'staff': 'go_staff_train.sqlite'
}

csv_files = {
    "INVENTORY_LEVELS": 'inventory_levels_train.csv',
    "PRODUCT_FORECAST": 'product_forecast_train.csv',
}

# Direct SQL Server connection with pyodbc
server = 'localhost\\SQLEXPRESS'  # Change to your SQL Server instance
database = 'sdm'
conn_str = f'DRIVER={{SQL Server}};SERVER={server};DATABASE={database};Trusted_Connection=yes;'
sql_conn = pyodbc.connect(conn_str)

# Tables to merge from different databases
# Format: table_name: {db_name: [column_list], 'column_mapping': {source_column: target_column}}
merged_tables = {
    'COUNTRY': {
        'sales': ['COUNTRY_CODE', 'COUNTRY', 'LANGUAGE', 'CURRENCY_NAME'],
        'crm': ['COUNTRY_CODE', 'COUNTRY_EN', 'FLAG_IMAGE', 'SALES_TERRITORY_CODE'],

    },
    'SALES_BRANCH': {
        'sales': ['SALES_BRANCH_CODE', 'ADDRESS1', 'ADDRESS2', 'CITY', 'REGION', 'POSTAL_ZONE', 'COUNTRY_CODE'],
        'staff': ['SALES_BRANCH_CODE', 'ADDRESS1', 'ADDRESS2', 'CITY', 'REGION', 'POSTAL_ZONE', 'COUNTRY_CODE']
    },
    'SALES_STAFF': {
        'sales': ['SALES_STAFF_CODE', 'FIRST_NAME', 'LAST_NAME', 'POSITION_EN', 'WORK_PHONE', 'EXTENSION', 'FAX', 'EMAIL', 'DATE_HIRED', 'SALES_BRANCH_CODE'],
        'staff': ['SALES_STAFF_CODE', 'FIRST_NAME', 'LAST_NAME', 'POSITION_EN', 'WORK_PHONE', 'EXTENSION', 'FAX', 'EMAIL', 'DATE_HIRED', 'SALES_BRANCH_CODE', 'MANAGER_CODE']
    },
    'RETAILER_SITE': {
        'sales': ['RETAILER_SITE_CODE', 'RETAILER_CODE', 'ADDRESS1', 'ADDRESS2', 'CITY', 'REGION', 'POSTAL_ZONE', 'COUNTRY_CODE', 'ACTIVE_INDICATOR'],
        'crm': ['RETAILER_SITE_CODE', 'RETAILER_CODE', 'ADDRESS1', 'ADDRESS2', 'CITY', 'REGION', 'POSTAL_ZONE', 'COUNTRY_CODE', 'ACTIVE_INDICATOR']
    }
}

# Disable constraints - use a more direct approach that doesn't depend on sp_MSforeachtable
cursor = sql_conn.cursor()
print("Disabling constraints...")

# Get all tables with constraints
tables_query = """
SELECT DISTINCT OBJECT_NAME(parent_object_id) AS TableName
FROM sys.foreign_keys
"""

try:
    cursor.execute(tables_query)
    tables = [row[0] for row in cursor.fetchall()]
    
    # Disable constraints for each table individually
    for table in tables:
        print(f"  Disabling constraints for {table}...")
        cursor.execute(f"ALTER TABLE [{table}] NOCHECK CONSTRAINT ALL")
        sql_conn.commit()
    
    print("All constraints disabled")
except Exception as e:
    print(f"Error disabling constraints: {str(e)}")
    # Continue anyway - we'll still try to import data

def handle_numpy_types(df):
    """Convert numpy data types to Python native types for pyodbc compatibility"""
    for col in df.columns:
        # Convert numpy.int64/int32 to Python int
        if df[col].dtype == np.int64 or df[col].dtype == np.int32:
            df[col] = df[col].astype(int)
        # Convert numpy.float64/float32 to Python float
        elif df[col].dtype == np.float64 or df[col].dtype == np.float32:
            df[col] = df[col].astype(float)
    return df

def get_sql_column_types(sql_table):
    """Get column data types from SQL Server"""
    cursor = sql_conn.cursor()
    cursor.execute(f"""
        SELECT COLUMN_NAME, DATA_TYPE
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_NAME = '{sql_table}'
    """)
    return {row[0]: row[1] for row in cursor.fetchall()}

def fix_data_types(df, sql_table):
    """Fix data types based on SQL Server column types"""
    sql_types = get_sql_column_types(sql_table)
    
    for col in df.columns:
        if col in sql_types:
            # Handle numeric columns
            if sql_types[col] == 'float' or sql_types[col] == 'real':
                # Convert to float and replace NaN with None
                df[col] = pd.to_numeric(df[col], errors='coerce')
                df[col] = df[col].astype(float)
            
            # Handle integer columns
            elif sql_types[col] in ('int', 'smallint', 'tinyint', 'bigint'):
                # First convert to float in case there are NaN values, then to int
                df[col] = pd.to_numeric(df[col], errors='coerce')
                # Replace NaN with None/NULL
                df[col] = df[col].where(pd.notnull(df[col]), None)
            
            # Handle date columns
            elif sql_types[col] in ('date', 'datetime', 'datetime2'):
                df[col] = pd.to_datetime(df[col], errors='coerce')
    
    return df

def get_primary_key_columns(sql_table):
    """Get primary key columns for a SQL table"""
    cursor = sql_conn.cursor()
    cursor.execute(f"""
        SELECT COLUMN_NAME
        FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
        WHERE OBJECTPROPERTY(OBJECT_ID(CONSTRAINT_SCHEMA + '.' + CONSTRAINT_NAME), 'IsPrimaryKey') = 1
        AND TABLE_NAME = '{sql_table}'
    """)
    return [row[0] for row in cursor.fetchall()]

def check_for_duplicates(df, sql_table):
    """Check if primary key values already exist in SQL Server"""
    pk_columns = get_primary_key_columns(sql_table)
    
    if not pk_columns:
        return df  # No primary key, no need to check
    
    # Check if all primary key columns exist in the dataframe
    if not all(col in df.columns for col in pk_columns):
        print(f"    Warning: Not all primary key columns exist in the dataframe. Skipping duplicate check.")
        return df
    
    # Create a list to keep track of duplicate rows
    duplicate_indices = []
    cursor = sql_conn.cursor()
    
    # Generate WHERE clause to check for existing keys
    for i in range(len(df)):
        where_clauses = []
        for col in pk_columns:
            value = df.iloc[i][col]
            
            if pd.isna(value):
                where_clause = f"{col} IS NULL"
            elif isinstance(value, str):
                where_clause = f"{col} = '{value.replace("'", "''")}'"
            else:
                where_clause = f"{col} = {value}"
                
            where_clauses.append(where_clause)
        
        where_str = " AND ".join(where_clauses)
        query = f"SELECT 1 FROM {sql_table} WHERE {where_str}"
        
        cursor.execute(query)
        if cursor.fetchone():
            # Add this index to our list of duplicates
            duplicate_indices.append(i)
    
    # If we found duplicates, remove them
    if duplicate_indices:
        print(f"    Skipping {len(duplicate_indices)} duplicate rows")
        # Create a new dataframe without the duplicate rows
        df = df.drop(duplicate_indices)
        # Reset the index to avoid issues with missing indices
        df = df.reset_index(drop=True)
    
    return df

def import_merged_table(table_name):
    """Import and merge a table from multiple SQLite databases"""
    print(f"Importing merged table {table_name}...")
    
    all_dfs = []
    db_connections = {}
    
    try:
        # Get SQL Server column info
        cursor = sql_conn.cursor()
        cursor.execute(f"SELECT * FROM {table_name} WHERE 1=0")
        target_columns = [column[0] for column in cursor.description]
        
        # Get column mapping if it exists
        column_mapping = {}
        if 'column_mapping' in merged_tables[table_name]:
            column_mapping = merged_tables[table_name]['column_mapping']
        
        # Connect to each database and load data
        for db_name, columns in merged_tables[table_name].items():
            # Skip the column_mapping entry
            if db_name == 'column_mapping':
                continue
                
            if db_name not in db_connections:
                db_path = db_paths[db_name]
                db_connections[db_name] = sqlite3.connect(db_path)
            
            sqlite_conn = db_connections[db_name]
            
            # Get data from SQLite
            sqlite_table = table_name.lower()  # SQLite table names are lowercase
            query = f"SELECT {', '.join(columns)} FROM {sqlite_table}"
            
            try:
                df = pd.read_sql_query(query, sqlite_conn)
                
                if not df.empty:
                    # Convert column names to uppercase
                    df.columns = [col.upper() for col in df.columns]
                    
                    # Apply column mapping
                    for source_col, target_col in column_mapping.items():
                        if source_col in df.columns:
                            df = df.rename(columns={source_col: target_col})
                    
                    # Add identifying column to track source
                    df['_SOURCE_DB'] = db_name
                    
                    all_dfs.append(df)
                    print(f"  Retrieved {len(df)} rows from {db_name}.{sqlite_table}")
                
            except Exception as e:
                print(f"  Error retrieving data from {db_name}.{sqlite_table}: {str(e)}")
        
        # If no data was retrieved, exit
        if not all_dfs:
            print(f"  No data retrieved for {table_name}. Skipping.")
            return
        
        # Combine dataframes
        combined_df = pd.concat(all_dfs, ignore_index=True)
        
        # Get primary key column(s)
        pk_columns = get_primary_key_columns(table_name)
        
        # If we have a primary key, deduplicate based on it
        if pk_columns and all(col in combined_df.columns for col in pk_columns):
            # Keep the last occurrence of each primary key (assuming later DBs have priority)
            combined_df = combined_df.drop_duplicates(subset=pk_columns, keep='last')
            print(f"  After deduplication: {len(combined_df)} rows")
        
        # For the COUNTRY table specifically, use pd.merge to combine data from both sources
        if table_name == 'COUNTRY':
            print("  Performing special merge for COUNTRY table using pd.merge...")
            
            # Separate dataframes by source
            sales_df = None
            crm_df = None
            
            for df in all_dfs:
                if df['_SOURCE_DB'].iloc[0] == 'sales':
                    sales_df = df.drop('_SOURCE_DB', axis=1)
                elif df['_SOURCE_DB'].iloc[0] == 'crm':
                    crm_df = df.drop('_SOURCE_DB', axis=1)
            
            if sales_df is not None and crm_df is not None:
                # Merge dataframes on COUNTRY_CODE
                merged_df = pd.merge(
                    sales_df, 
                    crm_df, 
                    on='COUNTRY_CODE', 
                    how='outer',
                    suffixes=('', '_crm')
                )
                
                # For columns with the same name that got a suffix, choose the non-null value
                for col in crm_df.columns:
                    if col != 'COUNTRY_CODE' and f'{col}_crm' in merged_df.columns:
                        merged_df[col] = merged_df[col].fillna(merged_df[f'{col}_crm'])
                        merged_df = merged_df.drop(f'{col}_crm', axis=1)
                
                combined_df = merged_df
                print(f"  After COUNTRY pd.merge: {len(combined_df)} rows")
            elif sales_df is not None:
                combined_df = sales_df
                print(f"  Using only sales data for COUNTRY: {len(combined_df)} rows")
            elif crm_df is not None:
                combined_df = crm_df
                print(f"  Using only crm data for COUNTRY: {len(combined_df)} rows")
        
        # Drop the source tracking column if it exists
        if '_SOURCE_DB' in combined_df.columns:
            combined_df = combined_df.drop('_SOURCE_DB', axis=1)
        
        # Filter to columns that exist in SQL Server
        common_columns = [col for col in combined_df.columns if col in target_columns]
        if not common_columns:
            print(f"  No matching columns between combined data and {table_name}. Skipping.")
            return
            
        combined_df = combined_df[common_columns]
        
        # Fix data types based on SQL Server schema
        combined_df = fix_data_types(combined_df, table_name)
        
        # Check for and remove duplicate primary keys with existing data
        combined_df = check_for_duplicates(combined_df, table_name)
        
        if combined_df.empty:
            print(f"  All rows were duplicates. Skipping.")
            return
        
        # Convert numpy types to Python native types
        combined_df = handle_numpy_types(combined_df)
        
        # Generate placeholders for SQL INSERT
        placeholders = ', '.join(['?' for _ in common_columns])
        columns_str = ', '.join(common_columns)
        
        # Prepare insert statement
        insert_sql = f"INSERT INTO {table_name} ({columns_str}) VALUES ({placeholders})"
        
        # Insert data in batches
        batch_size = 1000
        rows_imported = 0
        
        for i in range(0, len(combined_df), batch_size):
            batch_df = combined_df.iloc[i:i+batch_size]
            
            # Convert DataFrame to list of tuples
            batch_data = []
            for _, row in batch_df.iterrows():
                # Replace NaN with None for SQL NULL
                row_tuple = tuple(None if pd.isna(val) else val for val in row)
                batch_data.append(row_tuple)
            
            cursor.executemany(insert_sql, batch_data)
            sql_conn.commit()
            rows_imported += len(batch_data)
        
        print(f"  Successfully imported {rows_imported} rows")
    
    except Exception as e:
        print(f"  Error importing merged table {table_name}: {str(e)}")
        sql_conn.rollback()
    
    finally:
        # Close all database connections
        for conn in db_connections.values():
            try:
                conn.close()
            except:
                pass

def import_table(sqlite_conn, sqlite_table, sql_table, db_name):
    """Import a table from SQLite to SQL Server using pandas as intermediary"""
    # Skip tables that should be merged
    if sql_table in merged_tables and db_name in merged_tables[sql_table]:
        print(f"  Skipping {sqlite_table} -> {sql_table} (will be merged later)")
        return
        
    try:
        print(f"  Importing {sqlite_table} -> {sql_table}")
        
        # Get data from SQLite
        query = f"SELECT * FROM {sqlite_table}"
        df = pd.read_sql_query(query, sqlite_conn)
        
        if df.empty:
            print(f"    Table {sqlite_table} is empty. Skipping.")
            return
        
        # Convert column names to uppercase
        df.columns = [col.upper() for col in df.columns]
        
        # Get column info from SQL Server
        cursor = sql_conn.cursor()
        cursor.execute(f"SELECT * FROM {sql_table} WHERE 1=0")
        columns = [column[0] for column in cursor.description]
        
        # Filter dataframe columns to match SQL Server
        common_columns = [col for col in df.columns if col in columns]
        if not common_columns:
            print(f"    No matching columns between {sqlite_table} and {sql_table}. Skipping.")
            return
            
        df = df[common_columns]
        
        # Fix data types based on SQL Server schema
        df = fix_data_types(df, sql_table)
        
        # Check for and remove duplicate primary keys
        df = check_for_duplicates(df, sql_table)
        
        if df.empty:
            print(f"    All rows were duplicates. Skipping.")
            return
        
        # Convert numpy types to Python native types
        df = handle_numpy_types(df)
        
        # Generate placeholders for SQL INSERT
        placeholders = ', '.join(['?' for _ in common_columns])
        columns_str = ', '.join(common_columns)
        
        # Prepare insert statement
        insert_sql = f"INSERT INTO {sql_table} ({columns_str}) VALUES ({placeholders})"
        
        # Insert data in batches
        batch_size = 1000
        rows_imported = 0
        
        for i in range(0, len(df), batch_size):
            batch_df = df.iloc[i:i+batch_size]
            
            # Convert DataFrame to list of tuples
            batch_data = []
            for _, row in batch_df.iterrows():
                # Replace NaN with None for SQL NULL
                row_tuple = tuple(None if pd.isna(val) else val for val in row)
                batch_data.append(row_tuple)
            
            cursor.executemany(insert_sql, batch_data)
            sql_conn.commit()
            rows_imported += len(batch_data)
            
        print(f"    Successfully imported {rows_imported} rows")
        
    except Exception as e:
        print(f"    Error importing {sqlite_table}: {str(e)}")
        sql_conn.rollback()

def import_csv(csv_path, sql_table):
    """Import a CSV file to SQL Server"""
    try:
        print(f"  Importing {os.path.basename(csv_path)} -> {sql_table}")
        
        # Read CSV
        df = pd.read_csv(csv_path)
        
        # Convert column names to uppercase
        df.columns = [col.upper() for col in df.columns]
        
        # Handle 'Unnamed: 0' column for inventory_levels
        if 'UNNAMED: 0' in df.columns:
            df = df.rename(columns={'UNNAMED: 0': 'ID'})
        
        # Get column info from SQL Server
        cursor = sql_conn.cursor()
        cursor.execute(f"SELECT * FROM {sql_table} WHERE 1=0")
        columns = [column[0] for column in cursor.description]
        
        # Filter dataframe columns to match SQL Server
        common_columns = [col for col in df.columns if col in columns]
        if not common_columns:
            print(f"    No matching columns between CSV and {sql_table}. Skipping.")
            return
            
        df = df[common_columns]
        
        # Fix data types based on SQL Server schema
        df = fix_data_types(df, sql_table)
        
        # Check for and remove duplicate primary keys
        df = check_for_duplicates(df, sql_table)
        
        if df.empty:
            print(f"    All rows were duplicates. Skipping.")
            return
        
        # Convert numpy types to Python native types
        df = handle_numpy_types(df)
        
        # Generate placeholders for SQL INSERT
        placeholders = ', '.join(['?' for _ in common_columns])
        columns_str = ', '.join(common_columns)
        
        # Prepare insert statement
        insert_sql = f"INSERT INTO {sql_table} ({columns_str}) VALUES ({placeholders})"
        
        # Insert data in batches
        batch_size = 1000
        rows_imported = 0
        
        for i in range(0, len(df), batch_size):
            batch_df = df.iloc[i:i+batch_size]
            
            # Convert DataFrame to list of tuples, handling NaN values
            batch_data = []
            for _, row in batch_df.iterrows():
                # Replace NaN with None for SQL NULL
                row_tuple = tuple(None if pd.isna(val) else val for val in row)
                batch_data.append(row_tuple)
            
            cursor.executemany(insert_sql, batch_data)
            sql_conn.commit()
            rows_imported += len(batch_data)
            
        print(f"    Successfully imported {rows_imported} rows")
        
    except Exception as e:
        print(f"    Error importing {csv_path}: {str(e)}")
        sql_conn.rollback()

# Option to truncate tables before import
def truncate_table(table_name):
    """Truncate a table in SQL Server"""
    try:
        cursor = sql_conn.cursor()
        cursor.execute(f"DELETE FROM {table_name}")
        sql_conn.commit()
        print(f"  Truncated table {table_name}")
    except Exception as e:
        print(f"  Error truncating {table_name}: {str(e)}")
        sql_conn.rollback()

# Set this to True if you want to clear tables before importing
TRUNCATE_BEFORE_IMPORT = False  # Change to True if needed

# Store SQLite connections to close later
sqlite_connections = {}

try:
    # First, truncate merged tables if needed
    if TRUNCATE_BEFORE_IMPORT:
        for table_name in merged_tables.keys():
            truncate_table(table_name)
    
    # Process regular (non-merged) tables
    for db_name, db_path in db_paths.items():
        print(f"Importing from {db_name} database...")
        
        # Connect to SQLite
        sqlite_conn = sqlite3.connect(db_path)
        sqlite_connections[db_name] = sqlite_conn
        
        # Get tables
        sqlite_cursor = sqlite_conn.cursor()
        sqlite_cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
        tables = [row[0] for row in sqlite_cursor.fetchall()]
        
        # Import each table
        for table in tables:
            target_table = table.upper()
            
            # Optionally truncate table before import (skip for merged tables)
            if TRUNCATE_BEFORE_IMPORT and target_table not in merged_tables:
                truncate_table(target_table)
            
            import_table(sqlite_conn, table, target_table, db_name)

    # Now process merged tables
    for table_name in merged_tables.keys():
        import_merged_table(table_name)

    # Import CSV files
    for table_name, file_path in csv_files.items():
        # Optionally truncate table before import
        if TRUNCATE_BEFORE_IMPORT:
            truncate_table(table_name)
            
        import_csv(file_path, table_name)

    # Re-enable constraints - use a more direct approach
    print("Re-enabling constraints...")
    sql_cursor = sql_conn.cursor()
    
    # Get all tables with constraints
    tables_query = """
    SELECT DISTINCT OBJECT_NAME(parent_object_id) AS TableName
    FROM sys.foreign_keys
    """
    
    try:
        sql_cursor.execute(tables_query)
        tables = [row[0] for row in sql_cursor.fetchall()]
        
        # Re-enable constraints for each table individually
        for table in tables:
            print(f"  Re-enabling constraints for {table}...")
            sql_cursor.execute(f"ALTER TABLE [{table}] WITH CHECK CHECK CONSTRAINT ALL")
            sql_conn.commit()
        
        print("All constraints re-enabled successfully")
    except Exception as e:
        print(f"Error re-enabling constraints: {str(e)}")
        # Continue anyway - the import is already complete

except Exception as e:
    print(f"Error during import process: {str(e)}")
    if 'sql_conn' in locals() and sql_conn:
        try:
            sql_conn.rollback()
        except:
            pass

finally:
    # Close all SQLite connections
    for conn in sqlite_connections.values():
        try:
            conn.close()
        except:
            pass
            
    # Ensure SQL Server connection is closed properly
    if 'sql_conn' in locals() and sql_conn:
        try:
            sql_conn.close()
            print("SQL Server connection closed")
        except:
            print("Error closing SQL Server connection")

print("Import completed!")


Disabling constraints...
  Disabling constraints for COUNTRY...
  Disabling constraints for INVENTORY_LEVELS...
  Disabling constraints for ORDER_DETAILS...
  Disabling constraints for ORDER_HEADER...
  Disabling constraints for PRODUCT...
  Disabling constraints for PRODUCT_FORECAST...
  Disabling constraints for PRODUCT_TYPE...
  Disabling constraints for RETAILER...
  Disabling constraints for RETAILER_CONTACT...
  Disabling constraints for RETAILER_HEADQUARTERS...
  Disabling constraints for RETAILER_SITE...
  Disabling constraints for RETURNED_ITEM...
  Disabling constraints for SALES_BRANCH...
  Disabling constraints for SALES_DEMOGRAPHIC...
  Disabling constraints for SALES_STAFF...
  Disabling constraints for SATISFACTION...
  Disabling constraints for TRAINING...
All constraints disabled
Importing from sales database...
  Skipping country -> COUNTRY (will be merged later)
  Importing order_details -> ORDER_DETAILS
    Successfully imported 37757 rows
  Importing order_header -

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.to_numeric(df[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = df[col].where(pd.notnull(df[col]), None)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.to_numeric(df[col], errors='coerce')
A value is trying to be set on a copy of a slice from 

  Successfully imported 102 rows
Importing merged table RETAILER_SITE...
  Error retrieving data from sales.retailer_site: Execution failed on sql 'SELECT RETAILER_SITE_CODE, RETAILER_CODE, ADDRESS1, ADDRESS2, CITY, REGION, POSTAL_ZONE, COUNTRY_CODE, ACTIVE_INDICATOR FROM retailer_site': no such column: POSTAL_ZONE
  Error retrieving data from crm.retailer_site: Execution failed on sql 'SELECT RETAILER_SITE_CODE, RETAILER_CODE, ADDRESS1, ADDRESS2, CITY, REGION, POSTAL_ZONE, COUNTRY_CODE, ACTIVE_INDICATOR FROM retailer_site': no such column: POSTAL_ZONE
  No data retrieved for RETAILER_SITE. Skipping.
  Importing inventory_levels_train.csv -> INVENTORY_LEVELS
    Successfully imported 3543 rows
  Importing product_forecast_train.csv -> PRODUCT_FORECAST
    Successfully imported 3529 rows
Re-enabling constraints...
  Re-enabling constraints for COUNTRY...
  Re-enabling constraints for INVENTORY_LEVELS...
  Re-enabling constraints for ORDER_DETAILS...
  Re-enabling constraints for ORDER_H