In [28]:
from sqlalchemy import create_engine
import pandas as pd

# Create engine using SQLAlchemy for both databases
engine_cm = create_engine('mssql+pyodbc://ABDALLAH\\MSSQLSERVER2/CustomerManagement?trusted_connection=yes&driver=SQL+Server')
engine_dwh = create_engine('mssql+pyodbc://ABDALLAH\\MSSQLSERVER2/CustomerManagementDWH?trusted_connection=yes&driver=SQL+Server')

def get_table_names(engine):
    """Fetch all table names using SQLAlchemy."""
    query = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'"
    return pd.read_sql(query, engine)

def get_table_info(engine, table_name):
    """Get column information and row count for a specific table using SQLAlchemy."""
    columns_query = f"""
    SELECT COLUMN_NAME, DATA_TYPE 
    FROM INFORMATION_SCHEMA.COLUMNS 
    WHERE TABLE_NAME = '{table_name}'
    """
    row_count_query = f"SELECT COUNT(*) FROM {table_name}"
    
    columns_info = pd.read_sql(columns_query, engine)
    try:
        row_count_info = pd.read_sql(row_count_query, engine)
        row_count_info.columns = ['RowCount']  # Rename the column after fetching
    except Exception as e:
        row_count_info = pd.DataFrame({'RowCount': [0]})
        print(f"Error fetching row count for {table_name}: {e}")

    return columns_info, row_count_info

# Fetch table information using SQLAlchemy
print("CustomerManagement Database Tables:")
cm_tables = get_table_names(engine_cm)
print(cm_tables)

print("\nCustomerManagementDWH Database Tables:")
dwh_tables = get_table_names(engine_dwh)
print(dwh_tables)

# Loop through all tables and get column and row count info
print("\nFetching table info for CustomerManagement database:")
for index, row in cm_tables.iterrows():
    table_name = row['TABLE_NAME']
    columns_info, row_count_info = get_table_info(engine_cm, table_name)
    print(f"\nColumns in {table_name}:")
    print(columns_info)
    print(f"Row count in {table_name}:")
    print(row_count_info)

print("\nFetching table info for CustomerManagementDWH database:")
for index, row in dwh_tables.iterrows():
    table_name = row['TABLE_NAME']
    columns_info, row_count_info = get_table_info(engine_dwh, table_name)
    print(f"\nColumns in {table_name}:")
    print(columns_info)
    print(f"Row count in {table_name}:")
    print(row_count_info)


CustomerManagement Database Tables:
                TABLE_NAME
0                   Brands
1                 Products
2                   Orders
3               OrderItems
4          CustomerReviews
5                Wishlists
6             ShoppingCart
7                 Payments
8   CustomerSupportTickets
9              sysdiagrams
10               Customers
11     CustomerPreferences
12              Categories

CustomerManagementDWH Database Tables:
      TABLE_NAME
0    Address_Dim
1  Customers_Dim
2       Date_Dim
3    Orders_Fact
4   Payments_Dim
5   Products_Dim

Fetching table info for CustomerManagement database:

Columns in Brands:
   COLUMN_NAME DATA_TYPE
0      BrandID       int
1    BrandName  nvarchar
2  Description  nvarchar
Row count in Brands:
   RowCount
0        15

Columns in Products:
     COLUMN_NAME DATA_TYPE
0      ProductID       int
1    ProductName  nvarchar
2     CategoryID       int
3        BrandID       int
4          Price   decimal
5  StockQuantity       i

In [13]:
## Data Profiling & Validation
##Step 1.1: Review the Database Schema


In [14]:
import pandas as pd
from sqlalchemy import create_engine

# Create SQLAlchemy engine for the CustomerManagement database
engine_cm = create_engine('mssql+pyodbc://ABDALLAH\\MSSQLSERVER2/CustomerManagement?trusted_connection=yes&driver=SQL+Server')

# Function to extract data from a table
def extract_table_data(engine, table_name):
    """Extract data from a given table."""
    query = f"SELECT * FROM {table_name}"
    return pd.read_sql(query, engine)

# Function to perform basic data profiling
def profile_table_data(df):
    """Perform basic data profiling on a DataFrame."""
    print(f"Profiling table: {df.name}")
    print(f"Shape of the table (rows, columns): {df.shape}")
    print("\nData Types:\n", df.dtypes)
    print("\nNull Values per column:\n", df.isnull().sum())
    print("\nDuplicated rows:", df.duplicated().sum())
    print("\nSummary statistics:\n", df.describe(include='all'))

# List of tables we want to profile from CustomerManagement database
tables_to_profile = ['Customers', 'Orders', 'Products']

# Loop through each table, extract the data, and profile it
for table_name in tables_to_profile:
    # Extract the data from each table
    df = extract_table_data(engine_cm, table_name)
    
    # Set the name attribute for profiling output
    df.name = table_name

    # Perform data profiling
    profile_table_data(df)

    # Optional: display first few rows to inspect
    print(f"\nFirst 5 rows of {table_name}:\n", df.head())
    print("\n" + "="*50 + "\n")


Profiling table: Customers
Shape of the table (rows, columns): (200, 9)

Data Types:
 CustomerID          int64
FullName           object
Email              object
PhoneNumber        object
ShippingAddress    object
BillingAddress     object
DateOfBirth        object
LoyaltyPoints       int64
Preferences        object
dtype: object

Null Values per column:
 CustomerID           0
FullName             0
Email                0
PhoneNumber          0
ShippingAddress      0
BillingAddress       0
DateOfBirth          0
LoyaltyPoints        0
Preferences        200
dtype: int64

Duplicated rows: 0

Summary statistics:
         CustomerID         FullName                     Email   PhoneNumber  \
count   200.000000              200                       200           200   
unique         NaN              200                       200           200   
top            NaN  Dr. Melody Clay  mirandadavid@example.org  739-638-3953   
freq           NaN                1                         1 

In [15]:
# Data Cleansing
def cleanse_data_customers(df):
    """Perform data cleansing on the Customers table."""
    
    # Handle 'Preferences' column - since it's entirely null, we can either drop it or fill it with a default value
    df_cleaned = df.copy()
    df_cleaned['Preferences'].fillna('No Preferences', inplace=True)  # Fill with a default string
    
    return df_cleaned

def cleanse_data_orders(df):
    """Perform data cleansing on the Orders table."""
    # Currently, no specific cleansing needed for Orders
    return df

def cleanse_data_products(df):
    """Perform data cleansing on the Products table."""
    
    # Ensure no negative values in Price and Rating
    df_cleaned = df.copy()
    df_cleaned['Price'] = df_cleaned['Price'].apply(lambda x: max(x, 0))  # Remove negative prices
    df_cleaned['Rating'] = df_cleaned['Rating'].apply(lambda x: max(min(x, 5), 0))  # Ensure Rating is between 0 and 5
    
    return df_cleaned

# Apply data cleansing to each table
tables_to_cleanse = {
    'Customers': cleanse_data_customers,
    'Orders': cleanse_data_orders,
    'Products': cleanse_data_products
}

# Loop through each table, cleanse it, and show summary after cleansing
for table_name, cleanse_function in tables_to_cleanse.items():
    df = extract_table_data(engine_cm, table_name)
    df_cleaned = cleanse_function(df)
    
    # Show the first 5 rows of cleaned data
    print(f"\nCleaned data for {table_name} (first 5 rows):")
    print(df_cleaned.head())
    print("\n" + "="*50 + "\n")



Cleaned data for Customers (first 5 rows):
   CustomerID         FullName                       Email      PhoneNumber  \
0          15  Dr. Melody Clay    mirandadavid@example.org     739-638-3953   
1          16  Jennifer Miller         ubarber@example.net    (400)790-7347   
2          17      Javier Hart  pettychristine@example.org  901.423.9751x74   
3          18  William Richard         jburton@example.com  (896)506-7433x6   
4          19  Benjamin Dennis      rodneywood@example.net  001-876-998-124   

                                     ShippingAddress  \
0  9057 Haynes Parkway Suite 524\nPort Michaelton...   
1       7447 Richardson Oval\nEast Michael, WV 51486   
2          37613 Ashley Brooks\nNew Debbie, PR 61066   
3        608 Martinez Rest\nClaytonchester, NC 53913   
4    296 Justin Road Suite 098\nNew Autumn, NE 58405   

                                      BillingAddress DateOfBirth  \
0  50563 Collins Cape Apt. 617\nSouth Angelaland,...  1995-04-22   
1     35

In [16]:
from datetime import datetime
import re

# Transformation functions
def transform_customers(df):
    """Transform Customers data to align with DWH schema."""
    df_transformed = df.copy()
    
    # Normalize PhoneNumber (remove special characters like (), -, and spaces)
    df_transformed['PhoneNumber'] = df_transformed['PhoneNumber'].apply(lambda x: re.sub(r'\D', '', x))
    
    # Ensure DateOfBirth is in YYYY-MM-DD format
    df_transformed['DateOfBirth'] = pd.to_datetime(df_transformed['DateOfBirth']).dt.strftime('%Y-%m-%d')
    
    return df_transformed

def transform_orders(df):
    """Transform Orders data to align with DWH schema."""
    df_transformed = df.copy()
    
    # Ensure OrderDate is in YYYY-MM-DD format
    df_transformed['OrderDate'] = pd.to_datetime(df_transformed['OrderDate']).dt.strftime('%Y-%m-%d')
    
    # Map PaymentMethod and OrderStatus to predefined IDs (example mapping)
    payment_method_map = {'PayPal': 1, 'Credit Card': 2, 'Bank Transfer': 3}
    order_status_map = {'Shipped': 1, 'Delivered': 2, 'Cancelled': 3, 'Pending': 4}
    
    df_transformed['PaymentMethod'] = df_transformed['PaymentMethod'].map(payment_method_map)
    df_transformed['OrderStatus'] = df_transformed['OrderStatus'].map(order_status_map)
    
    return df_transformed

def transform_products(df):
    """Transform Products data to align with DWH schema."""
    df_transformed = df.copy()
    
    # Ensure Price and Rating have valid ranges
    df_transformed['Price'] = df_transformed['Price'].apply(lambda x: max(x, 0))  # Ensure Price >= 0
    df_transformed['Rating'] = df_transformed['Rating'].apply(lambda x: max(min(x, 5), 0))  # Ensure Rating is between 0 and 5
    
    return df_transformed

# Apply transformation to each table
tables_to_transform = {
    'Customers': transform_customers,
    'Orders': transform_orders,
    'Products': transform_products
}

# Loop through each table, transform it, and show summary after transformation
for table_name, transform_function in tables_to_transform.items():
    df = extract_table_data(engine_cm, table_name)
    df_transformed = transform_function(df)
    
    # Show the first 5 rows of transformed data
    print(f"\nTransformed data for {table_name} (first 5 rows):")
    print(df_transformed.head())
    print("\n" + "="*50 + "\n")



Transformed data for Customers (first 5 rows):
   CustomerID         FullName                       Email   PhoneNumber  \
0          15  Dr. Melody Clay    mirandadavid@example.org    7396383953   
1          16  Jennifer Miller         ubarber@example.net    4007907347   
2          17      Javier Hart  pettychristine@example.org  901423975174   
3          18  William Richard         jburton@example.com   89650674336   
4          19  Benjamin Dennis      rodneywood@example.net  001876998124   

                                     ShippingAddress  \
0  9057 Haynes Parkway Suite 524\nPort Michaelton...   
1       7447 Richardson Oval\nEast Michael, WV 51486   
2          37613 Ashley Brooks\nNew Debbie, PR 61066   
3        608 Martinez Rest\nClaytonchester, NC 53913   
4    296 Justin Road Suite 098\nNew Autumn, NE 58405   

                                      BillingAddress DateOfBirth  \
0  50563 Collins Cape Apt. 617\nSouth Angelaland,...  1995-04-22   
1     3507 Steven Expr

In [17]:
import re
import hashlib

def additional_transformations(df_customers, df_orders, df_products):
    # Normalize Email to lowercase
    df_customers['Email'] = df_customers['Email'].str.lower()
    
    # Capitalize first letter of each word in FullName
    df_customers['FullName'] = df_customers['FullName'].str.title()

    # Trim whitespace from text columns
    df_customers = df_customers.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df_orders = df_orders.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df_products = df_products.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    
    # Validate Dates (check for future dates in DateOfBirth and OrderDate)
    df_customers['DateOfBirth'] = pd.to_datetime(df_customers['DateOfBirth'])
    df_orders['OrderDate'] = pd.to_datetime(df_orders['OrderDate'])
    
    today = pd.to_datetime("today")
    df_customers.loc[df_customers['DateOfBirth'] > today, 'DateOfBirth'] = None
    df_orders.loc[df_orders['OrderDate'] > today, 'OrderDate'] = None
    
    # Ensure LoyaltyPoints are non-negative
    df_customers['LoyaltyPoints'] = df_customers['LoyaltyPoints'].apply(lambda x: max(x, 0))

    # Generate hashed Email for anonymization
    df_customers['EmailHash'] = df_customers['Email'].apply(lambda x: hashlib.sha256(x.encode()).hexdigest())
    
    # Verify Foreign Key Integrity (assuming you have foreign key reference tables for CategoryID and BrandID)
    valid_category_ids = set(df_products['CategoryID'].unique())  # Assume you have a list of valid category IDs
    valid_brand_ids = set(df_products['BrandID'].unique())
    
    df_products = df_products[df_products['CategoryID'].isin(valid_category_ids)]
    df_products = df_products[df_products['BrandID'].isin(valid_brand_ids)]
    
    return df_customers, df_orders, df_products

# Applying additional transformations
df_customers_transformed, df_orders_transformed, df_products_transformed = additional_transformations(
    df_customers, df_orders, df_products)

# Display transformed data
print("Further Transformed Customers (first 5 rows):")
print(df_customers_transformed.head())

print("\nFurther Transformed Orders (first 5 rows):")
print(df_orders_transformed.head())

print("\nFurther Transformed Products (first 5 rows):")
print(df_products_transformed.head())


Further Transformed Customers (first 5 rows):
   CustomerID         FullName                       Email      PhoneNumber  \
0          15  Dr. Melody Clay    mirandadavid@example.org     739-638-3953   
1          16  Jennifer Miller         ubarber@example.net    (400)790-7347   
2          17      Javier Hart  pettychristine@example.org  901.423.9751x74   
3          18  William Richard         jburton@example.com  (896)506-7433x6   
4          19  Benjamin Dennis      rodneywood@example.net  001-876-998-124   

                                     ShippingAddress  \
0  9057 Haynes Parkway Suite 524\nPort Michaelton...   
1       7447 Richardson Oval\nEast Michael, WV 51486   
2          37613 Ashley Brooks\nNew Debbie, PR 61066   
3        608 Martinez Rest\nClaytonchester, NC 53913   
4    296 Justin Road Suite 098\nNew Autumn, NE 58405   

                                      BillingAddress DateOfBirth  \
0  50563 Collins Cape Apt. 617\nSouth Angelaland,...  1995-04-22   
1     

In [18]:
import pandas as pd
import numpy as np
from datetime import datetime

# Sample function for customers transformations
def transform_customers(df):
    # Remove duplicates based on Email
    df.drop_duplicates(subset='Email', keep='first', inplace=True)

    # Calculate Age
    df['Age'] = (datetime.now() - pd.to_datetime(df['DateOfBirth'])).dt.days // 365

    # Categorize Loyalty Points
    bins = [0, 199, 399, 599, np.inf]
    labels = ['Bronze', 'Silver', 'Gold', 'Platinum']
    df['LoyaltyCategory'] = pd.cut(df['LoyaltyPoints'], bins=bins, labels=labels, right=False)

    # Standardize Address
    df['ShippingAddress'] = df['ShippingAddress'].str.replace(r'\n', ', ')  # Replace newlines with commas

    # Split FullName into FirstName and LastName
    df[['FirstName', 'LastName']] = df['FullName'].str.split(' ', n=1, expand=True)

    return df

# Sample function for orders transformations
def transform_orders(df):
    # Categorize Order Status
    status_mapping = {1: 'Pending', 2: 'Completed', 3: 'Cancelled'}
    df['OrderStatus'] = df['OrderStatus'].map(status_mapping)

    # Calculate Order Age
    df['OrderAge'] = (datetime.now() - pd.to_datetime(df['OrderDate'])).dt.days

    # Total Items Count (assuming you have an order items DataFrame)
    # df['TotalItemsCount'] = df_items.groupby('OrderID')['Quantity'].sum().reindex(df['OrderID']).fillna(0)

    return df

# Sample function for products transformations
def transform_products(df):
    # Price Ranges
    bins = [0, 20, 50, 100, np.inf]
    labels = ['Low', 'Medium', 'High', 'Luxury']
    df['PriceCategory'] = pd.cut(df['Price'], bins=bins, labels=labels, right=False)

    # Stock Alerts
    df['LowStock'] = df['StockQuantity'] < 10  # Flag products with low stock

    # Average Rating (assuming you have ratings data)
    # df['AverageRating'] = df_reviews.groupby('ProductID')['Rating'].mean().reindex(df['ProductID']).fillna(0)

    return df

# Apply transformations to each table
df_customers_transformed = transform_customers(df_customers)
df_orders_transformed = transform_orders(df_orders)
df_products_transformed = transform_products(df_products)

# Display transformed data
print("Further Transformed Customers (first 5 rows):")
print(df_customers_transformed.head())

print("\nFurther Transformed Orders (first 5 rows):")
print(df_orders_transformed.head())

print("\nFurther Transformed Products (first 5 rows):")
print(df_products_transformed.head())


Further Transformed Customers (first 5 rows):
   CustomerID         FullName                       Email      PhoneNumber  \
0          15  Dr. Melody Clay    mirandadavid@example.org     739-638-3953   
1          16  Jennifer Miller         ubarber@example.net    (400)790-7347   
2          17      Javier Hart  pettychristine@example.org  901.423.9751x74   
3          18  William Richard         jburton@example.com  (896)506-7433x6   
4          19  Benjamin Dennis      rodneywood@example.net  001-876-998-124   

                                     ShippingAddress  \
0  9057 Haynes Parkway Suite 524\nPort Michaelton...   
1       7447 Richardson Oval\nEast Michael, WV 51486   
2          37613 Ashley Brooks\nNew Debbie, PR 61066   
3        608 Martinez Rest\nClaytonchester, NC 53913   
4    296 Justin Road Suite 098\nNew Autumn, NE 58405   

                                      BillingAddress DateOfBirth  \
0  50563 Collins Cape Apt. 617\nSouth Angelaland,...  1995-04-22   
1     

In [20]:
import pandas as pd

# Assuming df_customers, df_orders, and df_products are your dataframes

# Customers Transformations
df_customers['EmailDomain'] = df_customers['Email'].str.split('@').str[1]
df_customers['PhoneNumber'] = df_customers['PhoneNumber'].replace(r'\D+', '', regex=True).str.replace(r'(\d{3})(\d{3})(\d{4})', r'(\1) \2-\3')
df_customers['CustomerSegment'] = pd.cut(df_customers['LoyaltyPoints'], bins=[0, 200, 400, float('inf')], labels=['Regular', 'Valued', 'Premium'])

# Orders Transformations
df_orders['OrderDate'] = pd.to_datetime(df_orders['OrderDate'])
df_orders['Year'] = df_orders['OrderDate'].dt.year
df_orders['Month'] = df_orders['OrderDate'].dt.month
df_orders['Day'] = df_orders['OrderDate'].dt.day
df_orders['OrderFrequency'] = df_orders.groupby('CustomerID')['OrderID'].transform('count')

# Products Transformations
# Example for a hypothetical currency normalization (just a placeholder)
df_products['Price'] = df_products['Price'].apply(lambda x: round(x * 1.1, 2))  # Adjusting prices by a factor for demo
df_products['BrandPopularity'] = df_products['BrandID'].map(df_products['BrandID'].value_counts())
df_products['LifecycleStage'] = pd.cut(df_products['StockQuantity'], bins=[0, 10, 50, float('inf')], labels=['Discontinued', 'In Stock', 'New'])

# Example of discount flag based on price
average_price = df_products['Price'].mean()
df_products['Discounted'] = df_products['Price'] < average_price


In [21]:
# Display the first few rows of each transformed DataFrame

# For Customers
print("Transformed Customers Data:")
print(df_customers.head())  # Display the first 5 rows of the transformed customers DataFrame

# For Orders
print("\nTransformed Orders Data:")
print(df_orders.head())  # Display the first 5 rows of the transformed orders DataFrame

# For Products
print("\nTransformed Products Data:")
print(df_products.head())  # Display the first 5 rows of the transformed products DataFrame


Transformed Customers Data:
   CustomerID         FullName                       Email   PhoneNumber  \
0          15  Dr. Melody Clay    mirandadavid@example.org    7396383953   
1          16  Jennifer Miller         ubarber@example.net    4007907347   
2          17      Javier Hart  pettychristine@example.org  901423975174   
3          18  William Richard         jburton@example.com   89650674336   
4          19  Benjamin Dennis      rodneywood@example.net  001876998124   

                                     ShippingAddress  \
0  9057 Haynes Parkway Suite 524\nPort Michaelton...   
1       7447 Richardson Oval\nEast Michael, WV 51486   
2          37613 Ashley Brooks\nNew Debbie, PR 61066   
3        608 Martinez Rest\nClaytonchester, NC 53913   
4    296 Justin Road Suite 098\nNew Autumn, NE 58405   

                                      BillingAddress DateOfBirth  \
0  50563 Collins Cape Apt. 617\nSouth Angelaland,...  1995-04-22   
1     3507 Steven Expressway\nNorth Samuel

In [22]:
from sqlalchemy import create_engine

# Create SQLAlchemy engine for the CustomerManagementDWH database
engine_dwh = create_engine('mssql+pyodbc://ABDALLAH\\MSSQLSERVER2/CustomerManagementDWH?trusted_connection=yes&driver=SQL+Server')


In [24]:
print(df_customers_transformed.dtypes)


CustomerID            int64
FullName             object
Email                object
PhoneNumber          object
ShippingAddress      object
BillingAddress       object
DateOfBirth          object
LoyaltyPoints         int64
Preferences          object
Age                   int64
LoyaltyCategory    category
FirstName            object
LastName             object
EmailDomain          object
CustomerSegment    category
dtype: object


In [25]:
from sqlalchemy.types import Integer, String, DateTime

dtype_dict = {
    'CustomerID': Integer,
    'FullName': String(255),  # Define a maximum length
    'Email': String(255),
    'PhoneNumber': String(20),  # Adjust as needed for phone numbers
    'ShippingAddress': String(255),
    'BillingAddress': String(255),
    'DateOfBirth': DateTime,  # Change to datetime
    'LoyaltyPoints': Integer,
    'Preferences': String(255),  # Adjust if needed
    'Age': Integer,
    'LoyaltyCategory': String(50),  # Convert category to string
    'FirstName': String(100),  # Adjust length as needed
    'LastName': String(100),
    'EmailDomain': String(100),
    'CustomerSegment': String(50),  # Convert category to string
}

# Convert DateOfBirth to datetime if it’s not already
df_customers_transformed['DateOfBirth'] = pd.to_datetime(df_customers_transformed['DateOfBirth'], errors='coerce')

# Check for any invalid date conversions
print(df_customers_transformed['DateOfBirth'].isnull().sum(), "invalid DateOfBirth entries")

# Load Customers data into DWH
df_customers_transformed.to_sql('Customers', con=engine_dwh, if_exists='replace', index=False, dtype=dtype_dict)


0 invalid DateOfBirth entries


DBAPIError: (pyodbc.Error) ('HY104', '[HY104] [Microsoft][ODBC SQL Server Driver]Invalid precision value (0) (SQLBindParameter)')
[SQL: SELECT [INFORMATION_SCHEMA].[TABLES].[TABLE_NAME] 
FROM [INFORMATION_SCHEMA].[TABLES] 
WHERE ([INFORMATION_SCHEMA].[TABLES].[TABLE_TYPE] = CAST(? AS NVARCHAR(max)) OR [INFORMATION_SCHEMA].[TABLES].[TABLE_TYPE] = CAST(? AS NVARCHAR(max))) AND [INFORMATION_SCHEMA].[TABLES].[TABLE_NAME] = CAST(? AS NVARCHAR(max)) AND [INFORMATION_SCHEMA].[TABLES].[TABLE_SCHEMA] = CAST(? AS NVARCHAR(max))]
[parameters: ('BASE TABLE', 'VIEW', 'Customers', 'dbo')]
(Background on this error at: https://sqlalche.me/e/20/dbapi)

In [29]:
def fetch_table_info(engine, db_name):
    query = f"""
    SELECT 
        t.name AS TABLE_NAME, 
        SUM(p.rows) AS RowCount
    FROM 
        sys.tables t
        INNER JOIN sys.partitions p ON t.object_id = p.object_id
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
    WHERE 
        p.index_id IN (0, 1)
    GROUP BY 
        t.name
    ORDER BY 
        t.name;
    """
    table_info = pd.read_sql(query, engine)
    print(f"Tables in {db_name} Database:")
    print(table_info)

# Fetching table information for both databases
fetch_table_info(engine_cm, 'CustomerManagement')
fetch_table_info(engine_dwh, 'CustomerManagementDWH')


DBAPIError: (pyodbc.Error) ('01000', '[01000] [Microsoft][ODBC SQL Server Driver][DBMSLPCN]ConnectionWrite (WrapperWrite()). (233) (SQLExecDirectW); [01000] [Microsoft][ODBC SQL Server Driver][DBMSLPCN]General network error. Check your network documentation. (11)')
[SQL: 
    SELECT 
        t.name AS TABLE_NAME, 
        SUM(p.rows) AS RowCount
    FROM 
        sys.tables t
        INNER JOIN sys.partitions p ON t.object_id = p.object_id
        INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
    WHERE 
        p.index_id IN (0, 1)
    GROUP BY 
        t.name
    ORDER BY 
        t.name;
    ]
(Background on this error at: https://sqlalche.me/e/20/dbapi)