In [52]:
import sys

# !{sys.executable} -m pip install pyodbc pandas 
# !{sys.executable} -m pip install texttable

In [53]:
import os
import pyodbc
import sqlite3
from typing import Dict, List, Optional
import logging
from datetime import datetime

# Logging setup for the migration: INFO and above goes both to the file
# 'access_to_sqlite.log' and to a stream handler, formatted as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    handlers=[
        logging.FileHandler('access_to_sqlite.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

class AccessDBToSQLiteMigrator:
    def __init__(self, access_db_path: str, sqlite_db_path: str):
        """
        Initialize the migrator with paths to source and destination databases.
        
        Args:
            access_db_path: Path to the Microsoft Access database file (.mdb or .accdb)
            sqlite_db_path: Path where the SQLite database will be created
        """
        self.access_db_path = os.path.abspath(access_db_path)
        self.sqlite_db_path = os.path.abspath(sqlite_db_path)
        self.access_conn = None
        self.sqlite_conn = None
        
        # Verify Access DB exists
        if not os.path.exists(self.access_db_path):
            raise FileNotFoundError(f"Access database file not found: {self.access_db_path}")
            
        logging.info(f"Initialized migrator with Access DB: {self.access_db_path}")
        logging.info(f"SQLite DB will be created at: {self.sqlite_db_path}")

    def connect_to_access(self) -> None:
        """
        Open the ODBC connection to the Access file and keep it on the instance.

        The connection string names the 'Microsoft Access Driver (*.mdb, *.accdb)'
        ODBC driver and points DBQ at the stored Access path.

        Raises:
            pyodbc.Error: If the connection attempt fails (logged, then re-raised)
        """
        driver = 'Microsoft Access Driver (*.mdb, *.accdb)'
        # The triple braces render as literal {..} around the driver name
        conn_str = f'DRIVER={{{driver}}};DBQ={self.access_db_path};'

        try:
            self.access_conn = pyodbc.connect(conn_str)
        except pyodbc.Error as e:
            logging.error(f"Failed to connect to Access database: {e}")
            raise
        else:
            logging.info("Successfully connected to Access database")

    def connect_to_sqlite(self) -> None:
        """Create or connect to the SQLite database."""
        try:
            self.sqlite_conn = sqlite3.connect(self.sqlite_db_path)
            sqlite_cursor = self.sqlite_conn.cursor()

            # Enable WAL mode
            sqlite_cursor.execute("PRAGMA journal_mode=WAL;")
            result = sqlite_cursor.fetchone()[0]
            if result != 'wal':
                raise Exception('Failed to enable WAL mode')

            # Set auto-checkpoint (per your setting)
            sqlite_cursor.execute('PRAGMA wal_autocheckpoint=100;')

            # Enable foreign key constraints
            sqlite_cursor.execute("PRAGMA foreign_keys = ON")
            logging.info("Successfully connected to SQLite database")
        except sqlite3.Error as e:
            logging.error(f"Failed to connect to SQLite database: {e}")
            raise

    def get_access_table_names(self) -> List[str]:
        """Get list of all table names in the Access database."""
        cursor = self.access_conn.cursor()
        tables = cursor.tables(tableType='TABLE')
        table_names = [table.table_name for table in tables]
        cursor.close()
        return table_names

    def get_access_table_schema(self, table_name: str) -> Dict:
        """
        Get schema information for a specific table in Access.
        
        Returns:
            Dictionary with column names as keys and their data types as values
        """
        cursor = self.access_conn.cursor()
        columns = cursor.columns(table=table_name)
        
        schema = {}
        for column in columns:
            schema[column.column_name] = column.type_name
        
        cursor.close()
        return schema

    def map_access_to_sqlite_type(self, access_type: str) -> str:
        """
        Map Access data types to SQLite data types.
        
        Args:
            access_type: The Access data type name
            
        Returns:
            Corresponding SQLite data type
        """
        type_mapping = {
            'TEXT': 'TEXT',
            'VARCHAR': 'TEXT',
            'CHAR': 'TEXT',
            'STRING': 'TEXT',
            'MEMO': 'TEXT',  # Long text in Access
            'LONGTEXT': 'TEXT',
            'COUNTER': 'INTEGER',  # AutoNumber in Access
            'INT': 'INTEGER',
            'INTEGER': 'INTEGER',
            'LONG': 'INTEGER',  # AutoNumber in Access
            'AUTOINCREMENT': 'INTEGER',
            'BOOLEAN': 'INTEGER',  # SQLite doesn't have boolean, uses 0/1
            'BIT': 'INTEGER',
            'YESNO': 'INTEGER',
            'BYTE': 'INTEGER',
            'CURRENCY': 'REAL',
            'SINGLE': 'REAL',
            'DOUBLE': 'REAL',
            'FLOAT': 'REAL',
            'REAL': 'REAL',
            'NUMERIC': 'REAL',
            'DECIMAL': 'REAL',
            'MONEY': 'REAL',
            'DATE': 'TEXT',  # Stored as ISO8601 strings (YYYY-MM-DD)
            'DATETIME': 'TEXT',  # Stored as ISO8601 strings (YYYY-MM-DD HH:MM:SS)
            'TIME': 'TEXT',
            'GUID': 'TEXT',
            'BINARY': 'BLOB',
            'VARBINARY': 'BLOB',
            'LONGBINARY': 'BLOB',
            'OLEOBJECT': 'BLOB',
        }
        
        access_type_upper = access_type.upper()
        return type_mapping.get(access_type_upper, 'TEXT')  # Default to TEXT if type not found

    def create_sqlite_table(self, table_name: str, schema: Dict) -> None:
        """
        Create a table in SQLite based on the Access table schema.
        
        Args:
            table_name: Name of the table to create
            schema: Dictionary of column names and their Access data types
        """
        columns = []
        for col_name, col_type in schema.items():
            sqlite_type = self.map_access_to_sqlite_type(col_type)
            columns.append(f'"{col_name}" {sqlite_type}')
        
        # Fixed version - no backslashes in f-string expressions
        create_table_sql = (
            f'CREATE TABLE IF NOT EXISTS "{table_name}" ('
            + ', '.join(columns)
            + ')'
        )
        
        try:
            cursor = self.sqlite_conn.cursor()
            cursor.execute(create_table_sql)
            self.sqlite_conn.commit()
            logging.info(f"Created table '{table_name}' in SQLite")
        except sqlite3.Error as e:
            logging.error(f"Error creating table '{table_name}': {e}")
            raise

    def safe_decode(self, value):
        """
        Safely decodes problematic strings from Access database
        with multiple fallback strategies and better error handling.
        """
        if value is None:
            return None
        if isinstance(value, str):
            return value
        if isinstance(value, bytes):
            # First try with UTF-16-LE (Access default)
            try:
                return value.decode('utf-16-le', errors='strict')
            except UnicodeError as e:
                logging.debug(f"UTF-16-LE decode failed: {e}, trying fallbacks...")
                
                # Try UTF-16-LE with replacement characters
                try:
                    return value.decode('utf-16-le', errors='replace')
                except UnicodeError:
                    pass
                    
                # Try UTF-8 with replacement
                try:
                    return value.decode('utf-8', errors='replace')
                except UnicodeError:
                    pass
                    
                # Try Latin-1 (never fails)
                try:
                    return value.decode('latin-1', errors='replace')
                except:
                    pass
                    
                # Final fallback - return as escaped hex string
                return f"<binary:{value.hex()}>"
        
        # For any other type, convert to string
        return str(value)
    
    def get_row_count(self, db_type: str, table_name: str) -> int:
        """Get row count for a table in either database"""
        cursor = None
        try:
            if db_type == 'access':
                cursor = self.access_conn.cursor()
                cursor.execute(f'SELECT COUNT(*) FROM "{table_name}"')
            else:
                cursor = self.sqlite_conn.cursor()
                cursor.execute(f'SELECT COUNT(*) FROM "{table_name}"')
            return cursor.fetchone()[0]
        except Exception as e:
            logging.error(f"Error getting row count for {table_name}: {e}")
            return -1
        finally:
            if cursor:
                cursor.close()    

    def transfer_table_data(self, table_name: str, batch_size: int = 1000) -> None:
        """
        Transfer data with robust encoding handling and transaction management
        """
        try:
            # Get column names from Access
            access_cursor = self.access_conn.cursor()
            access_cursor.execute(f'SELECT * FROM "{table_name}" WHERE 1=0')
            columns = [column[0] for column in access_cursor.description]
            
            # Prepare SQLite insert statement
            column_list = ', '.join(f'"{col}"' for col in columns)
            placeholders = ', '.join(['?'] * len(columns))
            insert_sql = f'INSERT INTO "{table_name}" ({column_list}) VALUES ({placeholders})'
            
            # Read data in batches
            sqlite_cursor = self.sqlite_conn.cursor()
            access_cursor.execute(f'SELECT * FROM "{table_name}"')
            
            total_rows = 0
            batch_count = 0
            while True:
                batch = access_cursor.fetchmany(batch_size)
                if not batch:
                    break
                
                # Process batch with encoding handling
                processed_rows = []
                for row in batch:
                    try:
                        processed_row = tuple(self.safe_decode(value) for value in row)
                        processed_rows.append(processed_row)
                    except Exception as e:
                        logging.warning(f"Skipping problematic row in {table_name}: {e}")
                        continue
                
                # Insert batch with transaction
                self.sqlite_conn.execute("BEGIN TRANSACTION")
                try:
                    sqlite_cursor.executemany(insert_sql, processed_rows)
                    self.sqlite_conn.commit()
                    rows_added = len(processed_rows)
                    total_rows += rows_added
                    batch_count += 1
                    if batch_count % 10 == 0:  # Log progress every 10 batches
                        logging.info(f"Transferred {total_rows} rows to '{table_name}'")
                except Exception as e:
                    self.sqlite_conn.rollback()
                    logging.error(f"Batch insert failed, rolling back: {e}")
                    raise
                
            logging.info(f"Completed transfer for '{table_name}'. Total rows: {total_rows}")
            
            # Verify counts match
            access_count = self.get_row_count('access', table_name)
            sqlite_count = self.get_row_count('sqlite', table_name)
            if access_count != sqlite_count:
                logging.warning(f"Row count mismatch for {table_name}: Access={access_count}, SQLite={sqlite_count}")
            
        except Exception as e:
            logging.error(f"Error transferring data for table '{table_name}': {e}")
            raise
        finally:
            access_cursor.close()

    def transfer_all_tables(self, exclude_tables: Optional[List[str]] = None) -> None:
        """
        Transfer all tables from Access to SQLite.
        
        Args:
            exclude_tables: List of table names to exclude from transfer
        """
        if exclude_tables is None:
            exclude_tables = []
        
        table_names = self.get_access_table_names()
        logging.info(f"Found {len(table_names)} tables in Access database")
        
        for table_name in table_names:
            if table_name in exclude_tables:
                logging.info(f"Skipping excluded table: {table_name}")
                continue
                
            try:
                logging.info(f"Processing table: {table_name}")
                
                # Get schema and create table in SQLite
                schema = self.get_access_table_schema(table_name)
                self.create_sqlite_table(table_name, schema)
                
                # Transfer data
                self.transfer_table_data(table_name)
                
            except Exception as e:
                logging.error(f"Failed to process table {table_name}: {e}")
                # Continue with next table even if one fails
                continue

    def close_connections(self) -> None:
        """Close all database connections."""
        if self.access_conn:
            try:
                self.access_conn.close()
                logging.info("Closed Access database connection")
            except Exception as e:
                logging.error(f"Error closing Access connection: {e}")
        
        if self.sqlite_conn:
            try:
                self.sqlite_conn.close()
                logging.info("Closed SQLite database connection")
            except Exception as e:
                logging.error(f"Error closing SQLite connection: {e}")

    def __enter__(self):
        """Context manager entry."""
        self.connect_to_access()
        self.connect_to_sqlite()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.close_connections()


# def main():
#     import argparse
    
#     parser = argparse.ArgumentParser(description='Migrate data from Microsoft Access to SQLite')
#     parser.add_argument('access_db', help='Path to the Access database file (.mdb or .accdb)')
#     parser.add_argument('sqlite_db', help='Path to the SQLite database file to create')
#     parser.add_argument('--exclude', nargs='+', help='Table names to exclude from migration', default=[])
    
#     args = parser.parse_args()
    
#     start_time = datetime.now()
#     logging.info(f"Starting migration at {start_time}")
    
#     try:
#         with AccessDBToSQLiteMigrator(args.access_db, args.sqlite_db) as migrator:
#             migrator.transfer_all_tables(exclude_tables=args.exclude)
        
#         end_time = datetime.now()
#         duration = end_time - start_time
#         logging.info(f"Migration completed successfully at {end_time}")
#         logging.info(f"Total duration: {duration}")
        
#     except Exception as e:
#         logging.error(f"Migration failed: {e}")
#         raise

# if __name__ == '__main__':
#     main()

In [54]:
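# Usage examples (command line). These apply when this code is saved as
# migrate_access_to_sqlite.py and the commented-out main() above is enabled.
#
# Basic migration:
#   python migrate_access_to_sqlite.py input.mdb output.sqlite
#
# Excluding specific tables:
#   python migrate_access_to_sqlite.py input.accdb output.sqlite --exclude Table1 Table2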

In [55]:
# Driver cell: migrate one Access database into a fresh SQLite file and log
# how long the run took.
start_time = datetime.now()
logging.info(f"Starting migration at {start_time}")

# Source (Access) and destination (SQLite) files - adjust to your environment
access_db = r'C:\tmp\access_to_sqlite\Database1_be.accdb'  # Update path
sqlite_db = r'C:\tmp\access_to_sqlite\northwind_be.db'  # Update path
# exclude = ['Table1', 'Table2']  # Example tables to exclude
exclude = []  # No tables excluded in this example

# If a SQLite file from an earlier run exists, move it aside under a
# timestamped ".bak" name so the migration starts from an empty database.
if os.path.exists(sqlite_db):
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_path = f"{sqlite_db}.{timestamp}.bak"
    logging.info(f"SQLite database already exists. Renaming to {backup_path}")
    os.rename(sqlite_db, backup_path)

# The context manager opens both connections on entry and closes them on exit
with AccessDBToSQLiteMigrator(access_db, sqlite_db) as migrator:
    migrator.transfer_all_tables(exclude_tables=exclude)

# NOTE(review): transfer_all_tables logs per-table failures and continues, so
# the "completed successfully" message below is logged even when individual
# tables failed - check the log for "Failed to process table" entries.
end_time = datetime.now()
duration = end_time - start_time
logging.info(f"Migration completed successfully at {end_time}")
logging.info(f"Total duration: {duration}")

2025-05-26 21:04:09,399 - INFO - Starting migration at 2025-05-26 21:04:09.399916
2025-05-26 21:04:09,400 - INFO - SQLite database already exists. Renaming to C:\tmp\access_to_sqlite\northwind_be.db.20250526_210409.bak
2025-05-26 21:04:09,401 - INFO - Initialized migrator with Access DB: C:\tmp\access_to_sqlite\Database1_be.accdb
2025-05-26 21:04:09,403 - INFO - SQLite DB will be created at: C:\tmp\access_to_sqlite\northwind_be.db
2025-05-26 21:04:09,529 - INFO - Successfully connected to Access database
2025-05-26 21:04:09,534 - INFO - Successfully connected to SQLite database
2025-05-26 21:04:09,538 - INFO - Found 9 tables in Access database
2025-05-26 21:04:09,539 - INFO - Processing table: Customers
2025-05-26 21:04:09,543 - INFO - Created table 'Customers' in SQLite
2025-05-26 21:04:09,550 - INFO - Completed transfer for 'Customers'. Total rows: 8
2025-05-26 21:04:09,552 - INFO - Processing table: Employees
2025-05-26 21:04:09,557 - INFO - Created table 'Employees' in SQLite
2025-

In [56]:
import sqlite3
import pyodbc
import pandas as pd
import matplotlib.pyplot as plt
from texttable import Texttable
from datetime import datetime
import logging
import os

# Configure logging for the reconciliation run.
# force=True (Python 3.8+) removes handlers already attached to the root
# logger first. Without it basicConfig() does nothing when logging has been
# configured earlier in the same session, and 'db_reconciliation.log' would
# never be written.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('db_reconciliation.log'),
        logging.StreamHandler()
    ],
    force=True,
)

class DatabaseReconciler:
    def __init__(self, access_db_path: str, sqlite_db_path: str):
        self.access_db_path = os.path.abspath(access_db_path)
        self.sqlite_db_path = os.path.abspath(sqlite_db_path)
        self.access_conn = None
        self.sqlite_conn = None
        self.report_data = []
        
        # Verify databases exist
        if not os.path.exists(self.access_db_path):
            raise FileNotFoundError(f"Access database not found: {self.access_db_path}")
        if not os.path.exists(self.sqlite_db_path):
            raise FileNotFoundError(f"SQLite database not found: {self.sqlite_db_path}")

    def connect_databases(self):
        """
        Open the Access (ODBC) and the SQLite connection and keep both on the instance.

        Raises:
            Exception: Any failure while connecting is logged and re-raised
        """
        driver = 'Microsoft Access Driver (*.mdb, *.accdb)'
        try:
            # Access first; the triple braces render as literal {..} around the driver
            conn_str = f'DRIVER={{{driver}}};DBQ={self.access_db_path};'
            access_connection = pyodbc.connect(conn_str)
            self.access_conn = access_connection

            # Then SQLite
            sqlite_connection = sqlite3.connect(self.sqlite_db_path)
            self.sqlite_conn = sqlite_connection

            logging.info("Successfully connected to both databases")
        except Exception as e:
            logging.error(f"Failed to connect to databases: {e}")
            raise

    def get_table_names(self, db_type: str) -> list:
        """Get list of tables in specified database"""
        if db_type == 'access':
            cursor = self.access_conn.cursor()
            tables = cursor.tables(tableType='TABLE')
            return [table.table_name for table in tables]
        elif db_type == 'sqlite':
            cursor = self.sqlite_conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            return [row[0] for row in cursor.fetchall()]
        else:
            raise ValueError("Invalid database type specified")

    def get_row_count(self, db_type: str, table_name: str) -> int:
        """Get row count for a specific table"""
        try:
            if db_type == 'access':
                cursor = self.access_conn.cursor()
                cursor.execute(f'SELECT COUNT(*) FROM "{table_name}"')
                return cursor.fetchone()[0]
            elif db_type == 'sqlite':
                cursor = self.sqlite_conn.cursor()
                cursor.execute(f'SELECT COUNT(*) FROM "{table_name}"')
                return cursor.fetchone()[0]
        except Exception as e:
            logging.warning(f"Error counting rows in {table_name}: {e}")
            return -1

    def compare_schemas(self, table_name: str) -> dict:
        """Compare table schemas between Access and SQLite"""
        result = {
            'table': table_name,
            'columns_match': False,
            'missing_columns': [],
            'type_mismatches': []
        }
        
        try:
            # Get Access schema
            access_cursor = self.access_conn.cursor()
            access_columns = access_cursor.columns(table=table_name)
            access_schema = {col.column_name: col.type_name for col in access_columns}
            
            # Get SQLite schema
            sqlite_cursor = self.sqlite_conn.cursor()
            sqlite_cursor.execute(f"PRAGMA table_info('{table_name}')")
            sqlite_schema = {row[1]: row[2] for row in sqlite_cursor.fetchall()}
            
            # Compare
            missing_columns = set(access_schema.keys()) - set(sqlite_schema.keys())
            if missing_columns:
                result['missing_columns'] = list(missing_columns)
            
            type_mismatches = []
            for col in access_schema:
                if col in sqlite_schema:
                    # Simple type comparison - you might want to enhance this
                    if access_schema[col].upper() != sqlite_schema[col].upper():
                        type_mismatches.append({
                            'column': col,
                            'access_type': access_schema[col],
                            'sqlite_type': sqlite_schema[col]
                        })
            
            result['type_mismatches'] = type_mismatches
            result['columns_match'] = not (missing_columns or type_mismatches)
            
        except Exception as e:
            logging.warning(f"Error comparing schemas for {table_name}: {e}")
            result['error'] = str(e)
        
        return result

    def sample_and_compare_data(self, table_name: str, sample_size: int = 10) -> dict:
        """Compare sample data between databases with Access-compatible sampling"""
        result = {
            'table': table_name,
            'sample_matches': 0,
            'sample_mismatches': 0,
            'sample_errors': 0,
            'sample_results': []
        }
        
        try:
            # Get column names
            access_cursor = self.access_conn.cursor()
            access_cursor.execute(f'SELECT * FROM "{table_name}" WHERE 1=0')
            columns = [column[0] for column in access_cursor.description]
            
            # Get total row count for sampling
            total_rows = self.get_row_count('access', table_name)
            if total_rows <= 0:
                logging.warning(f"Table {table_name} is empty or inaccessible")
                return result
            
            # Get sample from Access using TOP instead of LIMIT
            if total_rows <= sample_size:
                # Small table - take all rows
                access_cursor.execute(f'SELECT TOP {sample_size} * FROM "{table_name}"')
            else:
                # Use RND with record number for sampling
                access_cursor.execute(f'''
                    SELECT TOP {sample_size} * FROM "{table_name}" 
                    ORDER BY RND(CLNG([{columns[0]}]))
                ''')
            
            access_sample = access_cursor.fetchall()
            
            # Get corresponding rows from SQLite
            sqlite_cursor = self.sqlite_conn.cursor()
            
            for i, access_row in enumerate(access_sample):
                try:
                    # Build WHERE clause using first 3 columns that have values
                    where_parts = []
                    params = []
                    used_cols = 0
                    
                    for j in range(len(columns)):
                        if access_row[j] is not None:  # Only use non-NULL columns for matching
                            where_parts.append(f'"{columns[j]}" = ?')
                            params.append(access_row[j])
                            used_cols += 1
                            if used_cols >= 3:  # Use up to 3 columns for matching
                                break
                    
                    if not where_parts:
                        result['sample_errors'] += 1
                        result['sample_results'].append({
                            'sample_id': i+1,
                            'error': "No non-NULL columns found for matching"
                        })
                        continue
                    
                    where_clause = ' AND '.join(where_parts)
                    sqlite_cursor.execute(
                        f'SELECT * FROM "{table_name}" WHERE {where_clause}',
                        params
                    )
                    sqlite_row = sqlite_cursor.fetchone()
                    
                    # Compare rows
                    if sqlite_row is None:
                        match = False
                        diff = "Row not found in SQLite"
                    else:
                        match = (tuple(access_row) == tuple(sqlite_row))
                        if not match:
                            diff = {
                                'column': columns,
                                'access_values': access_row,
                                'sqlite_values': sqlite_row
                            }
                        else:
                            diff = None
                    
                    result['sample_results'].append({
                        'sample_id': i+1,
                        'match': match,
                        'diff': diff if not match else None
                    })
                    
                    if match:
                        result['sample_matches'] += 1
                    else:
                        result['sample_mismatches'] += 1
                
                except Exception as e:
                    result['sample_errors'] += 1
                    result['sample_results'].append({
                        'sample_id': i+1,
                        'error': str(e)
                    })
                    logging.warning(f"Error comparing sample {i+1} in {table_name}: {e}")
        
        except Exception as e:
            logging.warning(f"Error sampling data from {table_name}: {e}")
            result['error'] = str(e)
        
        return result

    def generate_reconciliation_report(self):
        """Generate comprehensive reconciliation report"""
        self.report_data = []
        access_tables = self.get_table_names('access')
        sqlite_tables = self.get_table_names('sqlite')
        
        # Check for missing tables
        missing_tables = set(access_tables) - set(sqlite_tables)
        extra_tables = set(sqlite_tables) - set(access_tables)
        
        for table in access_tables:
            if table in missing_tables:
                self.report_data.append({
                    'table': table,
                    'status': 'MISSING',
                    'access_rows': self.get_row_count('access', table),
                    'sqlite_rows': 0,
                    'schema_match': False,
                    'sample_match_rate': 0
                })
                continue
                
            # Get row counts
            access_rows = self.get_row_count('access', table)
            sqlite_rows = self.get_row_count('sqlite', table)
            
            # Compare schemas
            schema_result = self.compare_schemas(table)
            
            # Sample data comparison
            sample_result = self.sample_and_compare_data(table)
            sample_match_rate = (
                sample_result['sample_matches'] / 
                (sample_result['sample_matches'] + sample_result['sample_mismatches'])
                if (sample_result['sample_matches'] + sample_result['sample_mismatches']) > 0 
                else 0
            )
            
            self.report_data.append({
                'table': table,
                'status': 'COMPLETE' if access_rows == sqlite_rows else 'INCOMPLETE',
                'access_rows': access_rows,
                'sqlite_rows': sqlite_rows,
                'schema_match': schema_result['columns_match'],
                'sample_match_rate': sample_match_rate,
                'missing_columns': schema_result['missing_columns'],
                'type_mismatches': schema_result['type_mismatches'],
                'sample_results': sample_result['sample_results']
            })
        
        # Add extra tables found in SQLite
        for table in extra_tables:
            self.report_data.append({
                'table': table,
                'status': 'EXTRA',
                'access_rows': 0,
                'sqlite_rows': self.get_row_count('sqlite', table),
                'schema_match': False,
                'sample_match_rate': 0
            })

    def display_text_report(self):
        """
        Print the reconciliation report as text.

        First a summary table with one line per entry of
        ``self.report_data`` is printed, then a detail section for every
        entry that is not 'COMPLETE', has a schema difference or has a
        sample match rate below 1.
        """
        summary = Texttable()
        summary.set_deco(Texttable.HEADER)
        summary.set_cols_align(["l", "l", "r", "r", "l", "r"])
        summary.set_cols_width([20, 12, 12, 12, 8, 12])

        # Header
        summary.add_row(
            ["Table", "Status", "Access Rows", "SQLite Rows", "Schema", "Sample Match"]
        )

        # Data: one line per report entry
        for item in self.report_data:
            line = [
                item['table'],
                item['status'],
                item['access_rows'],
                item['sqlite_rows'],
            ]
            line.append("OK" if item.get('schema_match', False) else "DIFF")
            line.append(f"{item.get('sample_match_rate', 0)*100:.1f}%")
            summary.add_row(line)

        print("\nDatabase Reconciliation Summary:")
        print(summary.draw())

        # Details, only for entries that show some kind of problem
        print("\nDetailed Findings:")
        for item in self.report_data:
            nothing_to_report = (
                item['status'] == 'COMPLETE'
                and item['schema_match']
                and not (item.get('sample_match_rate', 1) < 1)
            )
            if nothing_to_report:
                continue

            print(f"\nTable: {item['table']}")
            print(f"Status: {item['status']}")
            print(f"Row counts: Access={item['access_rows']}, SQLite={item['sqlite_rows']}")

            if item.get('missing_columns'):
                print(f"Missing columns: {', '.join(item['missing_columns'])}")

            if item.get('type_mismatches'):
                print("Type mismatches:")
                for mismatch in item['type_mismatches']:
                    print(f"  {mismatch['column']}: Access={mismatch['access_type']}, SQLite={mismatch['sqlite_type']}")

            if not (item.get('sample_match_rate', 1) < 1):
                continue

            print(f"Sample match rate: {item['sample_match_rate']*100:.1f}%")
            for sample in item.get('sample_results', []):
                # Entries without a 'match' key (errors) count as not mismatching
                if sample.get('match', True):
                    continue
                print(f"  Sample {sample['sample_id']} mismatch:")

                # Only dict diffs carry per-column values
                if not isinstance(sample.get('diff'), dict):
                    continue
                triples = zip(
                    sample['diff']['column'],
                    sample['diff']['access_values'],
                    sample['diff']['sqlite_values']
                )
                for col, acc_val, sql_val in triples:
                    if acc_val != sql_val:
                        print(f"    {col}: Access={acc_val}, SQLite={sql_val}")

    def generate_visualizations(self, output_dir='reports'):
        """Generate visualizations of reconciliation results"""
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        
        # Create DataFrame for visualization
        df = pd.DataFrame(self.report_data)
        
        # 1. Migration Completeness Chart
        plt.figure(figsize=(10, 6))
        status_counts = df['status'].value_counts()
        plt.bar(status_counts.index, status_counts.values)
        plt.title('Table Migration Status Distribution')
        plt.ylabel('Number of Tables')
        plt.savefig(os.path.join(output_dir, 'migration_status.png'))
        plt.close()
        
        # 2. Row Count Comparison Chart
        plt.figure(figsize=(12, 8))
        df_filtered = df[df['status'].isin(['COMPLETE', 'INCOMPLETE'])]
        df_filtered = df_filtered.sort_values('access_rows', ascending=False)
        
        x = range(len(df_filtered))
        width = 0.35
        
        plt.bar(x, df_filtered['access_rows'], width, label='Access')
        plt.bar([i + width for i in x], df_filtered['sqlite_rows'], width, label='SQLite')
        
        plt.xlabel('Tables')
        plt.ylabel('Row Counts')
        plt.title('Row Count Comparison (Access vs SQLite)')
        plt.xticks([i + width/2 for i in x], df_filtered['table'], rotation=90)
        plt.legend()
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'row_count_comparison.png'))
        plt.close()
        
        # 3. Data Match Quality Heatmap
        plt.figure(figsize=(12, 6))
        df_filtered = df[~df['status'].isin(['MISSING', 'EXTRA'])]
        
        # Create match quality score (0-100)
        df_filtered['quality_score'] = (
            (df_filtered['schema_match'].astype(int) * 50) +
            (df_filtered['sample_match_rate'] * 50)
        )
        df_filtered = df_filtered.sort_values('quality_score')
        
        plt.barh(df_filtered['table'], df_filtered['quality_score'], color='skyblue')
        plt.xlabel('Data Quality Score (0-100)')
        plt.title('Migration Data Quality by Table')
        plt.xlim(0, 100)
        plt.grid(axis='x', alpha=0.3)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'data_quality.png'))
        plt.close()

    def close_connections(self):
        """Close database connections"""
        if self.access_conn:
            self.access_conn.close()
        if self.sqlite_conn:
            self.sqlite_conn.close()

# def main():
#     import argparse
    
#     parser = argparse.ArgumentParser(description='Reconcile Access and SQLite databases')
#     parser.add_argument('access_db', help='Path to Access database file')
#     parser.add_argument('sqlite_db', help='Path to SQLite database file')
#     parser.add_argument('--output', help='Output directory for reports', default='reports')
    
#     args = parser.parse_args()
    
#     reconciler = DatabaseReconciler(args.access_db, args.sqlite_db)
    
#     try:
#         reconciler.connect_databases()
#         reconciler.generate_reconciliation_report()
#         reconciler.display_text_report()
#         reconciler.generate_visualizations(args.output)
        
#         print(f"\nReports and visualizations saved to: {args.output}")
#     finally:
#         reconciler.close_connections()

# if __name__ == '__main__':
#     main()

In [58]:
from datetime import datetime

# Driver cell: reconcile the Access database against the SQLite database,
# print the text report, write the charts, and log how long the run took.
start_time = datetime.now()
logging.info(f"Starting db recon at {start_time}")

# Hard-coded Windows paths; edit these before running on another machine.
access_db = r'C:\tmp\access_to_sqlite\Database1_be.accdb'  # Update path
sqlite_db = r'C:\tmp\access_to_sqlite\northwind_be.db'  # Update path
# exclude = ['Table1', 'Table2']  # Example tables to exclude
# NOTE(review): `exclude` is not passed to DatabaseReconciler below, so it has
# no effect in this cell.
exclude = []  # No tables excluded in this example

output = r'C:\tmp\access_to_sqlite\reports'  # Directory for reports
reconciler = DatabaseReconciler(access_db, sqlite_db)

# try/finally ensures close_connections() runs even if any step below raises.
try:
    reconciler.connect_databases()
    reconciler.generate_reconciliation_report()
    reconciler.display_text_report()
    reconciler.generate_visualizations(output)
    
    print(f"\nReports and visualizations saved to: {output}")
finally:
    reconciler.close_connections()

# Only reached when the try block finished without an exception; a failure
# propagates out of the cell after the finally clause and skips these logs.
end_time = datetime.now()
duration = end_time - start_time
logging.info(f"Recon completed successfully at {end_time}")
logging.info(f"Total duration: {duration}")

2025-05-26 21:04:20,410 - INFO - Starting db recon at 2025-05-26 21:04:20.410881
2025-05-26 21:04:20,522 - INFO - Successfully connected to both databases



Database Reconciliation Summary:
Table                  Status          Access Rows    SQLite Rows   Schema     Sample Match
Customers              COMPLETE                  8              8   DIFF               0.0%
Employees              COMPLETE                 11             11   DIFF               0.0%
NorthwindFeatures      COMPLETE                 34             34   DIFF             100.0%
OrderDetails           COMPLETE                 91             91   DIFF               0.0%
Orders                 COMPLETE                 39             39   DIFF               0.0%
OrderStatus            COMPLETE                  5              5   DIFF               0.0%
Products               COMPLETE                 43             43   DIFF               0.0%
SystemSettings         COMPLETE                  4              4   DIFF             100.0%
Welcome                COMPLETE                  1              1   DIFF             100.0%

Detailed Findings:

Table: Customers
Status: 

2025-05-26 21:04:20,976 - INFO - Recon completed successfully at 2025-05-26 21:04:20.976864
2025-05-26 21:04:20,977 - INFO - Total duration: 0:00:00.565983



Reports and visualizations saved to: C:\tmp\access_to_sqlite\reports
