# In [None]:
import mysql.connector
from dotenv import load_dotenv
import os
import time
import logging
from datetime import datetime, timedelta

# Configure logging
# Every message logged by this script contains emoji, so the log file is
# opened with an explicit UTF-8 encoding. Relying on the platform default
# encoding loses those records on locales that cannot encode them
# (for example cp1252 on Windows).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s: %(message)s',
    handlers=[logging.FileHandler('database_deletion.log', encoding='utf-8')],
)
logger = logging.getLogger(__name__)

class DatabaseManager:
    """Batch-delete helper for the ``taxi_fhv_data`` MySQL table.

    Connection settings are read from the environment variables ``DB_HOST``,
    ``DB_USER``, ``DB_PASSWORD`` and ``DB_NAME`` after ``load_dotenv()`` has
    been called.
    """

    # MySQL error number for "Lock wait timeout exceeded"; the only error
    # that is retried.
    _LOCK_WAIT_TIMEOUT_ERRNO = 1205

    # Maximum number of attempts for a single batch before the error is
    # propagated to the caller.
    _MAX_RETRIES = 5

    # Both statements use driver-side parameter binding (%s) instead of
    # string formatting so that no caller-supplied value is spliced into SQL.
    _COUNT_QUERY = """
        SELECT COUNT(*) FROM taxi_fhv_data
        WHERE source = 'U'
        AND Pickup_datetime BETWEEN %s AND %s
    """
    _DELETE_QUERY = """
        DELETE FROM taxi_fhv_data
        WHERE source = 'U'
        AND Pickup_datetime BETWEEN %s AND %s
        LIMIT %s
    """

    def __init__(self):
        """Load environment variables and remember the database credentials."""
        # Load environment variables
        load_dotenv()

        # Get database credentials from environment variables
        self.db_host = os.getenv('DB_HOST')
        self.db_user = os.getenv('DB_USER')
        self.db_password = os.getenv('DB_PASSWORD')
        self.db_name = os.getenv('DB_NAME')

    def _create_connection(self):
        """Create a database connection.

        :return: A new ``mysql.connector`` connection.
        :raises mysql.connector.Error: If the connection cannot be opened
            (the error is logged before being re-raised).
        """
        try:
            conn = mysql.connector.connect(
                host=self.db_host,
                user=self.db_user,
                password=self.db_password,
                database=self.db_name
            )
            return conn
        except mysql.connector.Error as err:
            logger.error(f"🔴 Connection error: {err}")
            raise

    @staticmethod
    def _report(level, message):
        """Write *message* to the module logger at *level* and echo it to stdout."""
        logger.log(level, message)
        print(message)

    @staticmethod
    def _date_windows(start, end, window_days):
        """Yield consecutive ``(window_start, window_end)`` pairs covering ``[start, end]``.

        Windows are contiguous (each one starts where the previous one ended)
        and the last one is clamped to *end*, so nothing inside the range is
        skipped and nothing outside it is touched.

        :param start: First ``datetime`` of the range.
        :param end: Last ``datetime`` of the range.
        :param window_days: Maximum length of one window in days (>= 1).
        :raises ValueError: If *window_days* is smaller than 1.
        """
        window_days = int(window_days)
        if window_days < 1:
            raise ValueError(f"window_days must be >= 1, got {window_days}")

        step = timedelta(days=window_days)
        window_start = start
        while window_start <= end:
            window_end = min(window_start + step, end)
            yield window_start, window_end
            if window_end >= end:
                # Range fully covered; stopping here also prevents an endless
                # loop on a zero-length final window.
                return
            window_start = window_end

    def _delete_batch_with_retry(self, conn, cursor, params):
        """Run one ``DELETE ... LIMIT`` batch, retrying on lock wait timeouts.

        :param conn: Open connection; committed on success, rolled back on a
            lock wait timeout.
        :param cursor: Cursor belonging to *conn*.
        :param params: ``(window_start, window_end, batch_size)`` bound to
            the delete statement.
        :return: Number of rows deleted by this batch.
        :raises mysql.connector.Error: For any non-retryable error, or for a
            lock wait timeout once all retries are used up.
        """
        for attempt in range(self._MAX_RETRIES):
            try:
                cursor.execute(self._DELETE_QUERY, params)
                rows_deleted = cursor.rowcount
                conn.commit()
                return rows_deleted
            except mysql.connector.Error as err:
                if err.errno != self._LOCK_WAIT_TIMEOUT_ERRNO:
                    raise
                # Roll back before sleeping so no locks are held while waiting.
                conn.rollback()
                if attempt == self._MAX_RETRIES - 1:
                    # Out of retries: surface the failure instead of silently
                    # continuing with an undefined/stale row count.
                    raise
                wait_time = 2 ** attempt  # exponential backoff: 1, 2, 4, 8 s
                self._report(
                    logging.WARNING,
                    f"⏳ Lock timeout. Retry {attempt + 1}/{self._MAX_RETRIES}. Waiting {wait_time} seconds",
                )
                time.sleep(wait_time)

    def delete_records_with_source_U_in_date_range(self, start_date, end_date, batch_size=100000, window_days=5):
        """
        Delete records where source is 'U' within a specified date range.

        The range is processed in contiguous windows of at most ``window_days``
        days; inside each window rows are removed in batches of ``batch_size``
        (each batch is committed on its own) until a batch deletes nothing.
        A batch that hits a lock wait timeout is retried with exponential
        backoff.

        Both bounds are compared with ``BETWEEN`` against the date strings,
        i.e. the range is inclusive on both ends. NOTE(review): if
        ``Pickup_datetime`` is a DATETIME column the upper bound is midnight
        at the start of ``end_date`` - confirm this is the intended cut-off.

        :param start_date: Start date for the range in 'YYYY-MM-DD' format
        :param end_date: End date for the range in 'YYYY-MM-DD' format
        :param batch_size: Number of records to delete in each batch (>= 1)
        :param window_days: Length of one date window in days (>= 1)
        :return: Total number of rows deleted
        :raises ValueError: If a date is malformed or ``batch_size`` /
            ``window_days`` is smaller than 1
        :raises mysql.connector.Error: On connection or query failure
        """
        conn = None
        cursor = None
        total_deleted = 0

        try:
            # Validate the inputs before touching the database.
            batch_size = int(batch_size)
            if batch_size < 1:
                raise ValueError(f"batch_size must be >= 1, got {batch_size}")
            range_start = datetime.strptime(start_date, '%Y-%m-%d')
            range_end = datetime.strptime(end_date, '%Y-%m-%d')

            conn = self._create_connection()
            conn.autocommit = False  # Each batch is committed explicitly
            cursor = conn.cursor()

            self._report(logging.INFO, "🚀 Starting deletion process...")

            for window_start, window_end in self._date_windows(range_start, range_end, window_days):
                start_str = window_start.strftime('%Y-%m-%d')
                end_str = window_end.strftime('%Y-%m-%d')

                # Initial check to estimate total records to delete in this window
                cursor.execute(self._COUNT_QUERY, (start_str, end_str))
                total_records = cursor.fetchone()[0]
                self._report(
                    logging.INFO,
                    f"📊 Total records to delete from {start_str} to {end_str}: {total_records}",
                )

                # Delete in batches to reduce lock contention
                while True:
                    rows_deleted = self._delete_batch_with_retry(
                        conn, cursor, (start_str, end_str, batch_size)
                    )
                    total_deleted += rows_deleted
                    self._report(
                        logging.INFO,
                        f"🗑️ Batch delete - Rows deleted from {start_str} to {end_str}: {rows_deleted}",
                    )

                    # Window is empty once a batch removes nothing
                    if rows_deleted == 0:
                        break

            self._report(logging.INFO, f"📈 Total records deleted: {total_deleted}")
            # Only announce completion when the run actually succeeded.
            self._report(logging.INFO, "✅ Deletion process completed.")
            return total_deleted

        except Exception as e:
            if conn:
                conn.rollback()
            self._report(logging.ERROR, f"❌ Deletion error: {e}")
            raise
        finally:
            # Ensure resources are closed
            if cursor:
                cursor.close()
            if conn:
                conn.close()

def main():
    """Run the deletion for the fixed range 2024-08-26 .. 2024-08-31.

    Prints the number of deleted rows on success. Any exception raised by
    the deletion is caught and reported on stdout instead of propagating.
    """
    manager = DatabaseManager()
    start_date, end_date = '2024-08-26', '2024-08-31'

    try:
        deleted_count = manager.delete_records_with_source_U_in_date_range(
            start_date,
            end_date,
        )
        print(f"✅ Successfully deleted {deleted_count} records.")
    except Exception as e:
        print(f"❌ Deletion process failed: {e}")


# Only run the deletion when executed as a script, not on import.
if __name__ == "__main__":
    main()

# Cell output: 🚀 Starting deletion process...
