## Eliminar fechas fuera del rango necesario en MySQL

Se utiliza un código, ejecutado localmente o en Colab, para la limpieza de la tabla `taxi_fhv_data`.

In [7]:
import mysql.connector
from dotenv import load_dotenv
import os
import time
import logging
from datetime import datetime

# Logging setup: records at INFO level and above are appended to
# database_deletion.log, each prefixed with a timestamp and its level name.
logging.basicConfig(
    filename='database_deletion.log',
    format='%(asctime)s - %(levelname)s: %(message)s',
    level=logging.INFO,
)
# Module-level logger used by the code below.
logger = logging.getLogger(__name__)

class DatabaseManager:
    """Batch-delete helper for the ``taxi_fhv_data`` MySQL table.

    Connection settings are read from the ``DB_HOST``, ``DB_USER``,
    ``DB_PASSWORD`` and ``DB_NAME`` environment variables after
    ``load_dotenv()`` has been called.
    """

    # MySQL error number for a lock wait timeout; the only error that is retried.
    _LOCK_WAIT_TIMEOUT_ERRNO = 1205
    # Attempts per batch before a lock wait timeout is treated as fatal.
    _MAX_RETRIES = 5

    # Values are bound as query parameters instead of being formatted into the
    # SQL text, so they are always quoted/escaped by the connector.
    #
    # NOTE(review): if Pickup_datetime is a DATETIME column, a date-only upper
    # bound is compared as midnight of end_date, so rows later on end_date
    # would fall outside the range and be deleted - confirm this is intended.
    _COUNT_QUERY = (
        "SELECT COUNT(*) FROM taxi_fhv_data "
        "WHERE NOT (Pickup_datetime BETWEEN %s AND %s)"
    )
    _DELETE_QUERY = (
        "DELETE FROM taxi_fhv_data "
        "WHERE NOT (Pickup_datetime BETWEEN %s AND %s) "
        "LIMIT %s"
    )

    def __init__(self):
        # Load environment variables
        load_dotenv()

        # Get database credentials from environment variables
        self.db_host = os.getenv('DB_HOST')
        self.db_user = os.getenv('DB_USER')
        self.db_password = os.getenv('DB_PASSWORD')
        self.db_name = os.getenv('DB_NAME')

    def _create_connection(self):
        """Open and return a new connection using the stored credentials.

        :raises mysql.connector.Error: if the connection cannot be
            established (the error is logged before being re-raised)
        """
        try:
            conn = mysql.connector.connect(
                host=self.db_host,
                user=self.db_user,
                password=self.db_password,
                database=self.db_name
            )
            return conn
        except mysql.connector.Error as err:
            logger.error(f"Connection error: {err}")
            raise

    def _delete_batch(self, conn, cursor, params):
        """Run one batched DELETE and commit it, retrying on lock wait timeouts.

        :param conn: open connection with autocommit disabled
        :param cursor: cursor created from ``conn``
        :param params: ``(start_date, end_date, batch_size)`` bound to the
            DELETE statement
        :return: number of rows removed by this batch
        :raises mysql.connector.Error: immediately for any error other than a
            lock wait timeout, or for a lock wait timeout that persists
            through every attempt
        """
        last_attempt = self._MAX_RETRIES - 1
        for attempt in range(self._MAX_RETRIES):
            try:
                cursor.execute(self._DELETE_QUERY, params)
                conn.commit()
                return cursor.rowcount
            except mysql.connector.Error as err:
                if err.errno != self._LOCK_WAIT_TIMEOUT_ERRNO:
                    raise

                # Roll back before waiting so the failed transaction does not
                # keep anything open during the back-off.
                conn.rollback()

                if attempt == last_attempt:
                    # Give up loudly: falling through here would leave the
                    # caller without a row count for this batch.
                    logger.error(f"Lock timeout persisted after {self._MAX_RETRIES} attempts. Giving up")
                    print(f"Lock timeout persisted after {self._MAX_RETRIES} attempts. Giving up")
                    raise

                # Exponential backoff: 1, 2, 4, 8 seconds
                wait_time = 2 ** attempt
                logger.warning(f"Lock timeout. Retry {attempt + 1}/{self._MAX_RETRIES}. Waiting {wait_time} seconds")
                print(f"Lock timeout. Retry {attempt + 1}/{self._MAX_RETRIES}. Waiting {wait_time} seconds")
                time.sleep(wait_time)

    def delete_records_outside_date_range(self, start_date, end_date, batch_size=10000):
        """
        Delete records outside specified date range with retry and batching

        Rows are removed in committed batches, so if the process fails midway
        the batches already committed stay deleted.

        :param start_date: Start date for keeping records ('YYYY-MM-DD' string)
        :param end_date: End date for keeping records ('YYYY-MM-DD' string)
        :param batch_size: Number of records to delete in each batch
            (positive integer)
        :return: Total number of rows deleted; 0 if the dates are malformed or
            start_date is after end_date (nothing is deleted in that case)
        :raises ValueError: if batch_size is not a positive integer
        :raises mysql.connector.Error: on connection or query failure,
            including a lock wait timeout that outlasts all retries
        """
        conn = None
        cursor = None
        total_deleted = 0

        try:
            # Print and log start of deletion process
            print("Starting deletion process...")
            logger.info("Starting deletion process...")

            # Validate the arguments before touching the database
            try:
                first_kept = datetime.strptime(start_date, '%Y-%m-%d')
                last_kept = datetime.strptime(end_date, '%Y-%m-%d')
            except ValueError:
                logger.error("Invalid date format. Use YYYY-MM-DD")
                print("Invalid date format. Use YYYY-MM-DD")
                return 0

            # With start > end the BETWEEN predicate is never true, so the
            # negated filter would match (and delete) every dated row.
            if first_kept > last_kept:
                logger.error("Invalid date range. start_date must not be after end_date")
                print("Invalid date range. start_date must not be after end_date")
                return 0

            # LIMIT 0 would end the loop without deleting anything and a
            # negative LIMIT is a SQL error, so reject both up front.
            batch_size = int(batch_size)
            if batch_size < 1:
                raise ValueError("batch_size must be a positive integer")

            conn = self._create_connection()
            conn.autocommit = False  # Each batch is committed explicitly
            cursor = conn.cursor()

            # Initial check to estimate total records to delete
            cursor.execute(self._COUNT_QUERY, (start_date, end_date))
            total_records = cursor.fetchone()[0]
            logger.info(f"Total records to delete: {total_records}")
            print(f"Total records to delete: {total_records}")

            # Delete in batches to reduce lock contention
            delete_params = (start_date, end_date, batch_size)
            while True:
                rows_deleted = self._delete_batch(conn, cursor, delete_params)
                total_deleted += rows_deleted

                logger.info(f"Batch delete - Rows deleted: {rows_deleted}")
                print(f"Batch delete - Rows deleted: {rows_deleted}")

                # An empty batch means nothing outside the range is left
                if rows_deleted == 0:
                    break

            logger.info(f"Total records deleted: {total_deleted}")
            print(f"Total records deleted: {total_deleted}")
            return total_deleted

        except Exception as e:
            # Discard any uncommitted work before reporting the failure
            if conn is not None:
                conn.rollback()
            logger.error(f"Deletion error: {e}")
            print(f"Deletion error: {e}")
            raise
        finally:
            # Ensure resources are closed
            if cursor is not None:
                cursor.close()
            if conn is not None:
                conn.close()

            # Print and log end of deletion process
            print("Deletion process completed.")
            logger.info("Deletion process completed.")

def main():
    """Delete every taxi_fhv_data row outside 2024-01-01..2024-08-31.

    Prints the number of deleted rows on success, or the error message if the
    deletion raises.
    """
    keep_from, keep_until = '2024-01-01', '2024-08-31'
    manager = DatabaseManager()

    try:
        removed = manager.delete_records_outside_date_range(keep_from, keep_until)
        print(f"Successfully deleted {removed} records.")
    except Exception as exc:
        print(f"Deletion process failed: {exc}")

# Run main() only when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()


Starting deletion process...
Total records to delete: 1527
Batch delete - Rows deleted: 1527
Batch delete - Rows deleted: 0
Total records deleted: 1527
Deletion process completed.
Successfully deleted 1527 records.


## 🗄️ Database Manager - Sistema de Gestión de Datos de Taxis FHV

🚕 Eliminación (DROP) de la tabla `taxi_fhv_data` de la base de datos de taxis, con registro de eventos (logging)

In [8]:
import mysql.connector
from dotenv import load_dotenv
import os
import logging

# Configure logging: INFO and above go to database_deletion.log.
# The file handler is created explicitly with UTF-8 because the messages logged
# below contain emoji, which a platform default encoding such as cp1252 cannot
# write. delay=True postpones opening the file until the first record is
# emitted.
# Note: basicConfig() does nothing if the root logger already has handlers
# (for example when logging was configured earlier in the same session).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s: %(message)s',
    handlers=[
        logging.FileHandler('database_deletion.log', encoding='utf-8', delay=True)
    ],
)
logger = logging.getLogger(__name__)

class DatabaseManager:
    """Connects to MySQL with credentials from the environment and drops
    the ``taxi_fhv_data`` table."""

    def __init__(self):
        # Load environment variables
        load_dotenv()

        # Database credentials are taken from environment variables
        env = os.getenv
        self.db_host, self.db_user = env('DB_HOST'), env('DB_USER')
        self.db_password, self.db_name = env('DB_PASSWORD'), env('DB_NAME')

    def _create_connection(self):
        """Open and return a database connection.

        Connector errors are logged and re-raised.
        """
        settings = {
            'host': self.db_host,
            'user': self.db_user,
            'password': self.db_password,
            'database': self.db_name,
        }
        try:
            connection = mysql.connector.connect(**settings)
        except mysql.connector.Error as err:
            logger.error(f"🔴 Error de conexión: {err}")
            raise
        logger.info("🟢 Conexión establecida exitosamente")
        return connection

    def drop_table(self):
        """Drop the taxi_fhv_data table if it exists.

        Progress is printed and logged. On any error a rollback is attempted,
        the error is printed and logged, and the exception is re-raised. The
        cursor and connection are closed in every case.
        """
        connection = cur = None

        try:
            connection = self._create_connection()
            cur = connection.cursor()

            # Announce the start of the drop
            start_msg = "🚀 Iniciando el proceso de eliminación de la tabla..."
            print(start_msg)
            logger.info(start_msg)

            cur.execute("DROP TABLE IF EXISTS taxi_fhv_data")
            connection.commit()

            done_msg = "🗑️ Tabla taxi_fhv_data eliminada exitosamente"
            logger.info(done_msg)
            print(done_msg)

        except Exception as exc:
            if connection:
                connection.rollback()
            failure_msg = f"❌ Error durante la eliminación de la tabla: {exc}"
            logger.error(failure_msg)
            print(failure_msg)
            raise

        finally:
            # Always release the cursor and the connection
            if cur:
                cur.close()
            if connection:
                connection.close()
                logger.info("🔌 Conexión cerrada")

def main():
    """Drop the taxi_fhv_data table and print whether it succeeded."""
    manager = DatabaseManager()

    try:
        manager.drop_table()
        print("✅ Tabla eliminada exitosamente.")
    except Exception as exc:
        print(f"❌ El proceso de eliminación de la tabla falló: {exc}")

# Run main() only when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()


🚀 Iniciando el proceso de eliminación de la tabla...
🗑️ Tabla taxi_fhv_data eliminada exitosamente
✅ Tabla eliminada exitosamente.
