# In [3]:
import pandas as pd
import os
from sqlalchemy import create_engine
import logging
import time

# --- Log directory ---
# The log file configured below lives inside 'logs', so the directory is
# created first (a no-op when it is already present).
os.makedirs('logs', exist_ok=True)

# --- Root logger configuration ---
# Append DEBUG-and-above records to logs/ingestion_db.log, formatted as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.DEBUG,
    filemode="a",
    filename="logs/ingestion_db.log",
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# --- Database Engine Setup ---
# The connection URL may be supplied through the INVENTORY_DB_URL environment
# variable so that credentials do not have to live in source control. An unset
# or empty variable falls back to the original literal, so existing setups
# keep working unchanged.
# NOTE(review): the fallback URL embeds a password ('%40' is a URL-encoded
# '@'); consider rotating it and dropping the fallback once every environment
# sets INVENTORY_DB_URL.
engine = create_engine(
    os.environ.get("INVENTORY_DB_URL")
    or "mysql+pymysql://root:Akshad%40170405@localhost:3306/inventory"
)


def ingest_db(df, table_name, engine):
    """Write a DataFrame to a database table, replacing any existing table.

    Column labels are normalised for SQL before writing: every run of
    characters outside ``[A-Za-z0-9_]`` becomes a single underscore, leading
    and trailing underscores are stripped, and the result is lower-cased.
    The caller's DataFrame is left untouched; the cleaned labels are applied
    to a shallow copy.

    Args:
        df: DataFrame to write. Its column labels are processed with the
            ``.str`` accessor, so they are expected to be strings.
        table_name: Name of the destination table.
        engine: Connection object handed to ``DataFrame.to_sql`` as ``con``.
    """
    cleaned_columns = (
        df.columns
        .str.replace(r'[^A-Za-z0-9_]+', '_', regex=True)
        .str.strip('_')
        .str.lower()
    )

    # Relabel a shallow copy rather than assigning to df.columns: the copy
    # shares the underlying data but carries its own column index, so the
    # caller's frame keeps its original headers.
    to_write = df.copy(deep=False)
    to_write.columns = cleaned_columns

    # if_exists='replace' overwrites an existing table of the same name;
    # index=False keeps the DataFrame index out of the table.
    to_write.to_sql(table_name, con=engine, if_exists='replace', index=False)


def load_raw_data():
    """Load every eligible CSV under ``data/`` and ingest it into the database.

    A directory entry is eligible when its name ends with ``.csv``
    (case-insensitive) and does not contain ``sales``; every other entry is
    logged as skipped. Each eligible file is read into a DataFrame and passed
    to ``ingest_db`` together with the module-level ``engine``; the table is
    named after the file (extension removed, lower-cased). A failure on one
    file is logged with its traceback and does not stop the remaining files.

    Returns:
        None. Progress, failures and timings are reported through ``logging``.
    """
    overall_start = time.time()

    # isdir rather than exists: a regular file that happens to be named
    # 'data' would pass an existence check and then make os.listdir raise.
    if not os.path.isdir('data'):
        logging.error("The 'data' directory was not found. Please create it and place your CSV files inside.")
        return

    # os.listdir returns entries in arbitrary order; sorting makes the
    # ingestion order (and therefore the log) reproducible between runs.
    for file in sorted(os.listdir('data')):
        lowered = file.lower()
        if not lowered.endswith('.csv') or 'sales' in lowered:
            logging.info(f'Skipping file: {file}')
            continue

        try:
            file_start_time = time.time()
            df = pd.read_csv(os.path.join('data', file))
            table_name = os.path.splitext(file)[0].lower()

            logging.info(f'Ingesting ({file}) in db with {len(df)} rows...')
            ingest_db(df, table_name, engine)

            # Per-file duration covers both reading the CSV and writing the table.
            file_ingestion_time = time.time() - file_start_time
            logging.info(f'Successfully ingested table "{table_name}" in {file_ingestion_time:.2f} seconds.')
        except Exception as e:
            # Per-file boundary: record the full traceback, then carry on
            # with the next file instead of aborting the whole run.
            logging.exception(f"Failed to process {file}. Error: {e}")

    total_time = (time.time() - overall_start) / 60

    logging.info('-------------------Ingestion Complete-------------------')
    logging.info(f'\nTotal Time Taken: {total_time:.2f} minutes')


# Run the ingestion only when this file is executed as a script; importing the
# module does not trigger load_raw_data().
if __name__ == '__main__':
    load_raw_data()