# In [1]:
import pandas as pd
import os
from sqlalchemy import create_engine
import logging
import time


# Make sure the working folders exist before anything touches them:
# "logs" receives the ingestion log file, "data" is scanned for CSV input.
os.makedirs("logs", exist_ok=True)
os.makedirs("data", exist_ok=True)


# Route every record (DEBUG and above) to an append-mode log file.
# The file handler is created explicitly so the log is always written as
# UTF-8: the messages logged by this file contain emoji, which a platform
# default encoding such as cp1252 cannot represent (those records would
# otherwise fail to be written).
logging.basicConfig(
    handlers=[
        logging.FileHandler("logs/ingestion_db.log", mode="a", encoding="utf-8")
    ],
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
)


# SQLAlchemy engine for the SQLite database file "demodb.db".
engine = create_engine('sqlite:///demodb.db')

def ingest_db(df, table_name, engine):
    """Write a dataframe to a database table, replacing any existing table.

    Errors are logged (with their traceback) instead of being raised, so a
    single bad table does not abort the caller.

    Args:
        df: The pandas DataFrame to store.
        table_name: Name of the destination table.
        engine: Database connection handed to ``DataFrame.to_sql`` as ``con``.

    Returns:
        True if the table was written, False if an error occurred and was
        logged.
    """
    try:
        # if_exists='replace' drops and recreates the table; the dataframe
        # index is not written as a column.
        df.to_sql(table_name, con=engine, if_exists='replace', index=False)
    except Exception as e:
        # logging.exception records the traceback alongside the message.
        logging.exception(f"❌ Error ingesting {table_name}: {e}")
        return False
    logging.info(f"✅ Successfully ingested table: {table_name}")
    return True

def load_raw_data():
    """Load every CSV file in the ``data`` folder into the database.

    Each ``<name>.csv`` is read with pandas and passed to ``ingest_db`` with
    ``<name>`` as the table name, using the module-level ``engine``. A file
    that raises while being processed is logged and skipped so the remaining
    files are still handled. The elapsed time, in minutes, is logged and
    printed at the end.

    Returns:
        None. Returns early, after logging a warning, when the folder is
        missing or contains no ``.csv`` files.
    """
    data_dir = 'data'
    # Monotonic clock: the measured duration is unaffected by system clock
    # adjustments during a long run.
    started = time.perf_counter()

    try:
        # Sorted so files are processed in the same order on every run;
        # os.listdir() returns entries in arbitrary order.
        csv_files = sorted(f for f in os.listdir(data_dir) if f.endswith('.csv'))
    except FileNotFoundError:
        # A missing folder is handled like an empty one.
        csv_files = []

    if not csv_files:
        logging.warning("⚠️ No CSV files found in 'data' folder.")
        print("No CSV files found in 'data' folder.")
        return

    failed = []
    for file in csv_files:
        try:
            df = pd.read_csv(os.path.join(data_dir, file))
            logging.info(f"Ingesting {file} into database.")
            # file[:-4] strips the ".csv" suffix to form the table name.
            ingest_db(df, file[:-4], engine)
        except Exception as e:
            # Keep going with the other files; remember this one for the
            # final summary.
            logging.exception(f"❌ Error processing {file}: {e}")
            failed.append(file)

    total_time = round((time.perf_counter() - started) / 60, 2)
    logging.info('--------------- Ingestion Complete ---------------')
    logging.info(f'Total Time Taken: {total_time} minutes')

    if failed:
        # Do not report plain success when some files could not be processed.
        logging.warning(
            f"{len(failed)} of {len(csv_files)} file(s) could not be processed: "
            f"{', '.join(failed)}"
        )
        print(
            f"⚠️ Ingestion finished with {len(failed)} failed file(s). "
            f"Total Time Taken: {total_time} minutes"
        )
    else:
        print(f"✅ Ingestion Complete! Total Time Taken: {total_time} minutes")

# Run the ingestion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    load_raw_data()


# Output: ✅ Ingestion Complete! Total Time Taken: 19.63 minutes
