# In [17]:
import pandas as pd
import os
from sqlalchemy import create_engine
import logging
import time

# Folder that holds the ingestion log; it is created on demand so that
# logging.basicConfig below can always open its file.
log_dir = 'Vendor_analysis/logs'
os.makedirs(log_dir, exist_ok=True)

# Root-logger configuration: DEBUG and above is appended to
# <log_dir>/ingestion.log, one timestamped line per record.
logging.basicConfig(
    level=logging.DEBUG,
    filemode="a",
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename=os.path.join(log_dir, "ingestion.log"),
)

# SQLAlchemy engine for the SQLite file inventory.db (relative URL, so the
# file location depends on the current working directory).
engine = create_engine('sqlite:///inventory.db')

def ingest_db(df, table_name, engine):
    """Write ``df`` into the table ``table_name`` through ``engine``.

    The write uses ``if_exists='replace'``, so an existing table with the
    same name is replaced rather than appended to, and ``index=False``, so
    the DataFrame index is not stored as a column.
    """
    write_options = {
        'con': engine,
        'if_exists': 'replace',
        'index': False,
    }
    df.to_sql(table_name, **write_options)

def load_raw_data(data_dir='data'):
    """Load every ``.csv`` file in ``data_dir`` into the database.

    Each ``<name>.csv`` is read into a DataFrame with ``pd.read_csv`` and
    passed to ``ingest_db`` with ``<name>`` as the table name, using the
    module-level ``engine``. Progress, problems and the total elapsed time
    (in minutes) are written through the ``logging`` module.

    A failure on one file is logged together with its traceback and does
    not stop the remaining files from being processed.

    Args:
        data_dir: Directory scanned (non-recursively) for CSV files.
            Defaults to ``'data'``, relative to the working directory.

    Returns:
        None. If ``data_dir`` is not a directory, an error is logged and
        the function returns without ingesting anything.
    """
    # Monotonic clock: the elapsed time cannot be skewed by wall-clock changes.
    start = time.perf_counter()
    logging.info("Starting ingestion process...")

    # isdir rather than exists: a regular file with this name would pass an
    # existence check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(data_dir):
        logging.error("The '%s' directory does not exist.", data_dir)
        return

    # os.listdir returns entries in arbitrary order; sort so the ingestion
    # order (and therefore the log) is reproducible between runs.
    csv_files = sorted(
        entry for entry in os.listdir(data_dir) if entry.endswith('.csv')
    )

    if not csv_files:
        logging.warning("No CSV files found in the '%s' directory.", data_dir)

    for file in csv_files:
        table_name = os.path.splitext(file)[0]
        try:
            df = pd.read_csv(os.path.join(data_dir, file))
            logging.info('Ingesting %s into database', file)
            ingest_db(df, table_name, engine)
            logging.info('Successfully ingested %s', file)
        except Exception as e:
            # Deliberately broad: one bad file must not abort the batch.
            # logging.exception keeps the traceback for later diagnosis.
            logging.exception('Error processing file %s: %s', file, e)

    total_time = (time.perf_counter() - start) / 60
    logging.info('Ingestion Complete')
    logging.info('Total time taken: %.2f minutes', total_time)

# Script entry point: run the ingestion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    load_raw_data()



(2372474, 16)
(12261, 9)
(5543, 10)
(206529, 9)
(224489, 9)
(12825363, 14)
