# Vendor Performance Data Ingestion Pipeline
This notebook provides an automated workflow for importing vendor-related CSV files into a local inventory.db SQLite database. The process is designed for scalability, logging, and efficient memory use, preparing the data for further vendor performance analysis.

## Features:
- Scans a specified data directory and locates all CSV files for ingestion.
- Efficiently reads each file in manageable chunks to prevent memory overload.
- Appends each batch directly to the database table named after the source file (the file name without its `.csv` extension).
- Records the start of each file's ingestion, the completion of the run, and the total processing time in a dedicated log file for transparency and debugging.

In [1]:
import pandas as pd
import os
from sqlalchemy import create_engine
import logging
import time
# Log destination and record layout for the ingestion run.
_LOG_FILE = "E:/Vendor Project/log/ingestion_db.log"
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# Configure the root logger to append DEBUG-and-above records to the log file.
logging.basicConfig(
    filename=_LOG_FILE,
    filemode="a",
    format=_LOG_FORMAT,
    level=logging.DEBUG,
)

# SQLAlchemy engine for the SQLite file `inventory.db` (relative path).
engine = create_engine("sqlite:///inventory.db")

def ingest_db(chunk, table_name, engine):
    """Append one batch of rows to a database table.

    Args:
        chunk: DataFrame holding the rows to write.
        table_name: Name of the destination table.
        engine: Database connectable handed to ``DataFrame.to_sql`` as ``con``.
    """
    # Rows are appended to the table; the DataFrame index is not written.
    chunk.to_sql(
        name=table_name,
        con=engine,
        if_exists="append",
        index=False,
    )
def load_raw_data(data_dir='E:/Vendor Project/data', chunksize=100000):
    """Ingest every CSV file in ``data_dir`` into the database.

    Each CSV file is read in chunks of ``chunksize`` rows and every chunk is
    passed to ``ingest_db`` together with the module-level ``engine``. The
    destination table is named after the file without its extension.

    Note: ``ingest_db`` appends rows, so running this again adds the same
    rows a second time.

    Args:
        data_dir: Directory that is scanned (non-recursively) for CSV files.
        chunksize: Maximum number of rows read and written per batch.

    Raises:
        OSError: If ``data_dir`` cannot be listed.
        Exception: Any error raised while reading or writing a file is
            logged and then re-raised unchanged.
    """
    start = time.perf_counter()  # monotonic clock: safe for measuring elapsed time

    # Sorted so the ingestion order (and the log) is the same on every run.
    for file_name in sorted(os.listdir(data_dir)):
        table_name, extension = os.path.splitext(file_name)
        file_path = os.path.join(data_dir, file_name)

        # Match on the real extension: a substring test would also pick up
        # names such as "sales.csv.bak" and derive a wrong table name.
        if extension.lower() != '.csv' or not os.path.isfile(file_path):
            continue

        logging.info("Ingesting %s in database", table_name)
        try:
            # The context manager closes the file even if a chunk fails.
            with pd.read_csv(file_path, chunksize=chunksize) as reader:
                for chunk in reader:
                    ingest_db(chunk, table_name, engine)
        except Exception:
            # Record the failing file in the ingestion log, then propagate.
            logging.exception("Failed to ingest %s", file_name)
            raise

    total_time = (time.perf_counter() - start) / 60
    logging.info("Finished ingesting data")
    logging.info("Total time: %s minutes", total_time)

# Run the ingestion only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    load_raw_data()