In [2]:
import os
import pandas as pd
from sqlalchemy import create_engine
import logging
import time
import urllib.parse

# --- Logging setup ---
# Messages are appended to logs/ingestion_db.log. The folder is created first
# because the log file handler cannot open a file in a missing directory.
os.makedirs("logs", exist_ok=True)
logging.basicConfig(
    filename="logs/ingestion_db.log",
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filemode="a",
)

# --- MySQL connection ---
# Prefer the MYSQL_PASSWORD environment variable so the credential does not
# have to be stored in this file; the literal is kept only as a fallback so
# existing setups keep working unchanged.
password = os.environ.get("MYSQL_PASSWORD", "##########")
# URL-encode the password so characters such as '@', ':' or '/' cannot be
# misread as part of the connection URL structure.
encoded_password = urllib.parse.quote_plus(password)
engine = create_engine(f"mysql+pymysql://root:{encoded_password}@127.0.0.1/customer_behavior")

# --- Folder path containing CSV files ---
folder_path = r"D:\data analysis end to end project customer behaviour"

def ingest_db(df, table_name, engine):
    """Write a dataframe to a database table, replacing the table if it exists.

    Failures are logged (with traceback) and printed instead of being raised,
    so a caller looping over several tables can carry on with the next one.

    Args:
        df: Object exposing a pandas-style ``to_sql`` method.
        table_name: Name of the destination table.
        engine: Connection or engine handed to ``to_sql`` as ``con``.

    Returns:
        bool: True when ``to_sql`` completed, False when it raised.
    """
    logging.info(f"Ingesting '{table_name}' into database.")
    try:
        # Only the database write is guarded; everything after it is plain
        # reporting and should not be mistaken for an ingestion failure.
        df.to_sql(table_name, con=engine, if_exists='replace', index=False)
    except Exception as e:
        # logging.exception records the traceback alongside the message.
        logging.exception(f" Failed to ingest '{table_name}': {e}")
        print(f" Error while uploading '{table_name}': {e}")
        return False

    logging.info(f" Successfully ingested '{table_name}' into database.")
    print(f"Table '{table_name}' created/updated successfully.")
    return True

def load_raw_data():
    """Load every CSV file in ``folder_path`` into the database.

    Each file is read with ``pd.read_csv`` and passed to ``ingest_db`` together
    with the module-level ``engine``. The table name is the file name without
    its extension, with spaces replaced by underscores. A failure on one file
    is logged and printed, and processing continues with the next file.

    Returns:
        None. Returns early when the folder is missing or holds no CSV files.
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during the run.
    start = time.perf_counter()

    # isdir rather than exists: a path that points at a regular file would
    # otherwise pass the check and make os.listdir raise.
    if not os.path.isdir(folder_path):
        logging.error(f"Folder not found: {folder_path}")
        print(f"Folder not found: {folder_path}")
        return

    # Match the extension case-insensitively (e.g. "DATA.CSV"), skip
    # directories that merely end in ".csv", and sort so the ingestion order
    # is the same on every run.
    csv_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(".csv")
        and os.path.isfile(os.path.join(folder_path, f))
    )

    if not csv_files:
        logging.warning("No CSV files found in folder.")
        print("No CSV files found in the given folder.")
        return

    for file in csv_files:
        file_path = os.path.join(folder_path, file)
        try:
            logging.info(f"Reading file: {file_path}")
            df = pd.read_csv(file_path)
            table_name = os.path.splitext(file)[0].replace(" ", "_")  # safe table name
            ingest_db(df, table_name, engine)
        except Exception as e:
            # Best effort: record the traceback and move on to the next file.
            logging.exception(f"Error processing file '{file}': {e}")
            print(f"Error processing '{file}': {e}")

    total_time_minutes = (time.perf_counter() - start) / 60
    logging.info('-----------------------Ingestion completed----------------------------')
    logging.info(f"Total time taken: {total_time_minutes:.2f} minutes")
    print(f"\n Ingestion completed in {total_time_minutes:.2f} minutes.")

# Run the ingestion only when this file is executed directly, not on import.
if __name__ == "__main__":
    load_raw_data()


Table 'feature_engineering_customer_shopping_behavior' created/updated successfully.

 Ingestion completed in 0.01 minutes.
