# In [None]:
import logging
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

# ------------------- Logging Setup -------------------
# The log file is opened explicitly as UTF-8. Messages written by this script
# contain non-ASCII characters (e.g. "→"); with the platform default encoding
# (cp1252 on many Windows setups) such records cannot be encoded and are
# dropped with a "Logging error" on stderr.
# `handlers=` is used instead of `filename=`/`encoding=` because the
# `encoding` keyword of basicConfig only exists on Python 3.9+.
logging.basicConfig(
    handlers=[logging.FileHandler("csv_to_mysql.log", encoding="utf-8")],
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)

# ------------------- Config -------------------
# (CSV file name, destination table) pairs, in load order. Every table is
# named after its source file without the ".csv" extension, so the pairs are
# derived from the table names.
csv_files = [
    (f'{table}.csv', table)
    for table in (
        'customers',
        'orders',
        'sellers',
        'products',
        'order_items',
        'payments',
        'geolocation',
    )
]

# Directory containing the CSV files listed above.
folder_path = 'D:/Projects/Ecomerce_Project_using_SQL_and_Python'

# Create SQLAlchemy engine (using pymysql as DB driver under the hood).
# The password contains '@', which must be percent-encoded inside a database
# URL: SQLAlchemy 1.4+ splits the credentials from the host at the first '@',
# so an unescaped password makes the host parse as "2002@localhost".
# The password can be supplied through the MYSQL_PASSWORD environment
# variable; the default reproduces the previously hard-coded value.
# NOTE(review): the default still keeps a credential in source control —
# prefer setting MYSQL_PASSWORD and removing the fallback.
_db_password = os.environ.get("MYSQL_PASSWORD", "Amulya@2002")
engine = create_engine(
    f"mysql+pymysql://root:{quote(_db_password, safe='')}@localhost/ecommerce"
)

# ------------------- Helpers -------------------
def get_sql_type(dtype):
    """Map a pandas dtype to the MySQL column type used in CREATE TABLE.

    pandas reads integers as 64-bit and floats as double precision by
    default, so the widest matching MySQL types are used: a 32-bit ``INT``
    would overflow on large int64 values and a single-precision ``FLOAT``
    would silently round float64 values.

    Args:
        dtype: A pandas/NumPy dtype, e.g. ``df[col].dtype``.

    Returns:
        str: ``'BIGINT'`` for integer dtypes, ``'DOUBLE'`` for float dtypes,
        ``'BOOLEAN'`` for bool, ``'DATETIME'`` for any datetime64 dtype and
        ``'VARCHAR(255)'`` for everything else.
    """
    if pd.api.types.is_integer_dtype(dtype):
        return 'BIGINT'
    if pd.api.types.is_float_dtype(dtype):
        return 'DOUBLE'
    if pd.api.types.is_bool_dtype(dtype):
        return 'BOOLEAN'
    if pd.api.types.is_datetime64_any_dtype(dtype):
        return 'DATETIME'
    # Fallback for text and any other dtype; values longer than 255
    # characters will not fit this column.
    return 'VARCHAR(255)'

def auto_convert_dates(df):
    """Convert text columns that parse entirely as dates to datetime.

    Each object- or string-dtype column is passed to ``pd.to_datetime`` with
    ``errors='raise'``. A column is replaced only when the whole column
    parses and at least one value is non-null; otherwise it is left as is.

    Args:
        df: DataFrame to inspect. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    for col in df.columns:
        dtype = df[col].dtype
        # Text may be stored as plain object dtype or as pandas' dedicated
        # string dtype, depending on pandas version and reader options.
        is_text = dtype == 'object' or isinstance(dtype, pd.StringDtype)
        if not is_text:
            continue
        try:
            # `infer_datetime_format` is intentionally not passed: it is
            # deprecated since pandas 2.0, where it has no effect and only
            # emits a warning.
            converted = pd.to_datetime(df[col], errors='raise')
        except Exception:
            # Best effort: a column that cannot be parsed is not a date column.
            continue
        # Skip columns that parsed to nothing but NaT (e.g. entirely empty).
        if converted.notna().any():
            df[col] = converted
    return df

# ------------------- Main Loop -------------------
# Each CSV is loaded inside its own transaction. If a file fails, only that
# file's inserted rows are rolled back and the remaining files are still
# processed. (With a single outer transaction and the per-file `except`
# below, rows inserted before a failure would be committed at the end.)
# Note that MySQL commits DDL implicitly, so a table created for a file that
# later fails remains in place.
# Rows are appended with plain INSERTs: re-running the script against
# existing tables adds the same rows again.
failed_files = []

for csv_file, table_name in csv_files:
    try:
        file_path = os.path.join(folder_path, csv_file)
        df = pd.read_csv(file_path)

        # NaN is NOT replaced with None on the DataFrame here: depending on
        # the pandas version that can turn numeric columns into object dtype,
        # which would map them to VARCHAR and expose them to the date
        # auto-detection. Missing values are converted per cell when the row
        # tuples are built below.

        # Clean column names
        df.columns = [col.replace(' ', '_').replace('-', '_').replace('.', '_') for col in df.columns]

        # Auto-convert datetime-like columns
        df = auto_convert_dates(df)

        logging.info(f"Processing {csv_file} → {table_name}, rows: {len(df)}")

        # Generate CREATE TABLE (no primary key)
        columns = ', '.join([f'`{col}` {get_sql_type(df[col].dtype)}' for col in df.columns])
        create_table_query = f'CREATE TABLE IF NOT EXISTS `{table_name}` ({columns})'

        # Plain INSERT (no UPSERT without PK/unique key)
        placeholders = ', '.join(['%s'] * len(df.columns))
        col_names = ', '.join([f'`{col}`' for col in df.columns])
        sql = f"INSERT INTO `{table_name}` ({col_names}) VALUES ({placeholders})"

        # NaN/NaT become None so the driver sends SQL NULL.
        values = [tuple(None if pd.isna(x) else x for x in row) for row in df.values]

        # exec_driver_sql passes the statement and its %s placeholders
        # straight to the DB driver; plain SQL strings are not accepted by
        # Connection.execute() in SQLAlchemy 2.0.
        with engine.begin() as conn:
            conn.exec_driver_sql(create_table_query)
            # An empty CSV has no rows to send; skip the INSERT entirely.
            if values:
                conn.exec_driver_sql(sql, values)

        logging.info(f"Loaded {len(df)} rows into `{table_name}`")

    except Exception as e:
        # Keep going with the remaining files, but record the failure and
        # keep the traceback in the log.
        failed_files.append(csv_file)
        logging.exception(f"Error processing {csv_file}: {e}")

if failed_files:
    logging.error(f"ETL process completed with errors; failed files: {', '.join(failed_files)}")
else:
    logging.info("ETL process completed successfully.")
