**Step 01:**
- Install this `pip install mysql-connector-python` and check the version by this command `print(mysql.connector.__version__)` and the version should be 8.0+. 

**Step**
- Copy-Paste the following code in MySQL workbench. 
```SQL
SET GLOBAL connect_timeout = 28800;
SET GLOBAL wait_timeout = 28800;
SET GLOBAL interactive_timeout = 28800;
SET GLOBAL max_allowed_packet = 268435456;
``` 

In [1]:
import mysql.connector
print(mysql.connector.__version__)

9.3.0


In [None]:
import pandas as pd
import mysql.connector
import os
from mysql.connector import Error

# List of CSV files and their corresponding table names
csv_files = [
    ('customers.csv', 'customers'),
    ('geolocation.csv', 'geolocation'),
    ('order_items.csv', 'order_items'),
    ('orders.csv', 'orders'),
    ('products.csv', 'products'),
    ('sellers.csv', 'sellers'),
    ('payments.csv', 'payments')
]

# Folder containing the CSV files (using raw string for Windows path)
folder_path = r'D:\Data Analytics\Analytics_Project\archive'

def get_sql_type(dtype):
    """Map pandas data types to SQL data types"""
    if pd.api.types.is_integer_dtype(dtype):
        return 'INT'
    elif pd.api.types.is_float_dtype(dtype):
        return 'FLOAT'
    elif pd.api.types.is_bool_dtype(dtype):
        return 'BOOLEAN'
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        return 'DATETIME'
    else:
        return 'TEXT'

def create_table(cursor, table_name, columns):
    """Create table in MySQL database"""
    create_table_query = f'CREATE TABLE IF NOT EXISTS `{table_name}` ({columns})'
    try:
        cursor.execute(create_table_query)
        print(f"Table '{table_name}' created successfully")
    except Error as e:
        print(f"Error creating table '{table_name}': {e}")

def insert_data(cursor, table_name, df):
    """Insert data from DataFrame to MySQL table"""
    # Prepare column names and placeholders
    columns = ', '.join([f'`{col}`' for col in df.columns])
    placeholders = ', '.join(['%s'] * len(df.columns))
    
    # Prepare the insert statement
    insert_query = f"INSERT INTO `{table_name}` ({columns}) VALUES ({placeholders})"
    
    # Convert DataFrame to list of tuples (handling NaN/None)
    data = [tuple(None if pd.isna(x) else x for x in row) 
            for _, row in df.iterrows()]
    
    try:
        # Use executemany for batch insert
        cursor.executemany(insert_query, data)
        print(f"Inserted {len(data)} rows into '{table_name}'")
    except Error as e:
        print(f"Error inserting data into '{table_name}': {e}")

def process_csv_to_mysql():
    """Main function to process all CSV files and load to MySQL"""
    try:
        # Connect to MySQL with updated authentication plugin
        conn = mysql.connector.connect(
            host='localhost',
            user='root',
            password='password',
            database='ecommerce',
            auth_plugin='mysql_native_password'
        )
        
        with conn.cursor() as cursor:
            for csv_file, table_name in csv_files:
                file_path = os.path.join(folder_path, csv_file)
                
                try:
                    # Read CSV file
                    print(f"\nProcessing {csv_file}...")
                    df = pd.read_csv(file_path)
                    
                    # Clean data
                    df = df.where(pd.notnull(df), None)
                    df.columns = [col.replace(' ', '_').replace('-', '_').replace('.', '_') 
                                for col in df.columns]
                    
                    # Create table
                    columns = ', '.join([f'`{col}` {get_sql_type(df[col].dtype)}' 
                                       for col in df.columns])
                    create_table(cursor, table_name, columns)
                    
                    # Insert data
                    insert_data(cursor, table_name, df)
                    
                    # Commit after each file
                    conn.commit()
                    
                except FileNotFoundError:
                    print(f"Error: File {csv_file} not found in {folder_path}")
                except pd.errors.EmptyDataError:
                    print(f"Error: File {csv_file} is empty or corrupt")
                except Exception as e:
                    print(f"Error processing {csv_file}: {e}")
                    conn.rollback()
                    
    except Error as e:
        print(f"Database connection error: {e}")
    finally:
        if 'conn' in locals() and conn.is_connected():
            conn.close()
            print("\nDatabase connection closed")

if __name__ == "__main__":
    process_csv_to_mysql()


Processing customers.csv...
Table 'customers' created successfully
Inserted 99441 rows into 'customers'

Processing geolocation.csv...
Table 'geolocation' created successfully
Inserted 1000163 rows into 'geolocation'

Processing order_items.csv...
Table 'order_items' created successfully
Inserted 112650 rows into 'order_items'

Processing orders.csv...
Table 'orders' created successfully
Inserted 99441 rows into 'orders'

Processing sales.csv...
Error: File sales.csv not found in D:\Data Analytics\Analytics_Project\archive

Processing products.csv...
Table 'products' created successfully
Inserted 32951 rows into 'products'

Processing sellers.csv...
Table 'sellers' created successfully
Inserted 3095 rows into 'sellers'

Processing payments.csv...
Table 'payments' created successfully
Inserted 103886 rows into 'payments'

Database connection closed


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mysql.connector

ecommerce_db = mysql.connector.connect(
    host = 'localhost',
    user = 'root',
    password = 'password',
    database = 'ecommerce'
)

# Create a cursor object to execute SQL queries on the connected database
# The cursor allows fetching results, executing commands, and managing transactions
ecommerce_cursor = ecommerce_db.cursor()




---

## **Basic Queries**


### **1. List all unique cities where customers are located.**

In [19]:
unique_cities = """ SELECT distinct customer_city from customers """

ecommerce_cursor.execute(unique_cities)

data = ecommerce_cursor.fetchall()

data


[('franca',),
 ('sao bernardo do campo',),
 ('sao paulo',),
 ('mogi das cruzes',),
 ('campinas',),
 ('jaragua do sul',),
 ('timoteo',),
 ('curitiba',),
 ('belo horizonte',),
 ('montes claros',),
 ('rio de janeiro',),
 ('lencois paulista',),
 ('caxias do sul',),
 ('piracicaba',),
 ('guarulhos',),
 ('pacaja',),
 ('florianopolis',),
 ('aparecida de goiania',),
 ('santo andre',),
 ('goiania',),
 ('cachoeiro de itapemirim',),
 ('sao jose dos campos',),
 ('sao roque',),
 ('camacari',),
 ('resende',),
 ('sumare',),
 ('novo hamburgo',),
 ('sao luis',),
 ('sao jose',),
 ('santa barbara',),
 ('ribeirao preto',),
 ('ituiutaba',),
 ('taquarituba',),
 ('sao jose dos pinhais',),
 ('barrinha',),
 ('parati',),
 ('dourados',),
 ('trindade',),
 ('cascavel',),
 ('fortaleza',),
 ('brasilia',),
 ('pelotas',),
 ('porto alegre',),
 ('salto',),
 ('jundiai',),
 ('cacapava',),
 ('sao vicente',),
 ('uberlandia',),
 ('botelhos',),
 ('sao goncalo',),
 ('araucaria',),
 ('nova iguacu',),
 ('areia branca',),
 ('campo

### **2. Count the number of orders placed in 2017.**

In [16]:
unique_cities = """ SELECT count(order_purchase_timestamp) FROM ecommerce.orders where order_purchase_timestamp > 2017;  """

ecommerce_cursor.execute(unique_cities)

data = ecommerce_cursor.fetchall()

data[0][0]


54011

### **3. Find the total sales per category.**

### **4. Calculate the percentage of orders that were paid in installments.**

### **5. Count the number of customers from each state.** 