In [8]:
import pandas as pd
import psycopg2
import os

# Directory that holds the CSV export to be loaded into PostgreSQL.
folder_path = 'C:/Users/moham/Desktop/Data Analysis/2025 Data Analysis/SQL Zero - Hero/Retail Sales Analysis'

# (file name, destination table) pairs. The file name must match the file on
# disk exactly, including the space before the extension.
csv_files = [('SQL - Retail Sales Analysis_utf .csv', 'retail_sales')]  # Update if name is different

# Function to map pandas dtypes to SQL types
def get_sql_type(dtype):
    """Return the PostgreSQL column type used to store values of a pandas dtype.

    The width of the dtype is taken into account so that no value that fits
    in the pandas column is rejected or silently rounded by the database:

    * integers that fit in a signed 32-bit value -> ``INTEGER``, wider or
      unsigned 32-bit integers -> ``BIGINT``
    * 32-bit (or narrower) floats -> ``REAL``, 64-bit floats ->
      ``DOUBLE PRECISION``
    * booleans -> ``BOOLEAN``
    * any datetime64 dtype -> ``TIMESTAMP``
    * everything else -> ``TEXT``

    Parameters
    ----------
    dtype : a dtype (or anything the ``pd.api.types.is_*_dtype`` helpers accept)

    Returns
    -------
    str
        The SQL type name.
    """
    # Objects without an ``itemsize`` (e.g. a dtype given as a string) are
    # treated as 64-bit so the widest SQL type is chosen.
    item_size = getattr(dtype, 'itemsize', 8)

    if pd.api.types.is_integer_dtype(dtype):
        # PostgreSQL INTEGER is a signed 4-byte value; an unsigned 32-bit
        # column can hold values above that range, so it needs BIGINT.
        is_unsigned = getattr(dtype, 'kind', 'i') == 'u'
        fits_in_int4 = item_size < 4 or (item_size == 4 and not is_unsigned)
        return 'INTEGER' if fits_in_int4 else 'BIGINT'
    elif pd.api.types.is_float_dtype(dtype):
        # REAL is single precision; float64 data needs DOUBLE PRECISION to
        # round-trip without losing digits.
        return 'REAL' if item_size <= 4 else 'DOUBLE PRECISION'
    elif pd.api.types.is_bool_dtype(dtype):
        return 'BOOLEAN'
    elif pd.api.types.is_datetime64_any_dtype(dtype):
        return 'TIMESTAMP'
    else:
        return 'TEXT'

# Load every (csv file, table) pair listed in `csv_files` into PostgreSQL:
# create the table from the CSV's inferred column types if it does not exist,
# then insert all rows and commit once per file.
#
# NOTE: the load is append-only. Re-running this cell inserts the same rows
# again; truncate the table first if a clean reload is wanted.

# Start from known-empty handles so the `finally` clause never picks up a
# stale `conn`/`cursor` left in the kernel by an earlier run of this cell.
conn = None
cursor = None

try:
    # Connect to PostgreSQL
    conn = psycopg2.connect(
        host='localhost',
        user='postgres',
        # NOTE(review): prefer supplying the password through the PGPASSWORD
        # environment variable; the literal fallback only keeps the existing
        # setup working and should be removed before sharing the notebook.
        password=os.environ.get('PGPASSWORD', 'mohammad'),
        dbname='Retail Sales Analysis',  # ✅ Correct spelling
        port='5432'
    )
    cursor = conn.cursor()
    print("✅ Connected to the database.")

    for csv_file, table_name in csv_files:
        file_path = os.path.join(folder_path, csv_file)
        print(f"📂 Trying to read file: {file_path}")

        if not os.path.exists(file_path):
            print(f"❌ File not found: {file_path}")
            continue

        # NOTE(review): the file name suggests a UTF-8 export; if so,
        # 'ISO-8859-1' would garble non-ASCII text (and a BOM, if present,
        # would end up in the first column name) - confirm the encoding.
        df = pd.read_csv(file_path, encoding='ISO-8859-1')
        # Make column names safe/simple SQL identifiers.
        df.columns = [col.replace(' ', '_').replace('-', '_').replace('.', '_') for col in df.columns]

        # Column types are inferred from the dtypes exactly as read from the
        # CSV. NaN -> None conversion happens later, per row, so it cannot
        # influence the inferred SQL types.
        quoted_columns = [f'"{col}"' for col in df.columns]

        # Create table if not exists
        columns = ', '.join(
            f'{quoted} {get_sql_type(df[col].dtype)}'
            for quoted, col in zip(quoted_columns, df.columns)
        )
        create_table_query = f'CREATE TABLE IF NOT EXISTS "{table_name}" ({columns})'
        cursor.execute(create_table_query)
        print(f"🛠️ Table `{table_name}` created or already exists.")

        # The INSERT statement is identical for every row, so build it once.
        # Building it from plain pieces also avoids a backslash inside an
        # f-string expression, which is a SyntaxError before Python 3.12.
        column_list = ', '.join(quoted_columns)
        placeholders = ', '.join(['%s'] * len(quoted_columns))
        insert_sql = f'INSERT INTO "{table_name}" ({column_list}) VALUES ({placeholders})'

        # astype(object) boxes numpy scalars as plain Python values, which
        # psycopg2 can adapt even when all columns share one numeric dtype;
        # missing values are sent as SQL NULL.
        rows = [
            tuple(None if pd.isna(value) else value for value in record)
            for record in df.astype(object).itertuples(index=False, name=None)
        ]
        cursor.executemany(insert_sql, rows)

        conn.commit()
        print(f"✅ Data from `{csv_file}` inserted into `{table_name}`.")

except psycopg2.Error as err:
    print(f"❗Database error: {err}")
except Exception as e:
    print(f"❗General error: {e}")
finally:
    # Closing the connection without a commit discards any uncommitted rows
    # of a file that failed part-way through.
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()
        print("🔒 Database connection closed.")



✅ Connected to the database.
📂 Trying to read file: C:/Users/moham/Desktop/Data Analysis/2025 Data Analysis/SQL Zero - Hero/Retail Sales Analysis\SQL - Retail Sales Analysis_utf .csv
🛠️ Table `retail_sales` created or already exists.
✅ Data from `SQL - Retail Sales Analysis_utf .csv` inserted into `retail_sales`.
🔒 Database connection closed.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [20]:
# Read the retail sales CSV from the notebook's working directory (no explicit
# encoding is passed) and preview the first rows.
dd = pd.read_csv(filepath_or_buffer="./SQL - Retail Sales Analysis_utf .csv")
dd.head()

Unnamed: 0,transactions_id,sale_date,sale_time,customer_id,gender,age,category,quantiy,price_per_unit,cogs,total_sale
0,180,05-11-2022,10:50:00,117,Male,41.0,Clothing,3.0,300.0,129.0,900.0
1,522,09-07-2022,11:00:00,52,Male,46.0,Beauty,3.0,500.0,145.0,1500.0
2,559,12-12-2022,10:48:00,5,Female,40.0,Clothing,4.0,300.0,84.0,1200.0
3,1180,06-01-2022,08:53:00,85,Male,41.0,Clothing,3.0,300.0,129.0,900.0
4,1522,14-11-2022,08:35:00,48,Male,46.0,Beauty,3.0,500.0,235.0,1500.0


In [23]:
# Show the column labels of `dd`.
print(dd.columns)

Index(['transactions_id', 'sale_date', 'sale_time', 'customer_id', 'gender',
       'age', 'category', 'quantiy', 'price_per_unit', 'cogs', 'total_sale'],
      dtype='object')


In [24]:
# Hand-entered summary: one row per (category, gender) pair with its count.
data = dict(
    Category=["Clothing", "Electronics", "Beauty", "Electronics", "Clothing", "Beauty"],
    Gender=["Female", "Male", "Female", "Female", "Male", "Male"],
    Count=[347, 344, 330, 340, 354, 282],
)

# Each dict key becomes a column, in insertion order.
table = pd.DataFrame.from_dict(data)
print(table)

      Category  Gender  Count
0     Clothing  Female    347
1  Electronics    Male    344
2       Beauty  Female    330
3  Electronics  Female    340
4     Clothing    Male    354
5       Beauty    Male    282
