# E-commerce Customer Analysis

This project analyzes customer orders to understand sales trends and identify top customers.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" theme once, up front, so the figures created
# later in this notebook share the same look.
sns.set_theme(style="whitegrid")

### 1. Load Data

In [None]:
# Read the raw order export into a DataFrame and preview the first rows.
raw_data_path = "../data/raw_data.csv"
df = pd.read_csv(raw_data_path)
print("Data loaded successfully. Here are the first few rows:")
df.head()

### 2. Basic Cleaning

We will parse `order_date` as a datetime and compute each order's total amount as price × quantity − discount.

In [None]:
# Parse order_date so the column holds datetime values.
df['order_date'] = pd.to_datetime(df['order_date'])

# Order total = price x quantity, less the discount.
gross_amount = df['price'].mul(df['quantity'])
df['total_amount'] = gross_amount.sub(df['discount'])

print("Basic cleaning done. New column 'total_amount' added.")
df.head()

### 3. Exploratory Data Analysis (EDA)

In [None]:
# Total Sales by Category: one bar per category, bar height is the summed
# total_amount of that category's rows (no error bars).
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(
    data=df,
    x='category',
    y='total_amount',
    estimator=sum,
    errorbar=None,
    palette='viridis',
    ax=ax,
)
ax.set_title('Total Sales by Category')
ax.set_xlabel('Category')
ax.set_ylabel('Total Revenue')
plt.show()

In [None]:
# Top 5 Customers by Total Spending
# Sum total_amount per customer, then keep the five largest totals.
spend_by_customer = df.groupby('customer_id')['total_amount'].sum()
top_customers = spend_by_customer.sort_values(ascending=False).head(5)

fig, ax = plt.subplots(figsize=(10, 5))
# rot=0 keeps the customer IDs horizontal on the x axis.
top_customers.plot.bar(color='skyblue', rot=0, ax=ax)
ax.set_title('Top 5 Customers by Spending')
ax.set_xlabel('Customer ID')
ax.set_ylabel('Total Spending')
plt.show()

### 4. Simple Customer Status

We'll label customers based on whether they have spent more than the average.

In [None]:
# Total spend per customer, broadcast back onto each of that customer's order
# rows so every row of the same customer receives the same label. (Comparing
# individual order totals instead would let one customer be both 'High Value'
# and 'Standard' on different rows.)
customer_spend = df.groupby('customer_id')['total_amount'].transform('sum')

# Benchmark: the mean of the per-customer totals (one value per customer).
avg_spend = df.groupby('customer_id')['total_amount'].sum().mean()


def get_status(spend):
    """Classify a customer's total spend against the notebook-level avg_spend.

    Args:
        spend: A customer's total spending.

    Returns:
        'High Value' if spend is strictly greater than avg_spend,
        otherwise 'Standard'.
    """
    return 'High Value' if spend > avg_spend else 'Standard'


df['customer_type'] = customer_spend.apply(get_status)
print("Customer status assigned based on average spending:")
# The preview adds the customer's total next to the order-level amount so the
# label can be checked by eye; this extra column is not written back to df.
df[['customer_id', 'total_amount', 'customer_type']].assign(
    customer_total_spend=customer_spend
).head()

### 5. Export Cleaned Data

In [None]:
# Write the enriched DataFrame to disk without the pandas row index.
cleaned_data_path = "../data/cleaned_data.csv"
df.to_csv(cleaned_data_path, index=False)
print("Cleaned data saved to 'data/cleaned_data.csv'.")