# Sales Data Analysis

This notebook performs the following tasks:

1. Analyze revenue by category and time.
2. Identify key customers.
3. Segment customers.
4. Visualize trends in sales and customer behavior.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Location of the sales CSV (a relative path, so it is resolved against the working directory)
file_path = 'big_data_sales_dataset.csv'

# Read the whole file into a DataFrame
df = pd.read_csv(file_path)

# Show the first five rows as a quick look at the columns
df.head(n=5)

In [None]:
# Parse order_date into datetimes, then derive a monthly Period column from it
order_dates = pd.to_datetime(df['order_date'])
df['order_date'] = order_dates
df['month'] = order_dates.dt.to_period('M')

# Total revenue per category, highest-earning category first
revenue_by_category = df.groupby('category')['total_price'].sum()
category_sales = revenue_by_category.sort_values(ascending=False)
category_sales

In [None]:
# Bar chart of total revenue per category (bars follow the descending order of category_sales)
fig, ax = plt.subplots(figsize=(10, 6))
category_sales.plot.bar(ax=ax, color='skyblue')
ax.set(
    title='Top Categories by Revenue',
    xlabel='Category',
    ylabel='Total Revenue',
)
# Tilt the category labels so they are less likely to overlap
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Sum total_price within each value of the 'month' column
monthly_sales = df['total_price'].groupby(df['month']).sum()
monthly_sales

In [None]:
# Line chart of revenue per month, with a marker on every monthly data point
fig, ax = plt.subplots(figsize=(10, 6))
monthly_sales.plot.line(ax=ax, marker='o', color='orange')
ax.set(
    title='Monthly Sales Trend',
    xlabel='Month',
    ylabel='Total Revenue',
)
# Tilt the month labels so they are less likely to overlap
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Rank customers by their summed total_price and keep the ten largest
revenue_per_customer = df.groupby('customer_id')['total_price'].sum()
top_customers = revenue_per_customer.sort_values(ascending=False).iloc[:10]
top_customers

In [None]:
# Customer segmentation: one row per customer_id with revenue, order count and
# average order value (AOV).
#
# 'Total Orders' uses nunique rather than count: count tallies rows, so if the
# data holds several rows for the same order (e.g. one row per line item) it
# would inflate the order count and understate AOV. When every row is already a
# distinct order, nunique gives the same number as count.
customer_segmentation = df.groupby('customer_id').agg(**{
    'Total Revenue': ('total_price', 'sum'),
    'Total Orders': ('order_id', 'nunique'),
})

# AOV = revenue divided by the number of distinct orders for that customer
customer_segmentation['AOV'] = (
    customer_segmentation['Total Revenue'] / customer_segmentation['Total Orders']
)

# Preview the ten highest-revenue customers
customer_segmentation.sort_values(by='Total Revenue', ascending=False).head(10)

In [None]:
# Scatter of order count (x) against revenue (y), one point per customer;
# alpha=0.5 keeps overlapping points distinguishable
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(
    customer_segmentation['Total Orders'],
    customer_segmentation['Total Revenue'],
    alpha=0.5,
)
ax.set(
    title='Customer Segmentation: Orders vs Revenue',
    xlabel='Total Orders',
    ylabel='Total Revenue',
)
ax.grid()
plt.show()