# 📊 Sales Data Analysis

This notebook performs a detailed analysis of sales data, exploring revenue trends, customer demographics, product performance, and profitability insights.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot styling: apply matplotlib's 'ggplot' style sheet, then seaborn's 'whitegrid' theme.
# NOTE(review): the seaborn theme is applied last, so it presumably wins for any
# setting both calls touch — confirm the 'ggplot' call is still needed.
plt.style.use('ggplot')
sns.set_theme(style='whitegrid')

In [None]:
# Load data: read the CSV, snake_case the column names (trimmed, lower-cased,
# spaces -> underscores), parse `date`, then derive `month` and `year` from it.
df = (
    pd.read_csv('sales_data.csv')
    .rename(columns=lambda name: name.strip().lower().replace(' ', '_'))
    .assign(
        date=lambda frame: pd.to_datetime(frame['date']),
        # These two read the already-parsed `date` assigned just above.
        month=lambda frame: frame['date'].dt.to_period('M'),
        year=lambda frame: frame['date'].dt.year,
    )
)
df.head()

## 📈 Monthly Revenue Trend

In [None]:
# Total revenue per calendar month; the period index is converted to
# timestamps before plotting.
monthly_revenue = df.groupby('month')['revenue'].sum().to_timestamp()

fig, ax = plt.subplots(figsize=(12, 6))
monthly_revenue.plot(ax=ax, marker='o', title='Monthly Revenue Trend')
ax.set_xlabel('Month')
ax.set_ylabel('Revenue')
ax.grid(True)
fig.tight_layout()
plt.show()

## 🌍 Revenue by Country

In [None]:
# Total revenue per country, largest first, drawn as a bar chart.
country_revenue = df.groupby('country')['revenue'].sum().sort_values(ascending=False)

ax = sns.barplot(x=country_revenue.index, y=country_revenue.values)
# The y values are passed as a plain array, so set both axis labels explicitly
# to keep the figure readable on its own.
ax.set(title='Revenue by Country', xlabel='Country', ylabel='Revenue')
ax.tick_params(axis='x', labelrotation=45)
plt.tight_layout()
plt.show()

## 🏙️ Top 10 States by Revenue

In [None]:
# The ten states with the highest total revenue, drawn as a bar chart.
top_states = df.groupby('state')['revenue'].sum().nlargest(10)

ax = top_states.plot(kind='bar', figsize=(10, 5), rot=45, title='Top 10 States by Revenue')
# Label both axes explicitly; the original left the y-axis unlabeled.
ax.set_xlabel('State')
ax.set_ylabel('Revenue')
plt.tight_layout()
plt.show()

## 🛍️ Revenue by Product Category

In [None]:
# Total revenue per product category, largest first, drawn as a bar chart.
category_revenue = df.groupby('product_category')['revenue'].sum().sort_values(ascending=False)

ax = sns.barplot(x=category_revenue.index, y=category_revenue.values)
# The y values are passed as a plain array, so set both axis labels explicitly
# to keep the figure readable on its own.
ax.set(title='Revenue by Product Category', xlabel='Product Category', ylabel='Revenue')
ax.tick_params(axis='x', labelrotation=45)
plt.tight_layout()
plt.show()

## 📦 Top 10 Products by Revenue

In [None]:
# The ten products with the highest total revenue, drawn as a bar chart.
top_products = df.groupby('product')['revenue'].sum().nlargest(10)

ax = top_products.plot(kind='bar', figsize=(10, 5), rot=45, title='Top 10 Products by Revenue')
# Label both axes explicitly; the original left the y-axis unlabeled.
ax.set_xlabel('Product')
ax.set_ylabel('Revenue')
plt.tight_layout()
plt.show()

## 👩‍💼 Revenue by Gender

In [None]:
# Share of total revenue contributed by each customer gender, as a pie chart.
gender_revenue = df.groupby('customer_gender')['revenue'].sum()

ax = gender_revenue.plot.pie(autopct='%1.1f%%', startangle=90, shadow=True, figsize=(6, 6))
ax.set_title('Revenue by Customer Gender')
ax.set_ylabel('')  # blank out the y-axis label
ax.figure.tight_layout()
plt.show()

## 👶 Revenue by Age Group

In [None]:
# Total revenue per age group, largest first, drawn as a bar chart.
age_group_revenue = df.groupby('age_group')['revenue'].sum().sort_values(ascending=False)

ax = sns.barplot(x=age_group_revenue.index, y=age_group_revenue.values)
# The y values are passed as a plain array, so set both axis labels explicitly
# to keep the figure readable on its own.
ax.set(title='Revenue by Age Group', xlabel='Age Group', ylabel='Revenue')
plt.tight_layout()
plt.show()

## 📈 Profit vs Revenue by Category

In [None]:
# Profit against revenue for every row of `df`, coloured by product category.
ax = sns.scatterplot(data=df, x='revenue', y='profit', hue='product_category')
ax.set_title('Profit vs Revenue by Product Category')
ax.figure.tight_layout()
plt.show()

## 📌 Key Insights
- Revenue peaks around certain months, suggesting seasonal trends.
- A few product categories dominate sales.
- Spending behavior varies across gender and age groups.
- Some products generate high revenue but low profit, highlighting margin challenges.

## ✅ Conclusion
This analysis provides actionable insights into sales performance across regions, products, and customer demographics. Future analysis could include customer lifetime value, churn prediction, or promotional strategy impact.