# Customer Purchase Trend Analysis

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the order lines from disk, then derive the two columns the rest of the
# notebook relies on:
#   * OrderDate -> parsed to datetime; values that cannot be parsed become NaT
#     (errors='coerce') instead of raising.
#   * Revenue   -> Quantity * Price for each row.
df = pd.read_csv("retail_sales_data.csv")
df = df.assign(
    OrderDate=pd.to_datetime(df['OrderDate'], errors='coerce'),
    Revenue=df['Quantity'] * df['Price'],
)
df.head()


In [None]:

# Headline figures for the whole dataset.
# Revenue is first summed per OrderID so that the average is taken over
# orders rather than over individual rows.
revenue_per_order = df.groupby('OrderID')['Revenue'].sum()

total_revenue = df['Revenue'].sum()
total_orders = df['OrderID'].nunique()
total_customers = df['CustomerID'].nunique()
avg_order_value = revenue_per_order.mean()

# (label, already-formatted value) pairs, printed one per line.
kpi_lines = (
    ("Total Revenue:", f"{total_revenue:,.0f}"),
    ("Total Orders:", total_orders),
    ("Total Customers:", total_customers),
    ("Average Order Value:", f"{avg_order_value:,.2f}"),
)
for kpi_label, kpi_value in kpi_lines:
    print(kpi_label, kpi_value)


In [None]:

# Label every row with the calendar month of its order date.
df['YearMonth'] = df['OrderDate'].dt.to_period('M').astype(str)

# OrderDate is parsed with errors='coerce', so unparseable dates are NaT and
# their YearMonth label is the literal string "NaT". Grouping on that label
# would add a bogus "NaT" month (sorted after every real month) to the trend
# line, so rows without a valid date are left out of the monthly totals.
has_order_date = df['OrderDate'].notna()
monthly_sales = (
    df.loc[has_order_date]
    .groupby('YearMonth')['Revenue']
    .sum()
    .reset_index()
)

# Line chart of total revenue per month; the month labels sort
# chronologically because groupby returns its keys in sorted order.
plt.figure()
plt.plot(monthly_sales['YearMonth'], monthly_sales['Revenue'])
plt.title("Monthly Sales Trend")
plt.xlabel("Month")
plt.ylabel("Revenue")
plt.xticks(rotation=45)
plt.show()


In [None]:

# Total revenue per product, ordered from highest to lowest.
product_revenue = (
    df.groupby('Product')['Revenue']
    .sum()
    .sort_values(ascending=False)
)

# First and last ten entries of the ranking, as two-column frames
# (Product, TotalRevenue). Both keep the descending order of the ranking.
top_products = product_revenue.iloc[:10].reset_index(name='TotalRevenue')
bottom_products = product_revenue.iloc[-10:].reset_index(name='TotalRevenue')

# One bar chart per slice; only the data and the title differ.
for _ranked, _chart_title in (
    (top_products, "Top 10 Products by Revenue"),
    (bottom_products, "Bottom 10 Products by Revenue"),
):
    plt.figure()
    plt.bar(_ranked['Product'], _ranked['TotalRevenue'])
    plt.title(_chart_title)
    plt.xlabel("Product")
    plt.ylabel("Total Revenue")
    plt.xticks(rotation=45)
    plt.show()


In [None]:

# Total revenue per customer, ordered from highest to lowest.
customer_revenue = (
    df.groupby('CustomerID')['Revenue']
    .sum()
    .sort_values(ascending=False)
)

# Ten highest-revenue customers as a (CustomerID, TotalRevenue) frame.
top_customers = customer_revenue.iloc[:10].reset_index(name='TotalRevenue')

# IDs are converted to strings so the x axis shows one categorical bar per
# customer instead of placing the IDs on a numeric scale.
customer_labels = top_customers['CustomerID'].astype(str)

plt.figure()
plt.bar(customer_labels, top_customers['TotalRevenue'])
plt.title("Top 10 Customers by Revenue")
plt.xlabel("Customer ID")
plt.ylabel("Total Revenue")
plt.xticks(rotation=45)
plt.show()


In [None]:

# Total revenue per category as a (Category, Revenue) frame, highest first.
category_revenue = (
    df.groupby('Category')['Revenue']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)

# Bar chart of revenue per category, in the same descending order.
category_names = category_revenue['Category']
category_totals = category_revenue['Revenue']

plt.figure()
plt.bar(category_names, category_totals)
plt.title("Revenue by Category")
plt.xlabel("Category")
plt.ylabel("Revenue")
plt.xticks(rotation=45)
plt.show()
