In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the retail sales workbook into the notebook-wide DataFrame `df`.
# The path is relative, so the file is resolved against the kernel's
# current working directory.
sales_workbook = 'RetailSalesData.xlsx'
df = pd.read_excel(sales_workbook)

In [None]:
# Parse the 'Date' column into pandas datetimes and store the result back
# on the same column, so later cells can use the `.dt` accessor on it.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates

In [None]:
# Basic EDA: dimensions, column summary, descriptive statistics and
# per-column null counts for `df`.
print('Shape:', df.shape)
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() would append a stray "None" line to the output.
df.info()
print(df.describe())
print('Missing values:\n', df.isnull().sum())

In [None]:
# Overall revenue: the sum of the 'TotalSales' column across every row.
sales_amounts = df['TotalSales']
total_revenue = sales_amounts.sum()
# Printed with thousands separators and two decimal places.
print(f'\nTotal Revenue: ₹{total_revenue:,.2f}')

In [None]:
# Rank products by summed 'TotalSales' and keep the five largest totals.
top_products = (
    df.groupby('Product')['TotalSales']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)
print('\nTop 5 Products by Revenue:\n', top_products)

In [None]:
# Rank customers by summed 'TotalSales' and keep the five largest totals.
top_customers = (
    df.groupby('CustomerName')['TotalSales']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)
print('\nTop 5 Customers by Spending:\n', top_customers)

In [None]:
# Monthly revenue trend.
# Bucket each row into a calendar-month period; this adds a 'Month' column
# to `df`, which stays available to later cells.
df['Month'] = df['Date'].dt.to_period('M')
monthly_revenue = df.groupby('Month')['TotalSales'].sum()

# Series.plot returns the Axes it drew on, so labels and grid are applied
# to that object directly instead of through pyplot's implicit current axes.
trend_ax = monthly_revenue.plot(
    kind='line',
    marker='o',
    figsize=(10, 5),
    title='Monthly Revenue Trend',
)
trend_ax.set_xlabel('Month')
trend_ax.set_ylabel('Revenue')
trend_ax.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Revenue per category, largest first, drawn as horizontal bars
# (numeric totals on x, category labels on y).
category_revenue = (
    df.groupby('Category')['TotalSales']
    .sum()
    .sort_values(ascending=False)
)

# NOTE(review): recent seaborn releases (0.13+) warn when `palette` is given
# without `hue` — confirm the installed version before changing this call.
category_ax = sns.barplot(
    x=category_revenue.values,
    y=category_revenue.index,
    palette='mako',
)
category_ax.set_title('Revenue by Product Category')
category_ax.set_xlabel('Total Revenue')
category_ax.set_ylabel('Category')
plt.tight_layout()
plt.show()

In [None]:
# Number of rows per payment method, drawn as vertical bars.
# value_counts() returns the counts ordered from most to least frequent.
payment_counts = df['PaymentMethod'].value_counts()

# NOTE(review): recent seaborn releases (0.13+) warn when `palette` is given
# without `hue` — confirm the installed version before changing this call.
payment_ax = sns.barplot(
    x=payment_counts.index,
    y=payment_counts.values,
    palette='pastel',
)
payment_ax.set_title('Payment Method Distribution')
payment_ax.set_xlabel('Payment Method')
payment_ax.set_ylabel('Number of Orders')
plt.tight_layout()
plt.show()