In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Monthly Revenue Trend Analysis
#
# Loads the order data, sums TotalPrice per calendar-month number, draws the
# result as a line chart, and prints the row of the highest-revenue month.
# NOTE(review): the grouping key is the month number only, so orders from
# different years would land in the same bucket; the "(2023)" in the title
# presumes the file covers a single year -- confirm against the dataset.

df = pd.read_csv('ecommerce_dataset.csv')

# Parse the timestamps once and reuse the parsed series for the month column.
order_dates = pd.to_datetime(df['OrderDate'])
df['OrderDate'] = order_dates
df['Month'] = order_dates.dt.month

# One row per month number, with the summed revenue next to it.
monthly_revenue = (
    df.groupby('Month')['TotalPrice']
    .sum()
    .reset_index()
)

trend_fig, trend_ax = plt.subplots(figsize=(10, 6))
trend_ax.plot(
    monthly_revenue['Month'],
    monthly_revenue['TotalPrice'],
    marker='o',
    linestyle='-',
)
trend_ax.set_title('Monthly Revenue Trend (2023)')
trend_ax.set_xlabel('Month')
trend_ax.set_ylabel('Total Revenue')
# Force a tick for every month 1..12, whether or not it has data.
trend_ax.set_xticks(range(1, 13))
trend_ax.grid(True)
plt.show()

# Row (Month, TotalPrice) holding the largest monthly total.
peak_row_label = monthly_revenue['TotalPrice'].idxmax()
max_month = monthly_revenue.loc[peak_row_label]
print(max_month)

In [None]:
# Product Category vs. Ratings and Revenue
#
# Aggregates the mean CustomerRating and the summed TotalPrice for each
# ProductCategory, shows both as side-by-side bar charts, and prints the
# leading category for each metric.

category_stats = (
    df.groupby('ProductCategory')
    .agg(AvgRating=('CustomerRating', 'mean'), TotalRevenue=('TotalPrice', 'sum'))
    .reset_index()
)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: average rating per category.
ax1.bar(category_stats['ProductCategory'], category_stats['AvgRating'], color='skyblue')
ax1.set_title('Average Customer Rating by Product Category')
ax1.set_ylabel('Average Rating (1-5)')
ax1.set_ylim(0, 5)
ax1.tick_params(axis='x', rotation=45)

# Right panel: total revenue per category, with dollar-formatted ticks.
ax2.bar(category_stats['ProductCategory'], category_stats['TotalRevenue'], color='salmon')
ax2.set_title('Total Revenue by Product Category')
ax2.set_ylabel('Total Revenue')
ax2.tick_params(axis='x', rotation=45)
ax2.yaxis.set_major_formatter('${x:,.0f}')

plt.tight_layout()
plt.show()

# Full rows (category plus both metrics) of the best category per metric.
top_rated = category_stats.loc[category_stats['AvgRating'].idxmax()]
top_revenue = category_stats.loc[category_stats['TotalRevenue'].idxmax()]

# Interpolating the whole row Series into an f-string dumps its multi-line
# repr (including the "Name: ..., dtype: object" footer) after the label, so
# print just the category name and the metric that made it the winner.
print(f"Top rated: {top_rated['ProductCategory']} (average rating {top_rated['AvgRating']:.2f})")
print(f"Top revenue: {top_revenue['ProductCategory']} (${top_revenue['TotalRevenue']:,.2f})")

In [None]:
# Payment Method Analysis
#
# Compares payment methods two ways: how many rows use each one (left panel)
# and how much TotalPrice each one accounts for (right panel), then prints
# the leader of each ranking.

# Row count per method; value_counts() returns them most-frequent first.
payment_counts = df['PaymentMethod'].value_counts()

# Summed revenue per method, largest first.
payment_revenue = (
    df.groupby('PaymentMethod')['TotalPrice']
    .sum()
    .sort_values(ascending=False)
)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 7))

# Horizontal count plot, bars ordered to match payment_counts.
sns.countplot(data=df, y='PaymentMethod', order=payment_counts.index, ax=ax1)
ax1.set(title='Payment Method Frequency', xlabel='Number of Orders')

payment_revenue.plot.bar(ax=ax2, color='orange')
ax2.set(
    title='Total Revenue by Payment Method',
    xlabel='Payment Method',
    ylabel='Total Revenue',
)
ax2.yaxis.set_major_formatter('${x:,.0f}')

plt.tight_layout()
plt.show()

most_common_method = payment_counts.idxmax()
highest_revenue_method = payment_revenue.idxmax()
print(f"Most common method: {most_common_method}")
print(f"Highest revenue: {highest_revenue_method}")

In [None]:
# City-Based Sales & Order Value
#
# Builds per-city totals, mean order value and order counts, keeps the three
# cities with the highest total sales, and charts their totals and averages.
city_stats = (
    df.groupby('City')
    .agg(
        TotalSales=('TotalPrice', 'sum'),
        AvgOrderValue=('TotalPrice', 'mean'),
        OrderCount=('OrderID', 'count'),
    )
    .sort_values('TotalSales', ascending=False)
    .reset_index()
)

# city_stats is already sorted by TotalSales descending, so the first three
# rows are the top three cities.
top_cities = city_stats.iloc[:3]

city_fig, (sales_ax, aov_ax) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: total sales of the top cities.
sales_ax.bar(top_cities['City'], top_cities['TotalSales'], color='royalblue')
sales_ax.set_title('Top 3 Cities by Total Sales')
sales_ax.set_ylabel('Total Sales (USD)')
sales_ax.yaxis.set_major_formatter('${x:,.0f}')

# Right panel: average order value for the same cities.
aov_ax.bar(top_cities['City'], top_cities['AvgOrderValue'], color='forestgreen')
aov_ax.set_title('Average Order Value in Top Cities')
aov_ax.set_ylabel('Average Order Value (USD)')
aov_ax.yaxis.set_major_formatter('${x:,.0f}')

plt.tight_layout()
plt.show()

# Tabular summary of the charted cities.
summary_columns = ['City', 'TotalSales', 'AvgOrderValue', 'OrderCount']
print("Top 3 Cities by Total Sales:")
print(top_cities[summary_columns])

In [None]:
# Gender & Product Category Ratings Comparisons
#
# Pivots the mean CustomerRating into a ProductCategory x Gender table and
# renders it as an annotated heatmap.
rating_pivot = df.pivot_table(
    index='ProductCategory',
    columns='Gender',
    values='CustomerRating',
    aggfunc='mean',
)

# Draw on an explicit Axes so the labels below target the heatmap directly.
heat_fig, heat_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    rating_pivot,
    annot=True,
    fmt='.2f',
    cmap='YlGnBu',
    linewidths=0.5,
    cbar_kws={'label': 'Average Rating'},
    ax=heat_ax,
)
heat_ax.set_title('Average Ratings by Gender and Product Category')
heat_ax.set_xlabel('Gender')
heat_ax.set_ylabel('Product Category')
plt.tight_layout()
plt.show()