*MINI REAL PROJECT*

## **E-Commerce Sales Analysis (Pandas Only)**

In [2]:
import pandas as pd

In [3]:
sales_df = pd.DataFrame({
    "OrderID":[1001,1002,1003,1004,1005,1006,1007,1008,1009,1010,1011,1012,1013,1014,1015],
    "City":["Delhi","Mumbai","Delhi","Pune","Mumbai","Delhi","Pune","Delhi","Mumbai","Pune","Delhi","Mumbai","Delhi","Pune","Mumbai"],
    "Category":["Electronics","Clothing","Electronics","Furniture","Clothing","Electronics","Furniture","Clothing","Electronics","Furniture","Clothing","Electronics","Electronics","Furniture","Clothing"],
    "Payment_Method":["UPI","Card","UPI","COD","Card","UPI","COD","UPI","Card","UPI","COD","UPI","Card","COD","UPI"],
    "Amount":[25000,3000,18000,22000,4500,27000,15000,3500,20000,24000,3200,26000,21000,23000,4000],
    "Quantity":[1,2,1,1,3,1,2,2,1,1,2,1,1,1,2],
    "Order_Status":["Delivered","Delivered","Cancelled","Delivered","Returned","Delivered","Delivered","Delivered","Delivered","Cancelled","Delivered","Delivered","Returned","Delivered","Delivered"]
})

sales_df

Unnamed: 0,OrderID,City,Category,Payment_Method,Amount,Quantity,Order_Status
0,1001,Delhi,Electronics,UPI,25000,1,Delivered
1,1002,Mumbai,Clothing,Card,3000,2,Delivered
2,1003,Delhi,Electronics,UPI,18000,1,Cancelled
3,1004,Pune,Furniture,COD,22000,1,Delivered
4,1005,Mumbai,Clothing,Card,4500,3,Returned
5,1006,Delhi,Electronics,UPI,27000,1,Delivered
6,1007,Pune,Furniture,COD,15000,2,Delivered
7,1008,Delhi,Clothing,UPI,3500,2,Delivered
8,1009,Mumbai,Electronics,Card,20000,1,Delivered
9,1010,Pune,Furniture,UPI,24000,1,Cancelled


1. Basic Business Insights

In [5]:
# Total Revenue
revenue = sales_df.loc[sales_df["Order_Status"]=="Delivered",["Amount"]].sum()
print("Total Revenue: ",revenue)

Total Revenue:  Amount    171700
dtype: int64


In [7]:
# Total Orders Count
totalorder = sales_df["OrderID"].count()
print("Total orders count : ",totalorder)

Total orders count :  15


In [8]:
# Average Order Value
avg_order_value = sales_df["Amount"].mean()
print("Average Order Value: ",avg_order_value)

Average Order Value:  15946.666666666666


2. Sales Analysis

In [14]:
# Revenue Per City
revenue = sales_df.loc[sales_df["Order_Status"]=="Delivered",["Amount","City"]]
rev_per_city = pd.pivot_table(revenue,index="City",values="Amount",aggfunc="sum")
rev_per_city


Unnamed: 0_level_0,Amount
City,Unnamed: 1_level_1
Delhi,58700
Mumbai,53000
Pune,60000


In [15]:
# Revenue Per Category
rev_per_category = (sales_df[sales_df["Order_Status"]=="Delivered"].groupby("Category")["Amount"].sum())
rev_per_category

Category
Clothing       13700
Electronics    98000
Furniture      60000
Name: Amount, dtype: int64

In [19]:
# Most Selling Category
most_selling_category = pd.pivot_table(sales_df,index="Category",values="Amount",aggfunc="sum")["Amount"].idxmax()
print("The Most Selling Category is: ",most_selling_category)

The Most Selling Category is:  Electronics


In [21]:
# method - 2
sales_df.groupby("Category")["Amount"].sum().idxmax()

'Electronics'

3. Customer Behavior

In [24]:
# Most Used Payment Method
most_used_paymethod = sales_df["Payment_Method"].value_counts().idxmax()
print("Most Used Payment Method: ",most_used_paymethod)

Most Used Payment Method:  UPI


In [26]:
# Order Status Distribution
sales_df["Order_Status"].value_counts(normalize=True)*100

Order_Status
Delivered    73.333333
Cancelled    13.333333
Returned     13.333333
Name: proportion, dtype: float64

4. Advanced Analyst Thinking

In [50]:
# City With Highest Revenue
hig_rev_city = sales_df[sales_df["Order_Status"]=="Delivered"].groupby("City")["Amount"].sum().idxmax()
hig_rev_city

'Pune'

In [29]:
# Category With Highest Revenue
sales_df[sales_df["Order_Status"]=="Delivered"].groupby("Category")["Amount"].sum().idxmax()

'Electronics'

5 — Real Business Intelligence

In [44]:
# Revenue Contribution % Per Category
rev = sales_df.loc[sales_df["Order_Status"]=="Delivered",["Amount","Category"]]
cat_rev = rev.groupby("Category")["Amount"].sum()
cat_rev_percentage = (cat_rev/cat_rev.sum())*100
print(cat_rev_percentage)

Category
Clothing        7.979033
Electronics    57.076296
Furniture      34.944671
Name: Amount, dtype: float64


In [46]:
# Create Order Size Column

#  Amount > 20000 → Large
# 1000–20000 → Medium
#  < 5000 → Small

sales_df["Order_Size"] = sales_df["Amount"].apply(lambda x: "Large" if x >20000 else "small" if x<5000 else "Medium")
sales_df

Unnamed: 0,OrderID,City,Category,Payment_Method,Amount,Quantity,Order_Status,Order_Size
0,1001,Delhi,Electronics,UPI,25000,1,Delivered,Large
1,1002,Mumbai,Clothing,Card,3000,2,Delivered,small
2,1003,Delhi,Electronics,UPI,18000,1,Cancelled,Medium
3,1004,Pune,Furniture,COD,22000,1,Delivered,Large
4,1005,Mumbai,Clothing,Card,4500,3,Returned,small
5,1006,Delhi,Electronics,UPI,27000,1,Delivered,Large
6,1007,Pune,Furniture,COD,15000,2,Delivered,Medium
7,1008,Delhi,Clothing,UPI,3500,2,Delivered,small
8,1009,Mumbai,Electronics,Card,20000,1,Delivered,Medium
9,1010,Pune,Furniture,UPI,24000,1,Cancelled,Large


In [48]:
# Order Size Distribution
(sales_df["Order_Size"].value_counts(normalize=True)*100).round(2)

Order_Size
Large     46.67
small     33.33
Medium    20.00
Name: proportion, dtype: float64

In [56]:
# Revenue breakdown
delivered_df = sales_df[sales_df["Order_Status"]=="Delivered"]
cat_rev = delivered_df.groupby("Category")["Amount"].sum()
rev_percentage = ((cat_rev/cat_rev.sum())*100).round(2)
rev_percentage

Category
Clothing        7.98
Electronics    57.08
Furniture      34.94
Name: Amount, dtype: float64

# **Final Project Output**

In [66]:
# Top City
# Top Category
# Most Payment Mode
# Revenue Breakdown

summary = ({
    "Top City": hig_rev_city,
    "Top Category": most_selling_category,
    "Most Payment Method": most_used_paymethod
    })
final_summary = pd.DataFrame(list(summary.items()),columns=["Matric","Result"])
print("FINAL SUMMARY")
print(final_summary)
print("\nREVENUE BREAKDOWN \n")
print(rev_percentage)



FINAL SUMMARY
                Matric       Result
0             Top City         Pune
1         Top Category  Electronics
2  Most Payment Method          UPI

REVENUE BREAKDOWN 

Category
Clothing        7.98
Electronics    57.08
Furniture      34.94
Name: Amount, dtype: float64
