In [74]:
import json
import pandas as pd
import numpy as nm

# JSON text is UTF-8 by specification; pin the encoding so that decoding does
# not depend on the platform's locale default (customer names and currency
# symbols are the kind of content a non-UTF-8 default would mangle).
with open("orders.json", "r", encoding="utf-8") as file:
    data = json.load(file)

In [75]:
# Flatten the nested order documents: json_normalize emits one row per entry
# of each order's "items" list and repeats the meta fields below on every row.
order_level_fields = [
    "order_id",
    "order_date",
    ["customer", "customer_id"],
    ["customer", "name"],
    ["customer", "city"],
    ["payment", "method"],
    ["payment", "status"],
]

df = pd.json_normalize(data, record_path="items", meta=order_level_fields)

In [76]:
# Report-friendly headers, assigned by position: the five item-level columns
# come first, followed by the seven order-level meta columns.
# NOTE(review): positional assignment relies on the key order inside each
# "items" record -- confirm against orders.json if the source schema changes.
item_headers = [
    "Product ID", "Product Name", "Product Category",
    "Price", "Quantity Sold",
]
order_headers = [
    "Order ID", "Order Date",
    "Customer ID", "Customer Name", "Customer City",
    "Payment Method", "Order Status",
]
df.columns = item_headers + order_headers

# Convert the order date column to pandas datetime values.
df["Order Date"] = pd.to_datetime(df["Order Date"])

In [77]:
# Line-item revenue = unit price x quantity, computed element-wise on the
# underlying NumPy arrays and stored back as a new column.
price_array, quantity_array = (
    df[column].to_numpy() for column in ("Price", "Quantity Sold")
)

total_amount = nm.multiply(price_array, quantity_array)

df["Total Amount"] = total_amount


In [78]:
# Only rows whose status is exactly "Success" keep their amount; every other
# row is set to 0 so it contributes nothing to the revenue figures.
is_success = df["Order Status"].eq("Success")
df["Total Amount"] = df["Total Amount"].where(is_success, 0)

In [89]:
# Order total above which an order is flagged as high value (same units as
# the "Total Amount" column). Named so the cut-off is visible and tunable in
# one place instead of being buried in the comparison below.
HIGH_VALUE_THRESHOLD = 30000

# transform("sum") broadcasts each order's total back onto its line items, so
# every row of an order receives the same flag.
order_total = df.groupby("Order ID")["Total Amount"].transform("sum")

df["High Value Order Flag"] = nm.where(
    order_total > HIGH_VALUE_THRESHOLD,
    "Yes",
    "No",
)

In [108]:
# Columns kept for reporting, in display order.
report_columns = [
    "Order ID", "Order Date",
    "Customer ID", "Customer Name", "Customer City",
    "Product Name", "Product Category",
    "Quantity Sold", "Total Amount",
    "Payment Method", "Order Status",
    "High Value Order Flag",
]
final_df = df[report_columns]

final_df.head()

Unnamed: 0,Order ID,Order Date,Customer ID,Customer Name,Customer City,Product Name,Product Category,Quantity Sold,Total Amount,Payment Method,Order Status,High Value Order Flag
0,ORD1001,2025-01-10,CUST01,Rahul Sharma,Delhi,Laptop,Electronics,1,55000,Credit Card,Success,Yes
1,ORD1001,2025-01-10,CUST01,Rahul Sharma,Delhi,Mouse,Electronics,2,1000,Credit Card,Success,Yes
2,ORD1002,2025-01-11,CUST02,Sneha Verma,Mumbai,Mobile Phone,Electronics,1,32000,UPI,Success,Yes
3,ORD1003,2025-01-12,CUST03,Amit Patel,Pune,Dining Table,Furniture,1,0,Cash,Failed,No
4,ORD1003,2025-01-12,CUST03,Amit Patel,Pune,Chair,Furniture,4,0,Cash,Failed,No


In [91]:
# 1. Total revenue per city
city_groups = final_df.groupby("Customer City")
revenue_by_city = city_groups["Total Amount"].sum()

# Display copy: amounts become formatted strings here, while the numeric
# totals remain available in revenue_by_city.
revenue_by_city_df = revenue_by_city.reset_index()
revenue_by_city_df["Total Amount"] = revenue_by_city_df["Total Amount"].map(
    lambda x: f"₹{x:,.0f}"
)

revenue_by_city_df

Unnamed: 0,Customer City,Total Amount
0,Bangalore,"₹5,000"
1,Chennai,"₹18,000"
2,Delhi,"₹68,000"
3,Mumbai,"₹32,000"
4,Pune,₹0


In [92]:
# 2. Product category with highest revenue
revenue_by_category = final_df.groupby("Product Category")["Total Amount"].sum()
# idxmax returns the category label (the group key) holding the largest total.
top_category = revenue_by_category.idxmax()

print(f"The highest revenue is generated from the '{top_category}' category.")

The highest revenue is generated from the 'Electronics' category.


In [99]:
# 3. Count of high-value orders
# The flag is stored on every line item, so count distinct order IDs rather
# than flagged rows.
is_high_value = final_df["High Value Order Flag"] == "Yes"
high_value_count = final_df.loc[is_high_value, "Order ID"].nunique()

print(
    f"There are {high_value_count} high-value order(s), "
    "representing premium transactions for leadership focus."
)


There are 2 high-value order(s), representing premium transactions for leadership focus.


In [118]:
# 4. Average order value
# Mean of the per-order totals, restricted to rows whose status is "Success".
successful_rows = final_df[final_df["Order Status"] == "Success"]
successful_order_totals = successful_rows.groupby("Order ID")["Total Amount"].sum()
avg_order_value = successful_order_totals.mean()

print(f"Average order value is ₹{avg_order_value:,.0f} per order.")

Average order value is ₹24,600 per order.


In [119]:
# 5. Most preferred payment method
# Take one payment method per successful order (first() per "Order ID") so
# multi-item orders are not over-counted, then pick the most frequent one.
successful_rows = final_df[final_df["Order Status"] == "Success"]
payment_per_order = successful_rows.groupby("Order ID")["Payment Method"].first()
preferred_payment = payment_per_order.value_counts().idxmax()

print(f"The most preferred payment method among customers is '{preferred_payment}'.")


The most preferred payment method among customers is 'UPI'.


In [72]:
# 6. Top customers by revenue
# NOTE(review): grouping is by "Customer Name", so two customers sharing a
# name would be merged into one row -- consider grouping by "Customer ID".
revenue_by_customer = final_df.groupby("Customer Name")["Total Amount"].sum()
top_customers = revenue_by_customer.sort_values(ascending=False)

print(
    "The list below highlights top customers by revenue, "
    "useful for customer segmentation and retention strategies."
)

# Display copy with formatted amounts; numeric totals stay in top_customers.
top_customers_df = top_customers.reset_index()
top_customers_df["Total Amount"] = top_customers_df["Total Amount"].map(
    lambda x: f"₹{x:,.0f}"
)

top_customers_df


The list below highlights top customers by revenue, useful for customer segmentation and retention strategies.


Unnamed: 0,Customer Name,Total Amount
0,Rahul Sharma,"₹56,000"
1,Sneha Verma,"₹32,000"
2,Pooja Iyer,"₹18,000"
3,Karan Mehta,"₹12,000"
4,Neha Singh,"₹5,000"
5,Amit Patel,₹0


In [120]:
# 7. Completed vs cancelled orders
# Reduce to one status per order (first() per "Order ID"), then count how
# many orders fall under each status value.
status_per_order = final_df.groupby("Order ID")["Order Status"].first()

order_status_count = (
    status_per_order
    .value_counts()
    .rename_axis("Order Status")
    .reset_index(name="Count")
)

order_status_count


Unnamed: 0,Order Status,Count
0,Success,5
1,Failed,1
