In [1]:
import numpy as np

# Seed for reproducibility
np.random.seed(42)

# Number of records
n = 200

# Order IDs
order_id = np.arange(1, n+1)

# Product IDs between 100–120
product_id = np.random.randint(100, 121, n)

# Categories
categories = np.array(["Electronics", "Apparel", "Grocery"])
category = np.random.choice(categories, n)

# Quantity sold (1–10)
quantity = np.random.randint(1, 11, n)

# Price per unit depending on category
price_per_unit = np.array([np.random.randint(2000, 6000) if c=="Electronics" else
                           np.random.randint(500, 2000) if c=="Apparel" else
                           np.random.randint(50, 500) for c in category])

# Discount (0–0.2)
discount = np.round(np.random.choice([0, 0.05, 0.1, 0.15, 0.2], n), 2)

# Dates: Jan 2026
dates = np.array([f"2026-01-{np.random.randint(1, 31):02d}" for _ in range(n)])

# Store IDs 1–5
store_id = np.random.randint(1, 6, n)

# Combine into structured NumPy array
dtype = [('order_id', int), ('product_id', int), ('category', 'U15'), 
         ('quantity', int), ('price_per_unit', float), ('discount', float), 
         ('date', 'U10'), ('store_id', int)]

sales_data = np.array(list(zip(order_id, product_id, category, quantity, 
                               price_per_unit, discount, dates, store_id)), dtype=dtype)

# Check first 5 records
sales_data[:5]


array([(1, 106, 'Grocery',  3,  466., 0.2 , '2026-01-01', 3),
       (2, 119, 'Electronics', 10, 2933., 0.05, '2026-01-15', 3),
       (3, 114, 'Electronics',  7, 3140., 0.2 , '2026-01-23', 3),
       (4, 110, 'Grocery', 10,  183., 0.05, '2026-01-02', 5),
       (5, 107, 'Apparel',  5,  557., 0.2 , '2026-01-14', 1)],
      dtype=[('order_id', '<i8'), ('product_id', '<i8'), ('category', '<U15'), ('quantity', '<i8'), ('price_per_unit', '<f8'), ('discount', '<f8'), ('date', '<U10'), ('store_id', '<i8')])

In [2]:
quantities = sales_data['quantity']
print(quantities[:10])  # first 10 quantities


[ 3 10  7 10  5 10  5  7  9  5]


In [3]:
total_price = sales_data['quantity'] * sales_data['price_per_unit']
print(total_price[:10])


[ 1398. 29330. 21980.  1830.  2785. 56270.  1110.  1463.  1998.  6570.]


In [4]:
final_price = total_price * (1 - sales_data['discount'])
print(final_price[:10])


[ 1118.4 27863.5 17584.   1738.5  2228.  50643.    999.   1316.7  1798.2
  6570. ]


In [5]:
categories = np.unique(sales_data['category'])
for cat in categories:
    count = np.sum(sales_data['category'] == cat)
    print(f"{cat}: {count} orders")


Apparel: 67 orders
Electronics: 66 orders
Grocery: 67 orders


In [6]:
for cat in categories:
    mask = sales_data['category'] == cat
    max_sale = np.max(total_price[mask])
    min_sale = np.min(total_price[mask])
    print(f"{cat}: Max={max_sale}, Min={min_sale}")


Apparel: Max=18530.0, Min=597.0
Electronics: Max=56270.0, Min=2389.0
Grocery: Max=4230.0, Min=69.0


In [7]:
high_value_orders = sales_data[total_price > 5000]
print(high_value_orders[:5])


[( 2, 119, 'Electronics', 10, 2933., 0.05, '2026-01-15', 3)
 ( 3, 114, 'Electronics',  7, 3140., 0.2 , '2026-01-23', 3)
 ( 6, 120, 'Electronics', 10, 5627., 0.1 , '2026-01-21', 5)
 (10, 110, 'Apparel',  5, 1314., 0.  , '2026-01-07', 1)
 (16, 101, 'Electronics',  6, 4811., 0.1 , '2026-01-19', 2)]


In [8]:
for cat in categories:
    mask = sales_data['category'] == cat
    avg_discount = np.mean(sales_data['discount'][mask])
    print(f"{cat}: Average Discount = {avg_discount}")


Apparel: Average Discount = 0.09328358208955226
Electronics: Average Discount = 0.10757575757575759
Grocery: Average Discount = 0.091044776119403


In [9]:
stores = np.unique(sales_data['store_id'])
for s in stores:
    mask = sales_data['store_id'] == s
    revenue = np.sum(final_price[mask])
    print(f"Store {s}: Revenue = {revenue}")


Store 1: Revenue = 294276.6
Store 2: Revenue = 465002.10000000003
Store 3: Revenue = 460859.3499999999
Store 4: Revenue = 234531.0
Store 5: Revenue = 384198.75


In [10]:
dates = np.unique(sales_data['date'])
for d in dates[:5]:  # just first 5 dates
    mask = sales_data['date'] == d
    print(f"Date {d}: {np.sum(mask)} orders")


Date 2026-01-01: 10 orders
Date 2026-01-02: 5 orders
Date 2026-01-03: 2 orders
Date 2026-01-04: 9 orders
Date 2026-01-05: 7 orders


In [11]:
unique_products = np.unique(sales_data['product_id'])
total_quantity_per_product = [np.sum(sales_data['quantity'][sales_data['product_id'] == p]) for p in unique_products]

# Sort and get top 5
top_indices = np.argsort(total_quantity_per_product)[-5:][::-1]
for idx in top_indices:
    print(f"Product {unique_products[idx]}: Total Quantity = {total_quantity_per_product[idx]}")


Product 111: Total Quantity = 92
Product 106: Total Quantity = 78
Product 100: Total Quantity = 78
Product 108: Total Quantity = 78
Product 107: Total Quantity = 70


In [12]:
for cat in categories:
    mask = sales_data['category'] == cat
    total_qty = np.sum(sales_data['quantity'][mask])
    print(cat, total_qty)
# Data generation for sales records

Apparel 355
Electronics 368
Grocery 411


In [15]:
avg_order_value = np.mean(final_price)
print(avg_order_value)


9194.339


In [16]:
discounted_orders = np.sum(sales_data['discount'] > 0)
print(discounted_orders)


157


In [17]:
max_index = np.argmax(final_price)
sales_data[max_index], final_price[max_index]


(np.void((98, 109, 'Electronics', 9, 5925.0, 0.05, '2026-01-28', 2), dtype=[('order_id', '<i8'), ('product_id', '<i8'), ('category', '<U15'), ('quantity', '<i8'), ('price_per_unit', '<f8'), ('discount', '<f8'), ('date', '<U10'), ('store_id', '<i8')]),
 np.float64(50658.75))

In [18]:
revenue_bucket = np.where(
    final_price < 3000, "Low",
    np.where(final_price <= 8000, "Medium", "High")
)

revenue_bucket[:10]


array(['Low', 'High', 'High', 'Low', 'Low', 'High', 'Low', 'Low', 'Low',
       'Medium'], dtype='<U6')