In [1]:
from typing import TypeVar


T = TypeVar('T')

def pr(val : T , title:str|None = None , description: str|None = None) : # type: ignore
    if title != None :
        print(f" ---------------------- {title} ---------------------- ")
        if description != None :
            print(f" <-- {description} -->")
    print('type: ' , type(val))
    display(val)
    # return val

In [2]:
%matplotlib inline
from __future__ import annotations

from typing import TypeVar, Iterable, Optional, Sequence, Tuple  , Final# type: ignore
import numpy as np # type: ignore
import numpy.typing as npt # type: ignore
import pandas as pd # type: ignore

# Matplotlib imports with types
import matplotlib.pyplot as plt # type: ignore
from matplotlib.figure import Figure # type: ignore
from matplotlib.axes import Axes # type: ignore

In [3]:
# ---- 1) Reproducible random seed --------------------------------------------
from datetime import timedelta


# Fixed seed so every draw made through `rng` repeats on a fresh kernel run.
SEED: Final[int] = 42
rng = np.random.default_rng(SEED)
# ---- 2) Generate synthetic orders -------------------------------------------
N_ORDERS: Final[int] = 60
DAYS: Final[int] = 14
CUSTOMERS: Final[Tuple[str , ...]] = ("Alice", "Bob", "Carol", "Dave", "Eve", "Frank")
CATEGORIES: Final[Tuple[str , ...]] = ("Grocery", "Electronics", "Books")
# Read the clock exactly once and derive `start` from `end`. Two separate
# `pd.Timestamp('today')` calls could land on different days if the cell runs
# across midnight, which would silently stretch the window by a day.
end = pd.Timestamp('today').normalize()
start = end - timedelta(days=DAYS)
# Both endpoints are included, so the index holds DAYS + 1 calendar days.
# The window is anchored to the run date: absolute dates shift from day to day
# even though the seeded draws themselves do not.
date_index = pd.date_range(start , end , freq="D")
pr(start , 'start')
pr(end , 'end')
pr(date_index , 'date_index')


 ---------------------- start ---------------------- 
type:  <class 'pandas._libs.tslibs.timestamps.Timestamp'>


Timestamp('2025-10-08 00:00:00')

 ---------------------- end ---------------------- 
type:  <class 'pandas._libs.tslibs.timestamps.Timestamp'>


Timestamp('2025-10-22 00:00:00')

 ---------------------- date_index ---------------------- 
type:  <class 'pandas.core.indexes.datetimes.DatetimeIndex'>


DatetimeIndex(['2025-10-08', '2025-10-09', '2025-10-10', '2025-10-11',
               '2025-10-12', '2025-10-13', '2025-10-14', '2025-10-15',
               '2025-10-16', '2025-10-17', '2025-10-18', '2025-10-19',
               '2025-10-20', '2025-10-21', '2025-10-22'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Sample columns: one calendar day per order, drawn with replacement from the
# date window, plus sequential order ids running from 1 to N_ORDERS.
order_dates: npt.NDArray[np.datetime64] = rng.choice(
    date_index.values,
    size=N_ORDERS,
    replace=True,
)
order_ids = np.arange(N_ORDERS, dtype=np.int64) + 1
# Preview only the first ten entries of each column.
pr(order_dates[:10], title='order_dates')
pr(order_ids[:10], title='order_ids')

 ---------------------- order_dates ---------------------- 
type:  <class 'numpy.ndarray'>


array(['2025-10-09T00:00:00.000000000', '2025-10-19T00:00:00.000000000',
       '2025-10-17T00:00:00.000000000', '2025-10-14T00:00:00.000000000',
       '2025-10-14T00:00:00.000000000', '2025-10-20T00:00:00.000000000',
       '2025-10-09T00:00:00.000000000', '2025-10-18T00:00:00.000000000',
       '2025-10-11T00:00:00.000000000', '2025-10-09T00:00:00.000000000'],
      dtype='datetime64[ns]')

 ---------------------- order_ids ---------------------- 
type:  <class 'numpy.ndarray'>


array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [5]:
# Sampling weights for the customer draw; entry i is the probability of
# picking CUSTOMERS[i].
customer_props: npt.NDArray[np.float64] = np.asarray(
    (0.22, 0.18, 0.16, 0.16, 0.14, 0.14),
    dtype=np.float64,
)
# Customers follow the weights above; categories pass no weights, so every
# category is equally likely. Both draws are with replacement.
customers = rng.choice(CUSTOMERS, size=N_ORDERS, replace=True, p=customer_props)
categories = rng.choice(CATEGORIES, size=N_ORDERS, replace=True)
pr(customer_props, title='customer_props')
pr(customers, title='customers')
pr(categories, title='categories')

 ---------------------- customer_props ---------------------- 
type:  <class 'numpy.ndarray'>


array([0.22, 0.18, 0.16, 0.16, 0.14, 0.14])

 ---------------------- customers ---------------------- 
type:  <class 'numpy.ndarray'>


array(['Eve', 'Frank', 'Bob', 'Bob', 'Carol', 'Alice', 'Alice', 'Carol',
       'Bob', 'Dave', 'Carol', 'Eve', 'Dave', 'Bob', 'Eve', 'Eve', 'Bob',
       'Bob', 'Dave', 'Alice', 'Alice', 'Alice', 'Eve', 'Dave', 'Dave',
       'Eve', 'Carol', 'Dave', 'Alice', 'Alice', 'Dave', 'Carol', 'Dave',
       'Eve', 'Dave', 'Carol', 'Carol', 'Bob', 'Alice', 'Carol', 'Alice',
       'Carol', 'Eve', 'Bob', 'Alice', 'Bob', 'Bob', 'Dave', 'Carol',
       'Eve', 'Dave', 'Carol', 'Eve', 'Alice', 'Alice', 'Alice', 'Eve',
       'Carol', 'Alice', 'Carol'], dtype='<U5')

 ---------------------- categories ---------------------- 
type:  <class 'numpy.ndarray'>


array(['Books', 'Grocery', 'Electronics', 'Books', 'Electronics',
       'Electronics', 'Grocery', 'Electronics', 'Grocery', 'Grocery',
       'Books', 'Electronics', 'Electronics', 'Electronics', 'Books',
       'Grocery', 'Electronics', 'Grocery', 'Electronics', 'Books',
       'Electronics', 'Books', 'Electronics', 'Books', 'Electronics',
       'Grocery', 'Books', 'Books', 'Grocery', 'Books', 'Grocery',
       'Books', 'Books', 'Electronics', 'Books', 'Grocery', 'Grocery',
       'Grocery', 'Electronics', 'Books', 'Grocery', 'Electronics',
       'Books', 'Grocery', 'Books', 'Grocery', 'Books', 'Electronics',
       'Electronics', 'Grocery', 'Electronics', 'Books', 'Grocery',
       'Books', 'Electronics', 'Books', 'Electronics', 'Electronics',
       'Grocery', 'Electronics'], dtype='<U11')

In [6]:
# Integer draws over half-open ranges (endpoint=False excludes `high`):
# quantities fall in 1..5 and unit prices in 5..120.
quantities: npt.NDArray[np.int64] = rng.integers(
    low=1,
    high=6,
    size=N_ORDERS,
    dtype=np.int64,
    endpoint=False,
)
unit_prices: npt.NDArray[np.int64] = rng.integers(
    low=5,
    high=121,
    size=N_ORDERS,
    dtype=np.int64,
    endpoint=False,
)
pr(quantities, title='quantities')
pr(unit_prices, title='unit_prices')

 ---------------------- quantities ---------------------- 
type:  <class 'numpy.ndarray'>


array([1, 3, 1, 4, 4, 1, 4, 3, 4, 1, 1, 3, 1, 2, 4, 1, 4, 1, 1, 3, 4, 1,
       2, 5, 5, 3, 3, 2, 4, 3, 2, 1, 1, 5, 3, 3, 3, 4, 1, 1, 2, 3, 4, 3,
       3, 5, 1, 3, 3, 3, 5, 2, 5, 2, 3, 3, 5, 3, 2, 1])

 ---------------------- unit_prices ---------------------- 
type:  <class 'numpy.ndarray'>


array([ 47, 100,   6, 108,  67,  21,  40,  69,  41,  17,  82,  82,  18,
        37,  33,  81,  89,  89, 113,  94,  42,  17,  76, 111, 118,  31,
        32,   9,  21,  69,  68,  48,  40, 101,  58,  98, 116,  41,  28,
       115,  78,  38,  77,  64, 110,  34, 120, 113,  25,  24,  66,  10,
        39,  55, 117, 120,  22, 108,  15,  91])

In [7]:
# Assemble the sampled columns into one frame, then order rows by date and,
# within a date, by order id; the row index is renumbered from 0 afterwards.
orders: pd.DataFrame = (
    pd.DataFrame(
        dict(
            order_id=order_ids,
            date=pd.to_datetime(order_dates),
            customer=customers,
            category=categories,
            qty=quantities,
            unit_price=unit_prices,
        )
    )
    .sort_values(by=['date', 'order_id'])
    .reset_index(drop=True)
)
pr(orders.head(20))
pr(orders.dtypes)

type:  <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,order_id,date,customer,category,qty,unit_price
0,36,2025-10-08,Carol,Grocery,3,98
1,56,2025-10-08,Alice,Books,3,120
2,1,2025-10-09,Eve,Books,1,47
3,7,2025-10-09,Alice,Grocery,4,40
4,10,2025-10-09,Dave,Grocery,1,17
5,18,2025-10-09,Bob,Grocery,1,89
6,33,2025-10-09,Dave,Books,1,40
7,45,2025-10-09,Alice,Books,3,110
8,23,2025-10-10,Eve,Electronics,2,76
9,41,2025-10-10,Alice,Grocery,2,78


type:  <class 'pandas.core.series.Series'>


order_id               int64
date          datetime64[ns]
customer              object
category              object
qty                    int64
unit_price             int64
dtype: object

In [8]:
# Revenue per order line = quantity x unit price, stored as int64.
orders['revenue'] = orders['unit_price'].mul(orders['qty']).astype(np.int64)
pr(orders.head())
pr(orders.dtypes)

type:  <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,order_id,date,customer,category,qty,unit_price,revenue
0,36,2025-10-08,Carol,Grocery,3,98,294
1,56,2025-10-08,Alice,Books,3,120,360
2,1,2025-10-09,Eve,Books,1,47,47
3,7,2025-10-09,Alice,Grocery,4,40,160
4,10,2025-10-09,Dave,Grocery,1,17,17


type:  <class 'pandas.core.series.Series'>


order_id               int64
date          datetime64[ns]
customer              object
category              object
qty                    int64
unit_price             int64
revenue                int64
dtype: object

In [9]:
# Filter parameters, named so the window length and threshold are not magic numbers.
WINDOW_DAYS: Final[int] = 7        # inclusive window: today plus the 6 days before it
QTY_THRESHOLD: Final[int] = 2      # keep rows whose qty is strictly greater than this

today = pd.Timestamp('today').normalize()
start_7d = today - pd.Timedelta(days=WINDOW_DAYS - 1)
pr(start_7d , 'start_7d')
# `between` includes both endpoints by default, i.e. start_7d <= date <= today.
mask_last7d = orders['date'].between(start_7d, today)
pr(mask_last7d.tail() , "mask_last7d")
mask_qty = orders["qty"] > QTY_THRESHOLD
# Seed the preview sample: an unseeded `.sample()` shows different rows on
# every run, which defeats the fixed SEED used for the rest of the notebook.
pr(mask_qty.sample(5, random_state=SEED) , "mask_qty")
# `.copy()` gives an independent frame rather than a slice of `orders`.
filtered : pd.DataFrame = orders[mask_last7d & mask_qty].copy()
pr(filtered , 'filtered')



 ---------------------- start_7d ---------------------- 
type:  <class 'pandas._libs.tslibs.timestamps.Timestamp'>


Timestamp('2025-10-16 00:00:00')

 ---------------------- mask_last7d ---------------------- 
type:  <class 'pandas.core.series.Series'>


55    True
56    True
57    True
58    True
59    True
Name: date, dtype: bool

 ---------------------- mask_qty ---------------------- 
type:  <class 'pandas.core.series.Series'>


37     True
18     True
28    False
35    False
12     True
Name: qty, dtype: bool

 ---------------------- filtered ---------------------- 
type:  <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,order_id,date,customer,category,qty,unit_price,revenue
30,29,2025-10-16,Alice,Grocery,4,21,84
31,34,2025-10-16,Eve,Electronics,5,101,505
32,57,2025-10-16,Eve,Electronics,5,22,110
34,26,2025-10-17,Eve,Grocery,3,31,93
36,8,2025-10-18,Carol,Electronics,3,69,207
37,15,2025-10-18,Eve,Books,4,33,132
38,43,2025-10-18,Eve,Books,4,77,308
39,49,2025-10-18,Carol,Electronics,3,25,75
41,2,2025-10-19,Frank,Grocery,3,100,300
45,25,2025-10-19,Dave,Electronics,5,118,590
