# 🧩 Notebook-06: Descriptive Statistics, Correlation, and Random Sampling

In [1]:
import sys
from pathlib import Path

# ✅ Setup project path
# The parent of the current working directory is treated as the project root,
# and its "scripts" folder is put at the front of sys.path so the helper
# module imported below can be resolved.
PROJECT_ROOT = Path.cwd().parent
SCRIPT_DIR = PROJECT_ROOT.joinpath("scripts")
# Guard against duplicate entries when this cell is re-run in the same kernel.
if not any(entry == str(SCRIPT_DIR) for entry in sys.path):
    sys.path.insert(0, str(SCRIPT_DIR))

# ✅ Imports
import numpy as np

from stats_utils import (
    summarize_array,
    quantiles,
    compute_covariance,
    compute_correlation,
    minmax_normalize,
    zscore_normalize,
    robust_scale,
    generate_random_integers,
    generate_random_floats,
    generate_normal_distribution,
    random_sample,
    histogram_binning,
    compute_bincount
)

# Notebook banner; the trailing "\n" in the text adds one blank line after it.
banner = "📊 NumPy Statistics & Probability\n"
print(banner)

📊 NumPy Statistics & Probability



In [2]:
# ✅ 1. Descriptive statistics
# Small integer sample reused by the normalization section further down.
data = np.array([10, 15, 12, 18, 20, 25])
print("🔢 Data:", data)

# summarize_array is consumed as a mapping of statistic name -> numeric value.
summary = summarize_array(data)
print("\n📌 Summary Statistics:")
for stat_name in summary:
    stat_value = summary[stat_name]
    # Right-align the label in a 20-character column; show two decimals.
    print(f"{stat_name:>20}: {stat_value:.2f}")

# Called with the helper's default quantile levels (0.25, 0.5, 0.75).
quartiles = quantiles(data)
print("\n📐 Quantiles:", quartiles)

🔢 Data: [10 15 12 18 20 25]

📌 Summary Statistics:
                mean: 16.67
              median: 16.50
                 std: 5.02
                 var: 25.22
                 min: 10.00
                 max: 25.00
               range: 15.00
     25th_percentile: 12.75
     75th_percentile: 19.50
                 IQR: 6.75

📐 Quantiles: [12.75 16.5  19.5 ]


In [3]:
# ✅ 2. Correlation and covariance
# Y is exactly 2 * X, i.e. a perfectly linear relationship between the two.
X = np.array([1, 2, 3, 4, 5])
Y = np.array([2, 4, 6, 8, 10])

cov_matrix = compute_covariance(X, Y)
print("\n🔗 Covariance Matrix:\n", cov_matrix)

corr_matrix = compute_correlation(X, Y)
print("🔗 Correlation Coefficient Matrix:\n", corr_matrix)


🔗 Covariance Matrix:
 [[ 2.5  5. ]
 [ 5.  10. ]]
🔗 Correlation Coefficient Matrix:
 [[1. 1.]
 [1. 1.]]


In [4]:
# ✅ 3. Normalization & Scaling
# Each (label, helper) pair is applied to the same `data` array defined in the
# descriptive-statistics cell, in the order listed.
# NOTE(review): the recorded robust_scale output matches (x - Q1) / IQR
# (Q1 = 12.75, IQR = 6.75) rather than the more common (x - median) / IQR;
# confirm that this is the intended behaviour of stats_utils.robust_scale.
for label, scaler in (
    ("\n🔄 Min-Max Normalized Data:", minmax_normalize),
    ("📏 Z-score Normalized Data:", zscore_normalize),
    ("📉 Robust Scaled Data:", robust_scale),
):
    print(label, scaler(data))


🔄 Min-Max Normalized Data: [0.         0.33333333 0.13333333 0.53333333 0.66666667 1.        ]
📏 Z-score Normalized Data: [-1.32744662 -0.33186166 -0.92921264  0.26548932  0.66372331  1.65930828]
📉 Robust Scaled Data: [-0.40740741  0.33333333 -0.11111111  0.77777778  1.07407407  1.81481481]


In [5]:
# ✅ 4. Random number generation
demo_seed = 42  # the same seed is handed to every helper below
n_draws = 5     # number of values requested from each helper

# Bounds 0 and 10 are passed positionally; whether the upper bound is
# inclusive depends on the helper (TODO confirm in stats_utils).
random_ints = generate_random_integers(0, 10, n_draws, seed=demo_seed)
print("\n🎲 Random Integers:", random_ints)

random_floats = generate_random_floats(n_draws, seed=demo_seed)
print("🎲 Random Floats:", random_floats)

random_normals = generate_normal_distribution(n_draws, seed=demo_seed)
print("🎲 Random Normal Dist:", random_normals)


🎲 Random Integers: [6 3 7 4 6]
🎲 Random Floats: [0.37454012 0.95071431 0.73199394 0.59865848 0.15601864]
🎲 Random Normal Dist: [ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337]


In [6]:
# ✅ 5. Sampling from array
# Population to draw from: the integers 1 through 5.
sample_space = np.arange(1, 6)
# NOTE(review): no seed is passed here, unlike the generator helpers used in
# the previous section, so the drawn values may differ between runs. Confirm
# whether random_sample accepts a seed argument.
sample = random_sample(
    sample_space,
    sample_size=3,
    replace=True,  # the same element may be picked more than once
)
print("\n🧪 Sample (with replacement):", sample)


🧪 Sample (with replacement): [3 3 5]


In [7]:
# ✅ 6. Shuffling and permutation (manual)
n_items = 10
shuffle_seed = 42

arr = np.arange(n_items)
print("\n🎰 Before shuffle:", arr)

# Seed NumPy's legacy global generator so both the shuffle and the
# permutation below are reproducible when this cell is re-run.
np.random.seed(shuffle_seed)
np.random.shuffle(arr)  # reorders `arr` in place and returns None
print("🎰 After shuffle:", arr)

# permutation() returns a new shuffled array of 0..n_items-1; it continues
# from the generator state left by the shuffle above (no re-seeding).
permuted = np.random.permutation(n_items)
print("🔀 Random Permutation:", permuted)


🎰 Before shuffle: [0 1 2 3 4 5 6 7 8 9]
🎰 After shuffle: [8 1 5 0 7 2 9 4 3 6]
🔀 Random Permutation: [0 1 8 5 3 4 7 9 6 2]


In [8]:
# ✅ 7. Histogram binning (basic)
# 20 integers requested with bounds 1 and 100 and a fixed seed.
data_hist = generate_random_integers(1, 100, 20, seed=1)

# The helper returns a pair: per-bin counts and the bin edges.
# With bins=5 the recorded output shows 5 counts and 6 edges.
histogram = histogram_binning(data_hist, bins=5)
counts, bins = histogram
print("\n📊 Histogram Counts:", counts)
print("📊 Histogram Bins:", bins)


📊 Histogram Counts: [7 4 2 1 6]
📊 Histogram Bins: [ 2.  18.6 35.2 51.8 68.4 85. ]


In [9]:
# ✅ 8. Bincount (non-negative ints only)
nonneg = np.array([1, 1, 2, 3, 3, 3])
# In the recorded output, position i holds how many times the value i occurs
# (index 0 is 0 because the input contains no zeros).
occurrences = compute_bincount(nonneg)
print("\n🔢 Bincount:", occurrences)


🔢 Bincount: [0 2 1 3]
