In [1]:
import numpy as np
import pandas as pd
from microdf import MicroSeries

# Create sample data for testing
np.random.seed(42)
data = np.random.normal(100, 20, 1000)
weights = np.random.exponential(1, 1000)

# Create MicroSeries instance
ms = MicroSeries(data, weights=weights)

print("Sample MicroSeries created:")
print(f"Data shape: {ms.shape}")
print(f"Weights shape: {ms.weights.shape}")
print(f"MicroSeries type: {type(ms)}")
print(f"First 5 values: {ms.head()}")
print(f"First 5 weights: {ms.weights.head()}")
print(f"Weights sum: {ms.weights.sum()}")
print(f"All weights are zero: {(ms.weights == 0).all()}")

# Safety check: ensure weights are not zero
if ms.weights.sum() == 0:
    print("⚠️  ERROR: Weights sum to zero! This should not happen with exponential distribution.")
    print("⚠️  Recreating with explicit positive weights...")
    ms = MicroSeries(data, weights=np.ones(len(data)))
    print(f"Fixed weights sum: {ms.weights.sum()}")
else:
    print("✓ Weights are properly initialized")

print("=" * 50)

Sample MicroSeries created:
Data shape: (1000,)
Weights shape: (1000,)
MicroSeries type: <class 'microdf.generic.MicroSeries'>
First 5 values: 0    109.934283
1     97.234714
2    112.953771
3    130.460597
4     95.316933
dtype: float64
First 5 weights: 0    0.183301
1    0.110449
2    1.011784
3    1.225795
4    0.032096
dtype: float64
Weights sum: 1007.9859336968675
All weights are zero: False
✓ Weights are properly initialized


In [2]:
# Test 1: Basic Statistical Operations (scalar functions)
print("=== BASIC STATISTICAL OPERATIONS ===\n")

# sum()
result = ms.sum()
print(f"sum(): {result}")
print(f"Type: {type(result)}")
print()

# count()
result = ms.count()
print(f"count(): {result}")
print(f"Type: {type(result)}")
print()

# mean()
result = ms.mean()
print(f"mean(): {result}")
print(f"Type: {type(result)}")
print()

# median()
result = ms.median()
print(f"median(): {result}")
print(f"Type: {type(result)}")
print()

# gini()
result = ms.gini()
print(f"gini(): {result}")
print(f"Type: {type(result)}")
print()

# gini() with negatives handling
result = ms.gini(negatives="zero")
print(f"gini(negatives='zero'): {result}")
print(f"Type: {type(result)}")
print()

# Test 2: Quantile operations
print("=== QUANTILE OPERATIONS ===\n")

# quantile() with single value
result = ms.quantile(0.5)
print(f"quantile(0.5): {result}")
print(f"Type: {type(result)}")
print()

# quantile() with array
result = ms.quantile([0.25, 0.5, 0.75])
print(f"quantile([0.25, 0.5, 0.75]): {result}")
print(f"Type: {type(result)}")
print()

=== BASIC STATISTICAL OPERATIONS ===

sum(): 101015.38419115798
Type: <class 'numpy.float64'>

count(): 1007.9859336968675
Type: <class 'numpy.float64'>

mean(): 100.21507326067153
Type: <class 'numpy.float64'>

median(): 101.37310698663197
Type: <class 'numpy.float64'>

gini(): 0.10798101497882602
Type: <class 'numpy.float64'>

gini(negatives='zero'): 0.10798101497882602
Type: <class 'numpy.float64'>

=== QUANTILE OPERATIONS ===

quantile(0.5): 101.37310698663197
Type: <class 'numpy.float64'>

quantile([0.25, 0.5, 0.75]): 0.25     87.986469
0.50    101.373107
0.75    112.444198
dtype: float64
Type: <class 'pandas.core.series.Series'>



In [3]:
# Test 3: Distribution Share Operations (scalar functions)
print("=== DISTRIBUTION SHARE OPERATIONS ===\n")

# top_x_pct_share()
result = ms.top_x_pct_share(0.1)
print(f"top_x_pct_share(0.1): {result}")
print(f"Type: {type(result)}")
print()

# bottom_x_pct_share()
result = ms.bottom_x_pct_share(0.1)
print(f"bottom_x_pct_share(0.1): {result}")
print(f"Type: {type(result)}")
print()

# Specific percentile shares
result = ms.top_50_pct_share()
print(f"top_50_pct_share(): {result}")
print(f"Type: {type(result)}")
print()

result = ms.bottom_50_pct_share()
print(f"bottom_50_pct_share(): {result}")
print(f"Type: {type(result)}")
print()

result = ms.top_10_pct_share()
print(f"top_10_pct_share(): {result}")
print(f"Type: {type(result)}")
print()

result = ms.top_1_pct_share()
print(f"top_1_pct_share(): {result}")
print(f"Type: {type(result)}")
print()

result = ms.top_0_1_pct_share()
print(f"top_0_1_pct_share(): {result}")
print(f"Type: {type(result)}")
print()

# t10_b50 ratio
result = ms.t10_b50()
print(f"t10_b50(): {result}")
print(f"Type: {type(result)}")
print()

=== DISTRIBUTION SHARE OPERATIONS ===

top_x_pct_share(0.1): 0.13429305565724164
Type: <class 'numpy.float64'>

bottom_x_pct_share(0.1): 0.06503891201009249
Type: <class 'numpy.float64'>

top_50_pct_share(): 0.5738865802565075
Type: <class 'numpy.float64'>

bottom_50_pct_share(): 0.4261134197434925
Type: <class 'numpy.float64'>

top_10_pct_share(): 0.13429305565724164
Type: <class 'numpy.float64'>

top_1_pct_share(): 0.01517708357981688
Type: <class 'numpy.float64'>

top_0_1_pct_share(): 0.0008919136026892724
Type: <class 'numpy.float64'>

t10_b50(): 0.3151580059085725
Type: <class 'numpy.float64'>



In [4]:
# Test 4: Vector Operations (return Series or MicroSeries)
print("=== VECTOR OPERATIONS ===\n")

# First, let's check if our main ms object is healthy
print(f"Main ms weights sum: {ms.weights.sum()}")
print(f"Main ms weights first 5: {ms.weights.head()}")
print()

# If weights sum is zero, recreate the object
if ms.weights.sum() == 0:
    print("⚠️  Main ms object has zero weights! Recreating...")
    ms = MicroSeries(data, weights=weights)
    print(f"Recreated ms weights sum: {ms.weights.sum()}")
    print()

# weight()
result = ms.weight()
print(f"weight(): first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

# cumsum()
result = ms.cumsum()
print(f"cumsum(): first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

# rank()
result = ms.rank()
print(f"rank(): first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

# rank() with percentage
result = ms.rank(pct=True)
print(f"rank(pct=True): first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

# decile_rank()
result = ms.decile_rank()
print(f"decile_rank(): first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

# decile_rank() with .clip()
result = ms.decile_rank().clip(1, 10)
print(f"decile_rank() clipping: first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

# decile_rank() with .round()
result = ms.decile_rank().round(3)
print(f"decile_rank() rounding: first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

# quintile_rank()
result = ms.quintile_rank()
print(f"quintile_rank(): first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

# quartile_rank()
result = ms.quartile_rank()
print(f"quartile_rank(): first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

# percentile_rank()
result = ms.percentile_rank()
print(f"percentile_rank(): first 5 values = {result.head()}")
print(f"Type: {type(result)}")
print()

cumsum() returns cumulative sums of weighted values as a regular pandas Series. The original weights have already been applied and cannot be reused with the cumulative results.


=== VECTOR OPERATIONS ===

Main ms weights sum: 1007.9859336968675
Main ms weights first 5: 0    0.183301
1    0.110449
2    1.011784
3    1.225795
4    0.032096
dtype: float64

weight(): first 5 values = 0     20.151079
1     10.739459
2    114.284831
3    159.917940
4      3.059268
dtype: float64
Type: <class 'pandas.core.series.Series'>

cumsum(): first 5 values = 0     20.151079
1     30.890538
2    145.175369
3    305.093309
4    308.152577
dtype: float64
Type: <class 'pandas.core.series.Series'>

rank(): first 5 values = 0    717.256381
1    431.516486
2    766.519285
3    937.492572
4    398.353953
dtype: float64
Type: <class 'microdf.generic.MicroSeries'>

rank(pct=True): first 5 values = 0    0.711574
1    0.428098
2    0.760446
3    0.930065
4    0.395198
dtype: float64
Type: <class 'microdf.generic.MicroSeries'>

decile_rank(): first 5 values = 0     8.0
1     5.0
2     8.0
3    10.0
4     4.0
dtype: float64
Type: <class 'microdf.generic.MicroSeries'>

decile_rank() clipping

In [5]:
# Test 5: Utility Operations
print("=== UTILITY OPERATIONS ===\n")

# set_weights()
ms_copy = ms.copy()
new_weights = np.ones(len(ms))
ms_copy.set_weights(new_weights)
print(f"set_weights(): Changed weights to all ones")
print(f"Original weights sum: {ms.weights.sum()}")
print(f"New weights sum: {ms_copy.weights.sum()}")
print(f"Type: {type(ms_copy)}")
print()

# copy()
result = ms.copy()
print(f"copy(): Created copy")
print(f"Type: {type(result)}")
print(f"Is same object: {result is ms}")
print(f"Are equal: {result.equals(ms)}")
print()

# equals()
result = ms.equals(ms_copy)
print(f"equals(): Comparing original with modified copy = {result}")
print(f"Type: {type(result)}")
print()

result = ms.equals(ms)
print(f"equals(): Comparing with itself = {result}")
print(f"Type: {type(result)}")
print()

# groupby() - create a simple grouping variable
group_var = pd.Series(np.random.choice(['A', 'B', 'C'], size=len(ms)))
result = ms.groupby(group_var)
print(f"groupby(): Created groupby object")
print(f"Type: {type(result)}")
print(f"Groups: {list(result.groups.keys())}")
print()

=== UTILITY OPERATIONS ===

set_weights(): Changed weights to all ones
Original weights sum: 1007.9859336968675
New weights sum: 1000.0
Type: <class 'microdf.generic.MicroSeries'>

copy(): Created copy
Type: <class 'microdf.generic.MicroSeries'>
Is same object: False
Are equal: True

equals(): Comparing original with modified copy = False
Type: <class 'bool'>

equals(): Comparing with itself = True
Type: <class 'bool'>

groupby(): Created groupby object
Type: <class 'microdf.generic.MicroSeriesGroupBy'>
Groups: ['A', 'B', 'C']



In [6]:
# Test 6: Arithmetic Operations
print("=== ARITHMETIC OPERATIONS ===\n")

# Create a smaller sample for cleaner output
sample_size = 5
ms_small = MicroSeries(data[:sample_size], weights=weights[:sample_size])
print(f"Original values: {ms_small.values}")
print(f"Original weights: {ms_small.weights.values}")
print()

# Addition
result = ms_small + 10
print(f"Addition (+10): {result.values}")
print(f"Type: {type(result)}")
print(f"Weights preserved: {np.array_equal(result.weights.values, ms_small.weights.values)}")
print()

# Subtraction
result = ms_small - 5
print(f"Subtraction (-5): {result.values}")
print(f"Type: {type(result)}")
print()

# Multiplication
result = ms_small * 2
print(f"Multiplication (*2): {result.values}")
print(f"Type: {type(result)}")
print()

# Division
result = ms_small / 2
print(f"Division (/2): {result.values}")
print(f"Type: {type(result)}")
print()

# Floor division
result = ms_small // 2
print(f"Floor division (//2): {result.values}")
print(f"Type: {type(result)}")
print()

# Modulo
result = ms_small % 10
print(f"Modulo (%10): {result.values}")
print(f"Type: {type(result)}")
print()

# Power
result = ms_small ** 2
print(f"Power (**2): {result.values}")
print(f"Type: {type(result)}")
print()

=== ARITHMETIC OPERATIONS ===

Original values: [109.93428306  97.23471398 112.95377076 130.46059713  95.31693251]
Original weights: [0.18330114 0.11044882 1.01178411 1.22579494 0.03209575]

Addition (+10): [119.93428306 107.23471398 122.95377076 140.46059713 105.31693251]
Type: <class 'microdf.generic.MicroSeries'>
Weights preserved: True

Subtraction (-5): [104.93428306  92.23471398 107.95377076 125.46059713  90.31693251]
Type: <class 'microdf.generic.MicroSeries'>

Multiplication (*2): [219.86856612 194.46942795 225.90754152 260.92119426 190.63386501]
Type: <class 'microdf.generic.MicroSeries'>

Division (/2): [54.96714153 48.61735699 56.47688538 65.23029856 47.65846625]
Type: <class 'microdf.generic.MicroSeries'>

Floor division (//2): [54. 48. 56. 65. 47.]
Type: <class 'microdf.generic.MicroSeries'>

Modulo (%10): [9.93428306 7.23471398 2.95377076 0.46059713 5.31693251]
Type: <class 'microdf.generic.MicroSeries'>

Power (**2): [12085.54659197  9454.58960211 12758.55432936 17019.96

In [7]:
# Test 7: Comparison Operations
print("=== COMPARISON OPERATIONS ===\n")

# Using the small sample
print(f"Original values: {ms_small.values}")
print()

# Less than
result = ms_small < 100
print(f"Less than (<100): {result.values}")
print(f"Type: {type(result)}")
print()

# Less than or equal
result = ms_small <= 100
print(f"Less than or equal (<=100): {result.values}")
print(f"Type: {type(result)}")
print()

# Equal
result = ms_small == ms_small.values[0]
print(f"Equal (==first_value): {result.values}")
print(f"Type: {type(result)}")
print()

# Not equal
result = ms_small != ms_small.values[0]
print(f"Not equal (!=first_value): {result.values}")
print(f"Type: {type(result)}")
print()

# Greater than or equal
result = ms_small >= 100
print(f"Greater than or equal (>=100): {result.values}")
print(f"Type: {type(result)}")
print()

# Greater than
result = ms_small > 100
print(f"Greater than (>100): {result.values}")
print(f"Type: {type(result)}")
print()

=== COMPARISON OPERATIONS ===

Original values: [109.93428306  97.23471398 112.95377076 130.46059713  95.31693251]

Less than (<100): [False  True False False  True]
Type: <class 'microdf.generic.MicroSeries'>

Less than or equal (<=100): [False  True False False  True]
Type: <class 'microdf.generic.MicroSeries'>

Equal (==first_value): [ True False False False False]
Type: <class 'microdf.generic.MicroSeries'>

Not equal (!=first_value): [False  True  True  True  True]
Type: <class 'microdf.generic.MicroSeries'>

Greater than or equal (>=100): [ True False  True  True False]
Type: <class 'microdf.generic.MicroSeries'>

Greater than (>100): [ True False  True  True False]
Type: <class 'microdf.generic.MicroSeries'>



In [8]:
# Test 8: Logical Operations
print("=== LOGICAL OPERATIONS ===\n")

# Create boolean MicroSeries for logical operations
bool_ms1 = ms_small > 100
bool_ms2 = ms_small > 80
print(f"bool_ms1 (>100): {bool_ms1.values}")
print(f"bool_ms2 (>80): {bool_ms2.values}")
print()

# Logical AND
result = bool_ms1 & bool_ms2
print(f"Logical AND (bool_ms1 & bool_ms2): {result.values}")
print(f"Type: {type(result)}")
print()

# Logical OR
result = bool_ms1 | bool_ms2
print(f"Logical OR (bool_ms1 | bool_ms2): {result.values}")
print(f"Type: {type(result)}")
print()

# Logical XOR
result = bool_ms1 ^ bool_ms2
print(f"Logical XOR (bool_ms1 ^ bool_ms2): {result.values}")
print(f"Type: {type(result)}")
print()

# Logical NOT
result = ~bool_ms1
print(f"Logical NOT (~bool_ms1): {result.values}")
print(f"Type: {type(result)}")
print()

=== LOGICAL OPERATIONS ===

bool_ms1 (>100): [ True False  True  True False]
bool_ms2 (>80): [ True  True  True  True  True]

Logical AND (bool_ms1 & bool_ms2): [ True False  True  True False]
Type: <class 'microdf.generic.MicroSeries'>

Logical OR (bool_ms1 | bool_ms2): [ True  True  True  True  True]
Type: <class 'microdf.generic.MicroSeries'>

Logical XOR (bool_ms1 ^ bool_ms2): [False  True False False  True]
Type: <class 'microdf.generic.MicroSeries'>

Logical NOT (~bool_ms1): [False  True False False  True]
Type: <class 'microdf.generic.MicroSeries'>



In [9]:
# Test 9: In-place Assignment Operations
print("=== IN-PLACE ASSIGNMENT OPERATIONS ===\n")

# Create copies for in-place operations
ms_iadd = ms_small.copy()
ms_isub = ms_small.copy()
ms_imul = ms_small.copy()
ms_idiv = ms_small.copy()
ms_ifloordiv = ms_small.copy()
ms_imod = ms_small.copy()
ms_ipow = ms_small.copy()

print(f"Original values: {ms_small.values}")
print()

# In-place addition
ms_iadd += 10
print(f"In-place addition (+=10): {ms_iadd.values}")
print(f"Type: {type(ms_iadd)}")
print()

# In-place subtraction
ms_isub -= 5
print(f"In-place subtraction (-=5): {ms_isub.values}")
print(f"Type: {type(ms_isub)}")
print()

# In-place multiplication
ms_imul *= 2
print(f"In-place multiplication (*=2): {ms_imul.values}")
print(f"Type: {type(ms_imul)}")
print()

# In-place division
ms_idiv /= 2
print(f"In-place division (/=2): {ms_idiv.values}")
print(f"Type: {type(ms_idiv)}")
print()

# In-place floor division
ms_ifloordiv //= 2
print(f"In-place floor division (//=2): {ms_ifloordiv.values}")
print(f"Type: {type(ms_ifloordiv)}")
print()

# In-place modulo
ms_imod %= 10
print(f"In-place modulo (%=10): {ms_imod.values}")
print(f"Type: {type(ms_imod)}")
print()

# In-place power
ms_ipow **= 2
print(f"In-place power (**=2): {ms_ipow.values}")
print(f"Type: {type(ms_ipow)}")
print()

=== IN-PLACE ASSIGNMENT OPERATIONS ===

Original values: [109.93428306  97.23471398 112.95377076 130.46059713  95.31693251]

In-place addition (+=10): [119.93428306 107.23471398 122.95377076 140.46059713 105.31693251]
Type: <class 'microdf.generic.MicroSeries'>

In-place subtraction (-=5): [104.93428306  92.23471398 107.95377076 125.46059713  90.31693251]
Type: <class 'microdf.generic.MicroSeries'>

In-place multiplication (*=2): [219.86856612 194.46942795 225.90754152 260.92119426 190.63386501]
Type: <class 'microdf.generic.MicroSeries'>

In-place division (/=2): [54.96714153 48.61735699 56.47688538 65.23029856 47.65846625]
Type: <class 'microdf.generic.MicroSeries'>

In-place floor division (//=2): [54. 48. 56. 65. 47.]
Type: <class 'microdf.generic.MicroSeries'>

In-place modulo (%=10): [9.93428306 7.23471398 2.95377076 0.46059713 5.31693251]
Type: <class 'microdf.generic.MicroSeries'>

In-place power (**=2): [12085.54659197  9454.58960211 12758.55432936 17019.96740304
  9085.317622

In [10]:
# Test 10: Indexing and Slicing Operations
print("=== INDEXING AND SLICING OPERATIONS ===\n")

print(f"Original MicroSeries: {ms_small.values}")
print(f"Original weights: {ms_small.weights.values}")
print()

# Single element access
result = ms_small[0]
print(f"Single element access [0]: {result}")
print(f"Type: {type(result)}")
print()

# Slice access
result = ms_small[1:4]
print(f"Slice access [1:4]: {result.values}")
print(f"Type: {type(result)}")
print(f"Sliced weights: {result.weights.values}")
print()

# Boolean indexing
mask = ms_small > 100
result = ms_small[mask]
print(f"Boolean indexing [mask]: {result.values}")
print(f"Type: {type(result)}")
print(f"Filtered weights: {result.weights.values}")
print()

# List indexing
result = ms_small[[0, 2, 4]]
print(f"List indexing [[0, 2, 4]]: {result.values}")
print(f"Type: {type(result)}")
print(f"Selected weights: {result.weights.values}")
print()

=== INDEXING AND SLICING OPERATIONS ===

Original MicroSeries: [109.93428306  97.23471398 112.95377076 130.46059713  95.31693251]
Original weights: [0.18330114 0.11044882 1.01178411 1.22579494 0.03209575]

Single element access [0]: 109.93428306022466
Type: <class 'numpy.float64'>

Slice access [1:4]: [ 97.23471398 112.95377076 130.46059713]
Type: <class 'microdf.generic.MicroSeries'>
Sliced weights: [0.11044882 1.01178411 1.22579494]

Boolean indexing [mask]: [109.93428306 112.95377076 130.46059713]
Type: <class 'microdf.generic.MicroSeries'>
Filtered weights: [0.18330114 1.01178411 1.22579494]

List indexing [[0, 2, 4]]: [109.93428306 112.95377076  95.31693251]
Type: <class 'microdf.generic.MicroSeries'>
Selected weights: [0.18330114 1.01178411 0.03209575]



In [11]:
# Test 11: Unary Operations
print("=== UNARY OPERATIONS ===\n")

print(f"Original values: {ms_small.values}")
print()

# Negation
result = -ms_small
print(f"Negation (-ms_small): {result.values}")
print(f"Type: {type(result)}")
print()

# Positive (unary +)
result = +ms_small
print(f"Positive (+ms_small): {result.values}")
print(f"Type: {type(result)}")
print()

# Representation
print("=== REPRESENTATION ===")
print("MicroSeries representation:")
print(ms_small)
print(f"Type of __repr__: {type(ms_small.__repr__())}")
print()

=== UNARY OPERATIONS ===

Original values: [109.93428306  97.23471398 112.95377076 130.46059713  95.31693251]

Negation (-ms_small): [-109.93428306  -97.23471398 -112.95377076 -130.46059713  -95.31693251]
Type: <class 'microdf.generic.MicroSeries'>

Positive (+ms_small): [109.93428306  97.23471398 112.95377076 130.46059713  95.31693251]
Type: <class 'microdf.generic.MicroSeries'>

=== REPRESENTATION ===
MicroSeries representation:
        value    weight
0  109.934283  0.183301
1   97.234714  0.110449
2  112.953771  1.011784
3  130.460597  1.225795
4   95.316933  0.032096
Type of __repr__: <class 'str'>



In [12]:
# Test 12: Summary of all operations and their return types
print("=== SUMMARY OF ALL OPERATIONS AND RETURN TYPES ===\n")

operations_summary = [
    ("Scalar Functions (return float)", [
        "sum", "count", "mean", "median", "gini", "top_x_pct_share", 
        "bottom_x_pct_share", "top_50_pct_share", "bottom_50_pct_share", 
        "top_10_pct_share", "top_1_pct_share", "top_0_1_pct_share", "t10_b50"
    ]),
    ("Vector Functions (return pd.Series)", [
        "weight", "cumsum", "rank", "decile_rank", "quintile_rank", 
        "quartile_rank", "percentile_rank"
    ]),
    ("Agnostic Functions (return type depends on input)", [
        "quantile"
    ]),
    ("Utility Functions", [
        "set_weights (returns None)", "copy (returns MicroSeries)", 
        "equals (returns bool)", "groupby (returns MicroSeriesGroupBy)"
    ]),
    ("Arithmetic Operators (return MicroSeries)", [
        "+", "-", "*", "/", "//", "%", "**"
    ]),
    ("Comparison Operators (return MicroSeries with bool values)", [
        "<", "<=", "==", "!=", ">=", ">"
    ]),
    ("Logical Operators (return MicroSeries with bool values)", [
        "&", "|", "^", "~"
    ]),
    ("In-place Assignment Operators (return MicroSeries)", [
        "+=", "-=", "*=", "/=", "//=", "%=", "**="
    ]),
    ("Indexing Operations (return varies)", [
        "[index] (single element returns scalar)", 
        "[slice] (returns MicroSeries)", 
        "[boolean_mask] (returns MicroSeries)",
        "[list] (returns MicroSeries)"
    ]),
    ("Unary Operations (return MicroSeries)", [
        "-", "+"
    ]),
    ("Special Methods", [
        "__repr__ (returns str)", "__getattr__ (returns MicroSeries)"
    ])
]

for category, operations in operations_summary:
    print(f"{category}:")
    for op in operations:
        print(f"  - {op}")
    print()

print("=" * 50)
print("All operations have been tested successfully!")
print("Key findings:")
print("1. Scalar functions return float values")
print("2. Vector functions return pd.Series objects")
print("3. All arithmetic, comparison, and logical operations return MicroSeries")
print("4. Weights are preserved across all operations")
print("5. Indexing operations maintain the MicroSeries type when returning multiple elements")
print("6. Unary operations now work correctly and return MicroSeries objects")

=== SUMMARY OF ALL OPERATIONS AND RETURN TYPES ===

Scalar Functions (return float):
  - sum
  - count
  - mean
  - median
  - gini
  - top_x_pct_share
  - bottom_x_pct_share
  - top_50_pct_share
  - bottom_50_pct_share
  - top_10_pct_share
  - top_1_pct_share
  - top_0_1_pct_share
  - t10_b50

Vector Functions (return pd.Series):
  - weight
  - cumsum
  - rank
  - decile_rank
  - quintile_rank
  - quartile_rank
  - percentile_rank

Agnostic Functions (return type depends on input):
  - quantile

Utility Functions:
  - set_weights (returns None)
  - copy (returns MicroSeries)
  - equals (returns bool)
  - groupby (returns MicroSeriesGroupBy)

Arithmetic Operators (return MicroSeries):
  - +
  - -
  - *
  - /
  - //
  - %
  - **

Comparison Operators (return MicroSeries with bool values):
  - <
  - <=
  - ==
  - !=
  - >=
  - >

Logical Operators (return MicroSeries with bool values):
  - &
  - |
  - ^
  - ~

In-place Assignment Operators (return MicroSeries):
  - +=
  - -=
  - *=
  - /

In [13]:
equiv_income = sim.calculate("equiv_hbai_household_net_income")
weights=equiv_income.weights
weights = sim.calculate("household_weight")
decile = equiv_income.decile_rank().clip(1, 10)
decile1=decile==1
print(decile1.sum() / 1e6)
total_weighted = weights.sum()
proportion = (decile1 * weights).sum() / total_weighted
print(proportion*equiv_income.count() / 1e6)

NameError: name 'sim' is not defined

In [14]:
# Test 13: Realistic Usage Example - Simulating equiv_income behavior
print("=== REALISTIC USAGE EXAMPLE - EQUIV_INCOME SIMULATION ===\n")

# Create mock data that simulates equiv_hbai_household_net_income
np.random.seed(42)
n_households = 100000  # Simulate 100k households

# Generate realistic household equivalent income distribution
equiv_income_data = np.random.lognormal(mean=10.5, sigma=0.6, size=n_households)
household_weights = np.random.exponential(scale=1.2, size=n_households)

# Create MicroSeries simulating equiv_income calculation result
equiv_income = MicroSeries(equiv_income_data, weights=household_weights)

print(f"Created equiv_income simulation:")
print(f"  - {len(equiv_income):,} households")
print(f"  - Mean income: £{equiv_income.mean():.2f}")
print(f"  - Total weighted count: {equiv_income.count():,.0f}")
print()

# Simulate the exact operations from your example
print("=== REPLICATING YOUR EXAMPLE CODE ===\n")

# Get weights (simulating sim.calculate("household_weight"))
weights = equiv_income.weights
print(f"weights type: {type(weights)}")
print(f"weights shape: {weights.shape}")
print()

# Calculate decile ranks and clip to 1-10
decile = equiv_income.decile_rank().clip(1, 10)
print(f"decile type: {type(decile)}")
print(f"decile unique values: {sorted(decile.unique())}")
print(f"decile shape: {decile.shape}")
print()

# Create boolean mask for decile 1
decile1 = decile == 1
print(f"decile1 type: {type(decile1)}")
print(f"decile1 values: {decile1.value_counts()}")
print()

# First calculation: decile1.sum() / 1e6
result1 = decile1.sum() / 1e6
print(f"decile1.sum() / 1e6 = {result1:.4f}")
print(f"  - decile1.sum() = {decile1.sum():.0f}")
print(f"  - decile1.sum() type: {type(decile1.sum())}")
print()

# Second calculation: total_weighted and proportion
total_weighted = weights.sum()
print(f"total_weighted = weights.sum() = {total_weighted:.0f}")
print(f"total_weighted type: {type(total_weighted)}")
print()

proportion = (decile1 * weights).sum() / total_weighted
print(f"proportion = (decile1 * weights).sum() / total_weighted = {proportion:.4f}")
print(f"  - (decile1 * weights) type: {type(decile1 * weights)}")
print(f"  - (decile1 * weights).sum() = {(decile1 * weights).sum():.0f}")
print(f"  - (decile1 * weights).sum() type: {type((decile1 * weights).sum())}")
print()

result2 = proportion * equiv_income.count() / 1e6
print(f"proportion * equiv_income.count() / 1e6 = {result2:.4f}")
print(f"  - equiv_income.count() = {equiv_income.count():.0f}")
print(f"  - equiv_income.count() type: {type(equiv_income.count())}")
print()

print("=== BEHAVIOR ANALYSIS ===")
print("Key observations:")
print("1. decile_rank() returns a MicroSeries with integer values 1-10")
print("2. Boolean comparison (decile == 1) returns MicroSeries with True/False")
print("3. .sum() on boolean MicroSeries counts True values (weighted)")
print("4. Arithmetic operations between MicroSeries preserve weights")
print("5. All scalar operations (.sum(), .count()) return float values")
print("6. Vector operations maintain MicroSeries type with preserved weights")
print()

=== REALISTIC USAGE EXAMPLE - EQUIV_INCOME SIMULATION ===

Created equiv_income simulation:
  - 100,000 households
  - Mean income: £43394.38
  - Total weighted count: 120,852

=== REPLICATING YOUR EXAMPLE CODE ===

weights type: <class 'pandas.core.series.Series'>
weights shape: (100000,)

decile type: <class 'microdf.generic.MicroSeries'>
decile unique values: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
decile shape: (100000,)

decile1 type: <class 'microdf.generic.MicroSeries'>
decile1 values: False    90058
True      9942
Name: count, dtype: int64

decile1.sum() / 1e6 = 0.0121
  - decile1.sum() = 12083
  - decile1.sum() type: <class 'numpy.float64'>

total_weighted = weights.sum() = 120852
total_weighted type: <class 'numpy.float64'>

proportion = (decile1 * weights).sum() / total_weighted = 0.2466
  - (decile1 * weights) type: <class 'microdf.generic.MicroSeries'>
  - (decile1 * weights).sum() = 29801
  - (decile1 * weights).sum() type: <class 'numpy.float64'>

proportion 

In [15]:
# Test 15: Zero Weights Error Handling
print("=== ZERO WEIGHTS ERROR HANDLING ===\n")

# Test case 1: All weights are zero
try:
    zero_weights_data = [1, 2, 3, 4, 5]
    zero_weights = [0, 0, 0, 0, 0]
    zero_ms = MicroSeries(zero_weights_data, weights=zero_weights)
    
    print("Testing rank() with all zero weights...")
    result = zero_ms.rank()
    print("ERROR: Should have raised ZeroDivisionError!")
except ZeroDivisionError as e:
    print(f"✓ Correctly raised ZeroDivisionError: {e}")
except Exception as e:
    print(f"✗ Unexpected error: {e}")

print()

# Test case 2: All weights are zero with pct=True
try:
    print("Testing rank(pct=True) with all zero weights...")
    result = zero_ms.rank(pct=True)
    print("ERROR: Should have raised ZeroDivisionError!")
except ZeroDivisionError as e:
    print(f"✓ Correctly raised ZeroDivisionError: {e}")
except Exception as e:
    print(f"✗ Unexpected error: {e}")

print()

# Test case 3: Methods that depend on rank() should also fail
print("Testing methods that depend on rank()...")

try:
    print("Testing decile_rank() with all zero weights...")
    result = zero_ms.decile_rank()
    print("ERROR: Should have raised ZeroDivisionError!")
except ZeroDivisionError as e:
    print(f"✓ decile_rank() correctly raised ZeroDivisionError: {e}")
except Exception as e:
    print(f"✗ Unexpected error in decile_rank(): {e}")

try:
    print("Testing quintile_rank() with all zero weights...")
    result = zero_ms.quintile_rank()
    print("ERROR: Should have raised ZeroDivisionError!")
except ZeroDivisionError as e:
    print(f"✓ quintile_rank() correctly raised ZeroDivisionError: {e}")
except Exception as e:
    print(f"✗ Unexpected error in quintile_rank(): {e}")

print()

# Test case 4: Methods that should still work with zero weights
print("Testing methods that should work with zero weights...")

try:
    result = zero_ms.count()
    print(f"✓ count() works with zero weights: {result}")
except Exception as e:
    print(f"✗ count() failed unexpectedly: {e}")

try:
    result = zero_ms.sum()
    print(f"✓ sum() works with zero weights: {result}")
except Exception as e:
    print(f"✗ sum() failed unexpectedly: {e}")

try:
    result = zero_ms.cumsum()
    print(f"✓ cumsum() works with zero weights: first 3 values = {result.head(3).values}")
except Exception as e:
    print(f"✗ cumsum() failed unexpectedly: {e}")

print()

# Test case 5: Normal weights should still work
print("Testing with normal weights (should work)...")
normal_ms = MicroSeries([1, 2, 3, 4, 5], weights=[1, 1, 1, 1, 1])
result = normal_ms.rank()
print(f"✓ Normal weights work: {result.values}")
result_pct = normal_ms.rank(pct=True)
print(f"✓ Normal weights with pct=True work: {result_pct.values}")

print()
print("=== ERROR HANDLING SUMMARY ===")
print("✓ rank() method now properly handles zero weights with ZeroDivisionError")
print("✓ Methods that depend on rank() (decile_rank, quintile_rank, etc.) inherit this protection")
print("✓ Methods that don't involve division (count, sum, cumsum) work fine with zero weights")
print("✓ Normal operation with non-zero weights remains unchanged")
print("✓ Clear error message explains the issue and cause")

cumsum() returns cumulative sums of weighted values as a regular pandas Series. The original weights have already been applied and cannot be reused with the cumulative results.


=== ZERO WEIGHTS ERROR HANDLING ===

Testing rank() with all zero weights...
✓ Correctly raised ZeroDivisionError: Cannot calculate rank with zero total weight. All weights in the MicroSeries are zero, which would result in division by zero.

Testing rank(pct=True) with all zero weights...
✓ Correctly raised ZeroDivisionError: Cannot calculate rank with zero total weight. All weights in the MicroSeries are zero, which would result in division by zero.

Testing methods that depend on rank()...
Testing decile_rank() with all zero weights...
✓ decile_rank() correctly raised ZeroDivisionError: Cannot calculate rank with zero total weight. All weights in the MicroSeries are zero, which would result in division by zero.
Testing quintile_rank() with all zero weights...
✓ quintile_rank() correctly raised ZeroDivisionError: Cannot calculate rank with zero total weight. All weights in the MicroSeries are zero, which would result in division by zero.

Testing methods that should work with zero we

In [16]:
# Test 16: Division by Zero Error Test - Dedicated Test for Zero Weights
print("=== DIVISION BY ZERO ERROR TEST ===\n")

# Create test scenarios for division by zero
test_scenarios = [
    {"name": "All zeros", "weights": [0, 0, 0, 0, 0]},
    {"name": "Mix with zeros summing to zero", "weights": [0.0, 0.0, 0.0, 0.0, 0.0]},
    {"name": "Negative and positive canceling out", "weights": [1, -1, 0, 0, 0]},
    {"name": "Very small floats summing to zero", "weights": [1e-16, -1e-16, 0, 0, 0]},
]

data = [10, 20, 30, 40, 50]

for scenario in test_scenarios:
    print(f"Testing scenario: {scenario['name']}")
    print(f"Weights: {scenario['weights']}")
    
    try:
        ms = MicroSeries(data, weights=scenario['weights'])
        print(f"  - Weight sum: {ms.weights.sum()}")
        
        # Test rank()
        result = ms.rank()
        print(f"  - ✗ rank() should have failed but returned: {result.values}")
        
    except ZeroDivisionError as e:
        print(f"  - ✓ rank() correctly raised ZeroDivisionError: {str(e)[:80]}...")
    except Exception as e:
        print(f"  - ✗ Unexpected error: {type(e).__name__}: {e}")
    
    print()

# Test edge case: weights very close to zero but not exactly zero
print("Testing edge case: weights very close to zero but not exactly zero")
tiny_weights = [1e-10, 1e-10, 1e-10, 1e-10, 1e-10]
try:
    ms_tiny = MicroSeries(data, weights=tiny_weights)
    result = ms_tiny.rank()
    print(f"✓ Very small weights work: sum = {ms_tiny.weights.sum():.2e}")
    print(f"  - rank() values: {result.values}")
except Exception as e:
    print(f"✗ Unexpected error with tiny weights: {e}")

print()

# Test that the error propagates through dependent methods
print("Testing error propagation through dependent methods:")
zero_ms = MicroSeries([1, 2, 3, 4, 5], weights=[0, 0, 0, 0, 0])

dependent_methods = [
    ('decile_rank', lambda x: x.decile_rank()),
    ('quintile_rank', lambda x: x.quintile_rank()),
    ('quartile_rank', lambda x: x.quartile_rank()),
    ('percentile_rank', lambda x: x.percentile_rank()),
    ('rank with pct=True', lambda x: x.rank(pct=True)),
]

for method_name, method_func in dependent_methods:
    try:
        result = method_func(zero_ms)
        print(f"✗ {method_name} should have failed")
    except ZeroDivisionError as e:
        print(f"✓ {method_name} correctly raised ZeroDivisionError")
    except Exception as e:
        print(f"✗ {method_name} raised unexpected error: {type(e).__name__}")

print()

# Test methods that should work with zero weights
print("Testing methods that should work with zero weights:")
safe_methods = [
    ('count', lambda x: x.count()),
    ('sum', lambda x: x.sum()),
    ('cumsum', lambda x: x.cumsum().head(3)),
]

for method_name, method_func in safe_methods:
    try:
        result = method_func(zero_ms)
        print(f"✓ {method_name} works with zero weights: {result}")
    except Exception as e:
        print(f"✗ {method_name} failed unexpectedly: {type(e).__name__}: {e}")

print()
print("=== DIVISION BY ZERO TEST SUMMARY ===")
print("✓ All zero weight scenarios properly raise ZeroDivisionError")
print("✓ Error message clearly explains the division by zero issue")
print("✓ All methods dependent on rank() inherit the protection")
print("✓ Very small but non-zero weights still work correctly")
print("✓ Methods that don't involve division work fine with zero weights")
print("✓ Error handling is consistent across all ranking methods")

cumsum() returns cumulative sums of weighted values as a regular pandas Series. The original weights have already been applied and cannot be reused with the cumulative results.


=== DIVISION BY ZERO ERROR TEST ===

Testing scenario: All zeros
Weights: [0, 0, 0, 0, 0]
  - Weight sum: 0.0
  - ✓ rank() correctly raised ZeroDivisionError: Cannot calculate rank with zero total weight. All weights in the MicroSeries are...

Testing scenario: Mix with zeros summing to zero
Weights: [0.0, 0.0, 0.0, 0.0, 0.0]
  - Weight sum: 0.0
  - ✓ rank() correctly raised ZeroDivisionError: Cannot calculate rank with zero total weight. All weights in the MicroSeries are...

Testing scenario: Negative and positive canceling out
Weights: [1, -1, 0, 0, 0]
  - Weight sum: 0.0
  - ✓ rank() correctly raised ZeroDivisionError: Cannot calculate rank with zero total weight. All weights in the MicroSeries are...

Testing scenario: Very small floats summing to zero
Weights: [1e-16, -1e-16, 0, 0, 0]
  - Weight sum: 0.0
  - ✓ rank() correctly raised ZeroDivisionError: Cannot calculate rank with zero total weight. All weights in the MicroSeries are...

Testing edge case: weights very close to zer