In [None]:
import copy
from typing import Dict, List, Union, Any

# Recursive attribute tree: each key maps either to an integer amount
# or to another tree of the same shape.
Attributes = Dict[str, Union[int, 'Attributes']]


class QuantifiedComposition:
    """
    A quantity paired with a unit label and a tree of attributes.

    The three constructor arguments are stored unchanged on the instance as
    `qty`, `unit` and `attributes`.
    """

    def __init__(self, qty: int, unit: str, attributes: Attributes):
        self.qty, self.unit, self.attributes = qty, unit, attributes

    def __repr__(self):
        # Two-line summary: quantity with unit first, attribute tree second.
        summary = f"Qty: {self.qty} {self.unit}\nAttrs: {self.attributes}"
        return summary

def _distribute_single_attribute(total_attr_value: int, ratio_qtys: List[int]) -> List[int]:
    """
    Distributes a single integer value using the Largest Remainder Method for fairness
    while maintaining determinism.
    """
    if sum(ratio_qtys) == 0:
        return [0] * len(ratio_qtys)

    total_ratio_qty = sum(ratio_qtys)
    
    # Calculate ideal shares with remainders
    shares = []
    for i, ratio_qty in enumerate(ratio_qtys):
        # Use a larger integer base to avoid floating point math
        ideal_share_scaled = (total_attr_value * ratio_qty * 10000) // total_ratio_qty
        floor_part = ideal_share_scaled // 10000
        remainder_part = ideal_share_scaled % 10000
        shares.append({
            'index': i,
            'floor': floor_part,
            'remainder': remainder_part
        })

    # Allocate the floor amounts
    distributed_values = [s['floor'] for s in shares]
    
    # Calculate how many units are left to distribute
    allocated_sum = sum(distributed_values)
    remainder_units = total_attr_value - allocated_sum
    
    # Sort by remainder to find who gets the leftover units.
    # Add index as a tie-breaker for determinism.
    shares.sort(key=lambda x: (x['remainder'], x['index']), reverse=True)
    
    # Distribute the remainder units one by one
    for i in range(remainder_units):
        winner_index = shares[i]['index']
        distributed_values[winner_index] += 1
        
    return distributed_values

def _distribute_attribute_value(value, ratio_qtys):
    """
    Split one attribute value across flows, returning one share per flow.

    An integer is split with `_distribute_single_attribute`; a dict is split
    key by key (recursively), so every share has the same shape as `value`.
    """
    if not isinstance(value, dict):
        return _distribute_single_attribute(value, ratio_qtys)

    shares = [{} for _ in ratio_qtys]
    for name, child in value.items():
        for share, child_share in zip(shares, _distribute_attribute_value(child, ratio_qtys)):
            share[name] = child_share
    return shares


def distribute_composition(from_comp: QuantifiedComposition, flow_ratio_qtys: List[int]) -> List[QuantifiedComposition]:
    """
    Implements the "Physical First, Abstract Follows" model using the Largest Remainder Method.

    Phase 1: Distribute physical attributes (the 'varietal' group) across the
             flows in proportion to `flow_ratio_qtys`. Each flow's quantity is
             the sum of the varietal amounts it received.
    Phase 2: Distribute every other attribute group (cost, etc.) in proportion
             to those resulting flow quantities.

    Args:
        from_comp: Source composition; only its `unit` and `attributes` are read.
        flow_ratio_qtys: Integer weight of each output flow.

    Returns:
        One QuantifiedComposition per entry of `flow_ratio_qtys`, in the same
        order. Each carries the source's unit and always has a 'varietal' key
        (an empty dict when the source has no varietals).
    """
    # --- Phase 1: Distribute Physical Attributes (varietals) ---
    source_varietals = from_comp.attributes.get('varietal', {})
    flow_varietals = [{} for _ in flow_ratio_qtys]

    for varietal_name, total_varietal_qty in source_varietals.items():
        distributed_varietal = _distribute_single_attribute(total_varietal_qty, flow_ratio_qtys)
        for varietal_dict, amount in zip(flow_varietals, distributed_varietal):
            varietal_dict[varietal_name] = amount

    # The true quantity of each flow is whatever physical amount it received.
    final_flow_qtys = [sum(varietal_dict.values()) for varietal_dict in flow_varietals]

    flow_compositions = [
        QuantifiedComposition(qty=final_qty, unit=from_comp.unit, attributes={'varietal': varietals})
        for final_qty, varietals in zip(final_flow_qtys, flow_varietals)
    ]

    # --- Phase 2: Distribute Abstract Attributes (cost, etc.) ---
    # Abstract attributes follow the physical quantities. If nothing physical
    # was distributed (no varietals, or they sum to zero) those weights would
    # all be zero and every abstract attribute would silently vanish, so fall
    # back to the requested flow ratios in that case.
    if sum(final_flow_qtys) != 0:
        abstract_ratio_qtys = final_flow_qtys
    else:
        abstract_ratio_qtys = flow_ratio_qtys

    for attr_type, attr_value in from_comp.attributes.items():
        if attr_type == 'varietal':
            continue
        # Handles a group dict, a bare integer, or deeper nesting, matching
        # what the Attributes alias and blend_compositions accept.
        attr_shares = _distribute_attribute_value(attr_value, abstract_ratio_qtys)
        for comp, share in zip(flow_compositions, attr_shares):
            comp.attributes[attr_type] = share

    return flow_compositions


def blend_compositions(compositions: List[QuantifiedComposition]) -> QuantifiedComposition:
    """
    Combine several compositions into a single one.

    Quantities are added together, and attribute trees are merged by summing
    integer leaves that share the same key path. The unit is taken from the
    first composition; units are not compared.

    Raises:
        ValueError: if `compositions` is empty.
    """
    if not compositions:
        raise ValueError("Cannot blend empty compositions")

    def accumulate(into, incoming):
        # Fold `incoming` into `into` in place: ints are summed, dicts are
        # merged recursively, any other value type is skipped.
        for name, item in incoming.items():
            if isinstance(item, dict):
                accumulate(into.setdefault(name, {}), item)
            elif isinstance(item, int):
                into[name] = into.get(name, 0) + item

    merged_attributes = {}
    total_qty = 0
    for part in compositions:
        total_qty += part.qty
        accumulate(merged_attributes, part.attributes)

    first_unit = compositions[0].unit
    return QuantifiedComposition(total_qty, first_unit, merged_attributes)

# Composition Distribution Model

This notebook defines and tests the definitive algorithm for distributing wine composition attributes during cellar operations.

The core logic is based on the **"Physical First, Abstract Follows"** model, implemented with the **Largest Remainder Method** to ensure fair and deterministic distribution of integer quantities.

### Key Functions:
- `distribute_composition()`: The main function that takes a source composition and a list of flow ratios and returns a list of resulting compositions.
- `blend_compositions()`: A helper to merge compositions for verification.
- `_distribute_single_attribute()`: The core distribution logic using the Largest Remainder Method.

In [None]:
## Scenario 1: Standard Split

Split 10,000 h-units (1.0000 gallon) into 3 nearly equal parts (3,333 / 3,333 / 3,334). This tests the basic distribution and conservation of varietals and costs.

Source: Qty: 10000 gal
Attrs: {'varietal': {'CAB': 6000, 'MER': 4000}, 'cost': {'real_usd': 10000, 'nominal_usd': 10000}}
--------------------
Flow 1 (3332): {'varietal': {'CAB': 1999, 'MER': 1333}, 'cost': {'real_usd': 3332, 'nominal_usd': 3332}}
Flow 2 (3333): {'varietal': {'CAB': 2000, 'MER': 1333}, 'cost': {'real_usd': 3333, 'nominal_usd': 3333}}
Flow 3 (3335): {'varietal': {'CAB': 2001, 'MER': 1334}, 'cost': {'real_usd': 3335, 'nominal_usd': 3335}}
--------------------
Re-blended: Qty: 10000 gal
Attrs: {'varietal': {'CAB': 6000, 'MER': 4000}, 'cost': {'real_usd': 10000, 'nominal_usd': 10000}}
Matches Source? True


# Source tank: 10,000 h-units, 6,000 CAB + 4,000 MER, with real and nominal cost.
source = QuantifiedComposition(
    qty=10000,
    unit="h-unit",
    attributes={
        "varietal": {"CAB": 6000, "MER": 4000},
        "cost": {"real_usd": 10000, "nominal_usd": 10000},
    },
)

# Three flow weights of roughly one third each.
flows = [3333, 3333, 3334]
results = distribute_composition(source, flows)

# Print the source, a separator, then one line per resulting flow.
print("Source:", source, "\n" + "-"*20)
for i, res in enumerate(results):
    print(f"Flow {i+1} ({res.qty}): {res.attributes}")

# Conservation check: blending the flows back should reproduce the source attributes.
blended_back = blend_compositions(results)
print("-" * 20, "\nRe-blended:", blended_back)
print("Matches Source?", blended_back.attributes == source.attributes)

In [None]:
## Scenario 2: Loss Event

A tank loses 1% of its volume. This tests how physical and abstract attributes are handled in a loss scenario.

**Note:** The special business rule for `nominal_usd` (that it should not flow to loss) is not yet implemented here. This test just verifies the proportional distribution math.

Tank C (Blend): Qty: 20000 gal
Attrs: {'varietal': {'CAB': 10000, 'MER': 10000}}


# Single-varietal tank carrying both real and nominal cost.
source_for_loss = QuantifiedComposition(
    qty=10000,
    unit="h-unit",
    attributes={
        "varietal": {"CAB": 10000},
        "cost": {"real_usd": 10000, "nominal_usd": 10000},
    },
)

# Flow weights: 100 units lost (1%), 9900 units remaining (99%).
loss_flows = [100, 9900]
results_loss = distribute_composition(source_for_loss, loss_flows)

# Results come back in the same order as the weights: loss first, remainder second.
loss_flow, remain_flow = results_loss[0], results_loss[1]

print("Loss Flow:", loss_flow)
print("Remaining:", remain_flow)

print("\n--- Analysis ---")
print(f"Real USD Lost: {loss_flow.attributes['cost']['real_usd']}")
print(f"Nominal USD Lost: {loss_flow.attributes['cost']['nominal_usd']}")
# Report whether any nominal cost ended up in the loss flow.
print(
    "⚠️ ISSUE: Nominal dollars flowed to loss! (This is expected for now)"
    if loss_flow.attributes['cost']['nominal_usd'] > 0
    else "✅ Nominal dollars conserved."
)

In [None]:
## Scenario 3: Edge Case - Tiny Quantity Split

A tank with only 2 h-units (1 Chardonnay, 1 Pinot) is split into two equal flows. This tests the "fairness" of the distribution at the lowest possible integer values, ensuring attributes are not "chunked" into one flow.

Loss Flow: Qty: 100 gal
Attrs: {'cost': {'real_usd': 100, 'nominal_usd': 100}}
Remaining: Qty: 9900 gal
Attrs: {'cost': {'real_usd': 9900, 'nominal_usd': 9900}}

--- Analysis ---
Real USD Lost: 100
Nominal USD Lost: 100
⚠️ ISSUE: Nominal dollars flowed to loss! This violates the conservation rule.


# Smallest interesting case: two h-units, one of each varietal.
tiny_tank = QuantifiedComposition(
    qty=2,
    unit="h-unit",
    attributes={"varietal": {"CHARD": 1, "PINOT": 1}},
)

# Two flows with equal weight.
flows = [1, 1]
results_tiny = distribute_composition(tiny_tank, flows)

# Print the source, a separator, then one line per resulting flow.
print("Source:", tiny_tank, "\n" + "-"*20)
for i, res in enumerate(results_tiny):
    print(f"Flow {i+1} ({res.qty}): {res.attributes}")

# Conservation check: blending the flows back should reproduce the source attributes.
blended_back_tiny = blend_compositions(results_tiny)
print("-" * 20, "\nRe-blended:", blended_back_tiny)
print("Matches Source?", blended_back_tiny.attributes == tiny_tank.attributes)