In [None]:
import os
import openai
import anthropic
import together
import subprocess
import re

# open ai API key
openai_api_key=''

# anthropic API key
claude_api_key = ''

# together AI API key
together_api_key = ''

In [2]:
# read the prompt from the prompt text file titled "dualsourcing.txt"
with open("../prompts/dualsourcing.txt", "r") as f:
    prompt = f.read()

prompt = str(prompt)

In [3]:
claude_client = anthropic.Anthropic(api_key = claude_api_key)
openai_client = openai.OpenAI(api_key = openai_api_key)
togetherai_client = together.Together(api_key=together_api_key)

## GPT 4o

In [8]:
completion = openai_client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are an expert in probability theory and stochastic modeling."},
        {"role": "user", "content": prompt},
    ]
)

# get the response
response = completion.choices[0].message.content

In [9]:
print(response)

To tackle this problem, we can approach it using a simulation-based method, as deriving an analytical solution with closed-form expressions in such a complex stochastic setting is non-trivial. The primary goal is to determine near-optimal values for `order_level_exp` and `order_level_reg` to minimize costs by simulating the inventory and ordering dynamics over the planning horizon of 1000 days. Here's how we can proceed:

### Simulation-Based Approach

#### Key Steps:
1. **Initialization:** Set initial values for on-hand inventory and define the initial configuration.
2. **Demand Simulation:** For each day, simulate random demand using a normal distribution with mean `mu` and standard deviation `st_dev`.
3. **Inventory Updates:** At each day, update the on-hand inventory based on demand and incoming inventory from past orders.
4. **Order Placement:**
   - Calculate the expedited inventory position (on-hand + due in lead_exp days).
   - Calculate the regular inventory position (on-hand 

In [10]:
import numpy as np

def simulate_inventory(n_days, mu, st_dev, initial_inv, cost_reg, cost_exp, lead_reg, lead_exp, holding_cost, penalty_cost, order_level_exp, order_level_reg):
    on_hand_inventory = initial_inv
    exp_inventory_arrivals = np.zeros(n_days + lead_exp)
    reg_inventory_arrivals = np.zeros(n_days + lead_reg)
    
    total_holding_cost = 0
    total_penalty_cost = 0
    total_ordering_cost = 0
    
    for day in range(n_days):
        # Arrivals
        on_hand_inventory += exp_inventory_arrivals[day] + reg_inventory_arrivals[day]
        
        # Demand
        demand = np.random.normal(mu, st_dev)
        
        # Fulfill demand
        unsatisfied_demand = max(0, demand - on_hand_inventory)
        on_hand_inventory = max(0, on_hand_inventory - demand)
        
        # Costs calculation
        holding_cost_today = on_hand_inventory * holding_cost
        penalty_cost_today = unsatisfied_demand * penalty_cost
        total_holding_cost += holding_cost_today
        total_penalty_cost += penalty_cost_today
        
        # Calculate inventory positions
        expedited_position = on_hand_inventory + np.sum(exp_inventory_arrivals[day + 1:day + 1 + lead_exp])
        regular_position = expedited_position + np.sum(reg_inventory_arrivals[day + 1:day + 1 + lead_reg])
        
        # Expedited order
        if expedited_position < order_level_exp:
            expedited_order = order_level_exp - expedited_position
            exp_inventory_arrivals[day + lead_exp] += expedited_order
            total_ordering_cost += expedited_order * cost_exp
        
        # Regular order
        if regular_position < order_level_reg:
            regular_order = order_level_reg - regular_position
            reg_inventory_arrivals[day + lead_reg] += regular_order
            total_ordering_cost += regular_order * cost_reg
            
    # Return total mean daily cost
    mean_daily_cost = (total_holding_cost + total_penalty_cost + total_ordering_cost) / n_days
    return mean_daily_cost

# Search for the optimal order levels
order_level_exp_values = range(30, 70, 5)
order_level_reg_values = range(60, 100, 5)

best_cost = float('inf')
best_order_levels = (0, 0)

for order_level_exp in order_level_exp_values:
    for order_level_reg in order_level_reg_values:
        total_cost = simulate_inventory(
            n_days=1000,
            mu=30,
            st_dev=10,
            initial_inv=40,
            cost_reg=100,
            cost_exp=110,
            lead_reg=110,
            lead_exp=0,
            holding_cost=5,
            penalty_cost=495,
            order_level_exp=order_level_exp,
            order_level_reg=order_level_reg
        )
        
        if total_cost < best_cost:
            best_cost = total_cost
            best_order_levels = (order_level_exp, order_level_reg)

# Print the results
print(f"Optimal order levels: Expedited = {best_order_levels[0]}, Regular = {best_order_levels[1]}")
print(f"Estimated mean daily cost: {best_cost}")

Optimal order levels: Expedited = 30, Regular = 90
Estimated mean daily cost: 17501.88485703223


## o1

In [17]:
completion = openai_client.chat.completions.create(
    model="o1",
    messages=[
        {"role": "system", "content": "You are an expert in probability theory and stochastic modeling."},
        {"role": "user", "content": prompt},
    ]
)

# get the response
response = completion.choices[0].message.content

In [18]:
print(response)

Solution Explanation

Below is one practical way to tackle this dual‐sourcing, two‐lead‐time, base‐stock problem.  We outline both an approximate “closed‐form” reasoning and then give a simulation‐based search procedure that can be used to refine or validate the solution.

──────────────────────────────────────────────────────────────────
1. Approximate / Closed‐Form Reasoning
──────────────────────────────────────────────────────────────────
In a single‐supplier base‐stock (or “order‐up‐to”) system with one lead time L, mean demand μ per day, and standard deviation σ per day, the classic newsvendor approximation for choosing a base‐stock level S is:

  S* ≈ L·μ + z·√L·σ,

where z is determined by balancing the marginal holding cost h against the marginal shortage (penalty) cost p:

  Service target ≈ p / (p + h) 
  ⇒  z is the corresponding critical‐ratio z‐value from the normal tables.

In our problem:
 • Daily demand ~ Normal(μ=30, σ=10).
 • Penalty p = 495 per unit short.
 • Holdin

In [13]:
import numpy as np

def sim_inventory(order_level_exp, order_level_reg,
                  n_days=1000, 
                  initial_inv=40,
                  cost_reg=100.0, cost_exp=110.0,
                  lead_reg=110, lead_exp=0,
                  holding_cost=5.0,
                  penalty_cost=495.0,
                  mu=30.0, st_dev=10.0,
                  random_seed=None):
    """
    Simulates the inventory system with two suppliers (expedited and regular).
    Returns the average daily cost for n_days of operation.
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    # Tracking variables
    on_hand = initial_inv
    backlog = 0

    # Each day, we need to keep track of what arrives from past orders.
    # For a large lead time (like lead_reg=110), we can keep queues or arrays
    # that store how many units will arrive on each future day.
    # Index day d in these arrays means arrival at the START of day d.
    max_lead = int(max(lead_reg, lead_exp))
    arrivals_reg = np.zeros(n_days + max_lead + 1, dtype=float)
    arrivals_exp = np.zeros(n_days + max_lead + 1, dtype=float)

    total_cost = 0.0

    for day in range(n_days):
        # 1) Receive arrivals (from orders placed 'lead_exp' or 'lead_reg' days ago)
        # Expedited arrivals
        arrive_today_exp = arrivals_exp[day]
        on_hand += arrive_today_exp  # for lead_exp=0, this might be from the previous day's expedite
        # Regular arrivals
        arrive_today_reg = arrivals_reg[day]
        on_hand += arrive_today_reg

        # 2) Draw today's demand from Normal(mu, st_dev); clip at 0 to avoid negative demands
        demand = np.random.normal(mu, st_dev)
        demand = max(demand, 0.0)

        # 3) Satisfy demand using on-hand inventory first
        if backlog > 0:
            # If there was backlog from previous days, attempt to satisfy it first
            if on_hand >= backlog:
                on_hand -= backlog
                backlog = 0
            else:
                backlog -= on_hand
                on_hand = 0
        # Now satisfy today's demand
        if on_hand >= demand:
            on_hand -= demand
        else:
            backlog_today = demand - on_hand
            on_hand = 0
            backlog += backlog_today

        # 4) Compute daily holding cost (for leftover on_hand) and penalty cost (for any backlog)
        daily_holding_cost = holding_cost * on_hand
        daily_penalty_cost = penalty_cost * backlog
        total_cost += (daily_holding_cost + daily_penalty_cost)

        # 5) Place expedite order if expedited inventory position < order_level_exp
        # Expedited inventory position = on_hand - backlog + any expedite arrivals in pipeline
        # Because lead_exp=0 in the default, they'll arrive tomorrow (day+1).
        # We'll treat on_hand - backlog as net "available," ignoring the pipeline for expedite (since lead_exp=0).
        expedite_position = (on_hand - backlog)
        if expedite_position < order_level_exp:
            expedite_qty = order_level_exp - expedite_position
            expedite_qty = max(expedite_qty, 0)
            # This expedite order arrives in lead_exp days
            arrive_day = day + lead_exp
            if arrive_day < (n_days + max_lead):
                arrivals_exp[arrive_day] += expedite_qty
            # Add ordering cost for expedite
            total_cost += expedite_qty * cost_exp

        # 6) Place regular order if regular inventory position < order_level_reg
        # Regular inventory position = on_hand - backlog + all outstanding arrivals (incl. expedited & regular)
        # But typically we consider the pipeline for regular orders out to lead_reg.
        # For simplicity, we can approximate by the day+lead_reg arrivals in the queue, etc.
        # A simpler approach is to treat "inventory position" as on_hand - backlog plus any future arrivals already scheduled.
        regular_position = (on_hand - backlog)
        # Count all future arrivals (day+1 through day+lead_reg) from both expedited and regular
        for fday in range(day+1, min(day+int(lead_reg)+1, n_days+max_lead)):
            regular_position += arrivals_reg[fday]
            regular_position += arrivals_exp[fday]

        if regular_position < order_level_reg:
            regular_qty = order_level_reg - regular_position
            regular_qty = max(regular_qty, 0)
            arrive_day = day + lead_reg
            if arrive_day < (n_days + max_lead):
                arrivals_reg[arrive_day] += regular_qty
            # Add ordering cost for regular
            total_cost += regular_qty * cost_reg

    # Return average daily cost
    return total_cost / float(n_days)


def grid_search(n_days=1000, 
                mu=30.0, st_dev=10.0,
                cost_reg=100.0, cost_exp=110.0,
                lead_reg=110, lead_exp=0,
                holding_cost=5.0, penalty_cost=495.0,
                initial_inv=40,
                replicate=30,  # how many replications per policy
                exp_range=range(0, 101, 10),
                reg_range=range(0, 151, 10),
                random_seeds=None):
    """
    Performs a grid search over exp_range x reg_range for the order levels.
    Returns the best (order_level_exp, order_level_reg, avg_cost).
    replicate: how many independent simulation runs to average over
    random_seeds: optional list/array of seeds (length >= replicate).
    """
    best_combo = (None, None)
    best_cost = float('inf')

    for order_level_exp in exp_range:
        for order_level_reg in reg_range:
            # Run 'replicate' simulations and average the cost
            costs = []
            for i in range(replicate):
                seed = random_seeds[i] if (random_seeds is not None and i < len(random_seeds)) else None
                c = sim_inventory(order_level_exp,
                                  order_level_reg,
                                  n_days=n_days,
                                  initial_inv=initial_inv,
                                  cost_reg=cost_reg,
                                  cost_exp=cost_exp,
                                  lead_reg=lead_reg,
                                  lead_exp=lead_exp,
                                  holding_cost=holding_cost,
                                  penalty_cost=penalty_cost,
                                  mu=mu, st_dev=st_dev,
                                  random_seed=seed)
                costs.append(c)
            avg_cost = np.mean(costs)
            if avg_cost < best_cost:
                best_cost = avg_cost
                best_combo = (order_level_exp, order_level_reg)
    return best_combo, best_cost


if __name__ == "__main__":
    # Example usage:
    # We can define a small grid search to find a near-optimal solution
    # and do 30 replications per point in the grid.
    # This is just an example; you can refine the search bounds/steps.
    
    # Optionally define random seeds for reproducibility:
    seeds = np.random.randint(1, 1_000_000, size=30)

    best_levels, best_daily_cost = grid_search(
        n_days=1000,
        mu=30.0, st_dev=10.0,
        cost_reg=100.0, cost_exp=110.0,
        lead_reg=110, lead_exp=0,
        holding_cost=5.0, penalty_cost=495.0,
        initial_inv=40,
        replicate=30,
        exp_range=range(0, 101, 10),
        reg_range=range(0, 151, 10),
        random_seeds=seeds
    )

    print("Best Expedited Level, Regular Level =", best_levels)
    print("Estimated Average Daily Cost =", best_daily_cost)

Best Expedited Level, Regular Level = (0, 150)
Estimated Average Daily Cost = 1807465.530825838


## o3-mini

In [19]:
completion = openai_client.chat.completions.create(
    model="o3-mini",
    messages=[
        {"role": "system", "content": "You are an expert in probability theory and stochastic modeling."},
        {"role": "user", "content": prompt},
    ]
)

# get the response
response = completion.choices[0].message.content

In [20]:
print(response)

Below is one way to “attack” the problem. Because our model couples a same‐day “expedited” order (with zero lead time) and a long‐lead “regular” order, it is very hard to write an exact closed‐form solution. (One might “guess” safety‐stock formulas using approximations for lead–time demand, but then the fact that expedited orders are also placed and “backfill” the expedited position makes the analysis quite messy.) Thus, our approach is two–fold:

──────────────────────────────
1. An Approximate Analytical Rationale
──────────────────────────────
The idea is to “mimic” a dual–sourcing inventory rule:
 • For the expedited (fast) channel we set an “order–up–to” level for the inventory that is immediately available. In a simple news–vendor setting with no delay one might set safety stock equal to a quantile of the demand distribution. Intuitively, one might choose
  order_level_exp ≈ μ·(lead_exp) + z·st_dev,
but since lead_exp = 0 our target comes “essentially” from covering the instantan

In [28]:
import numpy as np
import random
from statistics import mean
import math

# --------------------------
# Model parameters (defaults)
# --------------------------
n_days = 1000
initial_inv = 40

# Costs
cost_reg = 100.00      # cost per unit for regular orders
cost_exp = 110.00      # cost per unit for expedited orders
holding_cost = 5.00    # per unit per day holding cost
penalty_cost = 495.00  # per unit backorder penalty cost

# Lead times
lead_reg = 110         # days for regular orders
lead_exp = 0           # days for expedited orders (arrives immediately)

# Demand parameters
mu = 30.0
st_dev = 10.0

# Simulation settings for grid search
# Budget of simulation evaluations (total replications) e.g., 1000 replications spread across grid evaluations.
replications_per_candidate = 50  # number of replications to average per candidate pair

# Define candidate grid ranges (you can adjust the ranges for a finer search)
order_level_exp_candidates = range(30, 71, 10)   # e.g., 30, 40, 50, 60, 70
order_level_reg_candidates = range(70, 121, 10)  # e.g., 70, 80, 90, 100, 110, 120

def run_simulation(order_level_exp, order_level_reg, seed=None):
    """
    Simulate the system for n_days and return various cost components averaged per day.
    """
    if seed is not None:
        np.random.seed(seed)
        random.seed(seed)
        
    total_holding = 0.0
    total_penalty = 0.0
    total_order_cost = 0.0
    total_daily_costs = 0.0

    # Initialize on-hand inventory and regular order pipeline (list of length lead_reg)
    inventory = initial_inv
    # Pipeline: each element represents the amount of regular order arriving in that many days.
    pipeline = [0.0] * lead_reg

    for day in range(n_days):
        # 1. Receive any regular orders scheduled for today (pop from pipeline[0])
        arrival = pipeline.pop(0)
        inventory += arrival
        # Append a 0 at the end to maintain the pipeline length
        pipeline.append(0.0)
        
        day_order_cost = 0.0

        # 2. Expedited order decision:
        # Expedited inventory position = on-hand (since lead_exp = 0)
        if inventory < order_level_exp:
            exp_order_qty = order_level_exp - inventory
            inventory += exp_order_qty  # arrives immediately
            day_order_cost += exp_order_qty * cost_exp
        else:
            exp_order_qty = 0
        
        # 3. Regular order decision:
        # Regular inventory position = on-hand inventory + outstanding regular orders in pipeline
        reg_inventory_position = inventory + sum(pipeline)
        if reg_inventory_position < order_level_reg:
            reg_order_qty = order_level_reg - reg_inventory_position
            # Place the order: schedule it to arrive in lead_reg days.
            pipeline[-1] += reg_order_qty  # add at the end of pipeline
            day_order_cost += reg_order_qty * cost_reg
        else:
            reg_order_qty = 0

        # 4. Demand realization 
        # Draw demand from Normal(mu,st_dev); we truncate at zero to avoid negative demand.
        d = np.random.normal(mu, st_dev)
        d = d if d > 0 else 0

        # Satisfy demand:
        inventory -= d

        # 5. Accumulate costs:
        if inventory >= 0:
            # Holding inventory cost for leftover units.
            holding = inventory * holding_cost
            penalty = 0.0
        else:
            # A negative inventory is a backlog (lost sales or pending fulfillment), with high penalty.
            holding = 0.0
            penalty = abs(inventory) * penalty_cost

        daily_cost = day_order_cost + holding + penalty

        total_order_cost += day_order_cost
        total_holding += holding
        total_penalty += penalty
        total_daily_costs += daily_cost

    # Average daily cost breakdown
    avg_order = total_order_cost / n_days
    avg_hold = total_holding / n_days
    avg_penalty = total_penalty / n_days
    avg_total = total_daily_costs / n_days

    return avg_total, avg_order, avg_hold, avg_penalty

# -----------------------
# Grid Search over candidate (order_level_exp, order_level_reg)
# -----------------------
results = {}
for ole in order_level_exp_candidates:
    for olr in order_level_reg_candidates:
        # Ensure feasibility: typically we need olr >= ole since expedited orders fill the faster channel
        if olr < ole:
            continue
        costs = []
        order_costs = []
        hold_costs = []
        penalty_costs = []
        for rep in range(replications_per_candidate):
            # Use a different seed for each replication
            seed = rep
            avg_total, avg_o, avg_h, avg_p = run_simulation(ole, olr, seed=seed)
            costs.append(avg_total)
            order_costs.append(avg_o)
            hold_costs.append(avg_h)
            penalty_costs.append(avg_p)
        avg_cost = mean(costs)
        results[(ole, olr)] = {
            'avg_total': avg_cost,
            'avg_order': mean(order_costs),
            'avg_holding': mean(hold_costs),
            'avg_penalty': mean(penalty_costs)
        }

# Find the candidate with the lowest average daily cost.
best_candidate = min(results, key=lambda x: results[x]['avg_total'])
best_result = results[best_candidate]

print("Simulation Results (averaged over {} days and {} replications per candidate):".format(n_days, replications_per_candidate))
print("Candidate (order_level_exp, order_level_reg):", best_candidate)
print("Average Total Daily Cost: {:.2f}".format(best_result['avg_total']))
print("  (Breakdown -> Ordering: {:.2f}, Holding: {:.2f}, Penalty: {:.2f})".format(
    best_result['avg_order'], best_result['avg_holding'], best_result['avg_penalty']))


# run the above 1000 times to get the mean and std deviation of the total daily cost
import statistics   
def run_multiple_simulations(num_runs=1000):
    all_costs = []
    for _ in range(num_runs):
        avg_total, _, _, _ = run_simulation(best_candidate[0], best_candidate[1])
        all_costs.append(avg_total)
    mean_cost = statistics.mean(all_costs)
    std_dev_cost = statistics.stdev(all_costs)
    return mean_cost, std_dev_cost
mean_cost, std_dev_cost = run_multiple_simulations(1000)
print("\nAfter running 1000 simulations:")
print("Mean Average Total Daily Cost: {:.2f}".format(mean_cost))
print("Standard Deviation of Average Total Daily Cost: {:.2f}".format(std_dev_cost))


    
# For reference, we also print the result for the baseline candidate (50, 80)
baseline = (50, 80)
if baseline in results:
    br = results[baseline]
    print("\nBaseline (50,80) Average Total Daily Cost: {:.2f}".format(br['avg_total']))
    print("  (Breakdown -> Ordering: {:.2f}, Holding: {:.2f}, Penalty: {:.2f})".format(
        br['avg_order'], br['avg_holding'], br['avg_penalty']))

Simulation Results (averaged over 1000 days and 50 replications per candidate):
Candidate (order_level_exp, order_level_reg): (50, 70)
Average Total Daily Cost: 3438.92
  (Breakdown -> Ordering: 3295.46, Holding: 100.56, Penalty: 42.90)

After running 1000 simulations:
Mean Average Total Daily Cost: 3439.04
Standard Deviation of Average Total Daily Cost: 39.49

Baseline (50,80) Average Total Daily Cost: 3439.07
  (Breakdown -> Ordering: 3295.56, Holding: 100.61, Penalty: 42.90)


In [14]:
import numpy as np

# ----------------------------
# Model Parameters (Defaults)
# ----------------------------
n_days = 1000          # planning horizon (days per simulation replication)
initial_inv = 40       # initial on-hand inventory
cost_reg = 100.00      # cost per unit for regular orders
cost_exp = 110.00      # cost per unit for expedited orders
lead_reg = 110         # lead time (days) for regular orders
lead_exp = 0           # lead time (days) for expedited orders
holding_cost = 5.00    # per unit per day
penalty_cost = 495.00  # penalty for each backordered unit per day
mu = 30.0              # mean daily demand
st_dev = 10.0          # standard deviation of daily demand

# For simulation reproducibility, you can set a seed.
np.random.seed(42)

# ----------------------------
# Simulation of One Replication
# ----------------------------
def run_simulation(order_level_exp, order_level_reg):
    """
    Simulate the inventory system for n_days with given order-level targets.
    Returns cumulative cost breakdown:
      total_order_cost, total_holding_cost, total_penalty_cost.
    """
    # Initialize the simulation state:
    on_hand = initial_inv         # current inventory (can be negative indicating backorders)
    # regular_shipments dictionary: keys=arrival_day, value=quantity arriving that day.
    regular_shipments = {}
    
    # cost accumulators:
    total_order_cost = 0.0
    total_holding_cost = 0.0
    total_penalty_cost = 0.0

    # For each day in the simulation horizon:
    for day in range(n_days):
        # 1. RECEIVE ORDERS (arrivals)
        if day in regular_shipments:
            on_hand += regular_shipments.pop(day)

        # 2. Place Expedited Order: Check “expedited position”
        # With lead_exp = 0, expedited position equals on_hand.
        if on_hand < order_level_exp:
            exp_qty = order_level_exp - on_hand
            # Order cost (expedited):
            total_order_cost += cost_exp * exp_qty
            # With lead_exp = 0, the units arrive immediately.
            on_hand += exp_qty

        # 3. Place Regular Order: Compute “regular inventory position”
        # Regular position = on_hand + units scheduled to arrive within lead_reg days.
        reg_inventory_position = on_hand
        # Sum regular shipments scheduled for days from (day+1) to (day+lead_reg) - inclusive.
        for arrival_time in range(day + 1, day + int(lead_reg) + 1):
            if arrival_time in regular_shipments:
                reg_inventory_position += regular_shipments[arrival_time]
        if reg_inventory_position < order_level_reg:
            reg_qty = order_level_reg - reg_inventory_position
            total_order_cost += cost_reg * reg_qty
            # The regular order will arrive exactly lead_reg days later.
            arrival_day = day + int(lead_reg)
            regular_shipments[arrival_day] = regular_shipments.get(arrival_day, 0) + reg_qty
        
        # 4. Demand realization:
        demand = np.random.normal(mu, st_dev)
        # It is possible for a negative draw on demand, so we ensure nonnegative demand.
        demand = max(demand, 0)
        on_hand = on_hand - demand

        # 5. Holding and Penalty Costs (at end of day)
        if on_hand >= 0:
            total_holding_cost += holding_cost * on_hand
        else:
            # backorders; on_hand is negative.
            total_penalty_cost += penalty_cost * abs(on_hand)
    
    return total_order_cost, total_holding_cost, total_penalty_cost


# ----------------------------
# Replications and Grid Search
# ----------------------------

def simulate_policy(order_level_exp, order_level_reg, replications=50):
    """
    Runs several replications of simulation under the given order_level pair.
    Returns average daily total cost as well as breakdown over replications.
    """
    cum_order = 0.0
    cum_hold = 0.0
    cum_penalty = 0.0
    for rep in range(replications):
        # Optionally, change the random seed for each replication.
        # (Here we rely on the built-in random stream.)
        order_cost, hold_cost, penalty_cost_total = run_simulation(order_level_exp, order_level_reg)
        cum_order += order_cost
        cum_hold += hold_cost
        cum_penalty += penalty_cost_total
    # Average costs per replication over n_days:
    avg_order = cum_order / replications / n_days
    avg_hold = cum_hold / replications / n_days
    avg_penalty = cum_penalty / replications / n_days
    avg_total = avg_order + avg_hold + avg_penalty
    return avg_total, avg_order, avg_hold, avg_penalty

# Grid search candidate range (we assume the search budget allows many evals)
# For expedited order target, we search between 30 and 80 (step 5)
# For regular order target, we search between 50 and 150 (step 10)
exp_candidates = np.arange(30, 85, 5)
reg_candidates = np.arange(50, 160, 10)

best_cost = float('inf')
best_policy = None
results = []  # to store all results

for order_level_exp in exp_candidates:
    for order_level_reg in reg_candidates:
        # Only consider policies with order_level_reg >= order_level_exp (logical ordering)
        if order_level_reg < order_level_exp:
            continue
        avg_total, avg_order, avg_hold, avg_penalty = simulate_policy(order_level_exp, order_level_reg, replications=50)
        results.append((order_level_exp, order_level_reg, avg_total, avg_order, avg_hold, avg_penalty))
        if avg_total < best_cost:
            best_cost = avg_total
            best_policy = (order_level_exp, order_level_reg, avg_total, avg_order, avg_hold, avg_penalty)

# ----------------------------
# Print the Best Found Policy
# ----------------------------
print("Best found policy (order_level_exp, order_level_reg):")
print(f"  Expedited Target: {best_policy[0]}")
print(f"  Regular Target:   {best_policy[1]}")
print(f"Estimated average daily cost: {best_policy[2]:.2f}")
print("   (ordering: {:.2f}, holding: {:.2f}, penalty: {:.2f})".format(
    best_policy[3], best_policy[4], best_policy[5]))

# Optionally, print all candidate policies and corresponding average daily costs.
print("\nAll evaluated policies:")
print("Exp_Level\tReg_Level\tTotal_Cost\tOrder\tHolding\tPenalty")
for rec in results:
    print(f"{rec[0]}\t\t{rec[1]}\t\t{rec[2]:.2f}\t\t{rec[3]:.2f}\t{rec[4]:.2f}\t{rec[5]:.2f}")

Best found policy (order_level_exp, order_level_reg):
  Expedited Target: 50
  Regular Target:   150
Estimated average daily cost: 3424.71
   (ordering: 3286.97, holding: 102.08, penalty: 35.66)

All evaluated policies:
Exp_Level	Reg_Level	Total_Cost	Order	Holding	Penalty
30		50		5287.79		3295.99	19.97	1971.82
30		60		5300.52		3298.72	19.97	1981.83
30		70		5297.04		3300.35	19.93	1976.76
30		80		5272.91		3294.91	20.14	1957.87
30		90		5272.41		3293.78	20.30	1958.33
30		100		5276.97		3296.33	20.39	1960.26
30		110		5253.71		3289.44	20.71	1943.57
30		120		5303.14		3304.21	20.57	1978.36
30		130		5258.12		3290.94	21.09	1946.10
30		140		5246.69		3289.48	21.31	1935.90
30		150		5264.52		3294.83	21.50	1948.18
35		50		4308.64		3293.63	35.04	979.97
35		60		4308.95		3297.72	34.87	976.36
35		70		4317.43		3302.92	34.71	979.79
35		80		4306.84		3295.50	35.13	976.21
35		90		4318.97		3302.59	35.02	981.37
35		100		4289.07		3291.35	35.45	962.26
35		110		4304.70		3295.63	35.56	973.51
35		120		4280.89		3288.6

## Claude Sonnet

In [23]:
response = claude_client.messages.create(
    model="claude-3-5-sonnet-20241022",  # Or another Claude 3 model
    max_tokens=4096,
    system="You are an expert in probability theory and stochastic modeling.",
    messages=[
        {"role": "user", "content": prompt}
    ]
)

print(response.content[0].text)

I'll approach this problem using both analytical insights and simulation. Let me break this down systematically.

1. ANALYTICAL INSIGHTS:
The problem is a dual-sourcing inventory system with different lead times. Some key observations:

a) The expedited position needs to cover demand during lead_exp (0 days) plus safety stock
b) The regular position needs to cover demand during lead_reg days plus safety stock
c) The difference between order_level_reg and order_level_exp should approximately cover demand during the lead time difference (l days)

For normally distributed demand:
- Expected demand during lead time = mu * lead_time
- Safety stock ≈ z * st_dev * sqrt(lead_time), where z is service level factor

2. SIMULATION APPROACH:
I'll implement a simulation that:
- Tracks both inventory positions
- Simulates daily demands
- Computes costs
- Uses a simple search strategy to find good order levels

Here's the implementation:

```python
import numpy as np
from typing import Tuple, List
im

In [27]:
import numpy as np
from typing import Tuple, List
import heapq

class DualSourceInventorySystem:
    def __init__(self, 
                 n_days: int = 1000,
                 initial_inv: float = 40,
                 cost_reg: float = 100.0,
                 cost_exp: float = 110.0,
                 lead_reg: int = 2,
                 lead_exp: int = 0,
                 holding_cost: float = 5.0,
                 penalty_cost: float = 495.0,
                 mu: float = 30.0,
                 st_dev: float = 10.0):
        
        self.n_days = n_days
        self.initial_inv = initial_inv
        self.cost_reg = cost_reg
        self.cost_exp = cost_exp
        self.lead_reg = lead_reg
        self.lead_exp = lead_exp
        self.holding_cost = holding_cost
        self.penalty_cost = penalty_cost
        self.mu = mu
        self.st_dev = st_dev
        
    def simulate_one_run(self, order_level_reg: float, order_level_exp: float) -> float:
        np.random.seed()  # Different seed for each run
        
        # Initialize inventory and order queues
        inventory = self.initial_inv
        regular_arrivals = []  # Priority queue for regular orders
        expedited_arrivals = []  # Priority queue for expedited orders
        
        total_cost = 0
        
        for day in range(self.n_days):
            # Get any arriving orders
            while regular_arrivals and regular_arrivals[0][0] <= day:
                _, qty = heapq.heappop(regular_arrivals)
                inventory += qty
            
            while expedited_arrivals and expedited_arrivals[0][0] <= day:
                _, qty = heapq.heappop(expedited_arrivals)
                inventory += qty
                
            # Calculate inventory positions
            exp_in_transit = sum(qty for _, qty in expedited_arrivals)
            reg_in_transit = sum(qty for _, qty in regular_arrivals)
            
            exp_position = inventory + exp_in_transit
            reg_position = inventory + exp_in_transit + reg_in_transit
            
            # Place orders
            if exp_position < order_level_exp:
                exp_order = order_level_exp - exp_position
                heapq.heappush(expedited_arrivals, (day + self.lead_exp, exp_order))
                total_cost += self.cost_exp * exp_order
            
            if reg_position < order_level_reg:
                reg_order = order_level_reg - reg_position
                heapq.heappush(regular_arrivals, (day + self.lead_reg, reg_order))
                total_cost += self.cost_reg * reg_order
            
            # Handle demand
            demand = np.random.normal(self.mu, self.st_dev)
            inventory -= demand
            
            # Calculate holding/penalty costs
            if inventory > 0:
                total_cost += self.holding_cost * inventory
            else:
                total_cost += self.penalty_cost * (-inventory)
        
        return total_cost / self.n_days

    def find_optimal_levels(self, budget: int = 1000) -> Tuple[float, float, float]:
        # Initial guess based on analytical insights
        base_exp = self.mu * self.lead_exp + 2 * self.st_dev * np.sqrt(self.lead_exp + 1)
        base_reg = self.mu * self.lead_reg + 2 * self.st_dev * np.sqrt(self.lead_reg + 1)
        
        best_cost = float('inf')
        best_reg = base_reg
        best_exp = base_exp
        
        # Simple grid search around initial guess
        searches = min(budget // 4, 100)  # Use 25% of budget for search points
        replications = max(2, budget // searches)  # Remaining budget for replications
        
        for i in range(searches):
            reg_mult = 0.7 + 0.6 * (i / searches)
            exp_mult = 0.7 + 0.6 * (i / searches)
            
            reg_level = base_reg * reg_mult
            exp_level = base_exp * exp_mult
            
            # Multiple replications for each point
            costs = [self.simulate_one_run(reg_level, exp_level) 
                    for _ in range(replications)]
            avg_cost = np.mean(costs)
            
            if avg_cost < best_cost:
                best_cost = avg_cost
                best_reg = reg_level
                best_exp = exp_level
        
        return best_reg, best_exp, best_cost

# Run optimization
system = DualSourceInventorySystem()
reg_level, exp_level, avg_cost = system.find_optimal_levels(budget=1000)

print(f"Recommended order levels:")
print(f"Regular: {reg_level:.2f}")
print(f"Expedited: {exp_level:.2f}")
print(f"Estimated average daily cost: ${avg_cost:.2f}")

# run the code 1000 times to find the estimated average daily cost at the optimal order levels and report the mean and std deviation
def run_multiple_simulations(system: DualSourceInventorySystem, reg_level: float, exp_level: float, runs: int = 1000) -> Tuple[float, float]:
    costs = []
    for _ in range(runs):
        daily_cost = system.simulate_one_run(reg_level, exp_level)
        costs.append(daily_cost)
    mean_cost = np.mean(costs)
    std_dev_cost = np.std(costs)
    return mean_cost, std_dev_cost

mean_cost, std_dev_cost = run_multiple_simulations(system, reg_level, exp_level, runs=1000)
print(f"Estimated average daily cost over 1000 runs: ${mean_cost:.2f} (std dev: ${std_dev_cost:.2f})")


Recommended order levels:
Regular: 119.63
Expedited: 25.28
Estimated average daily cost: $3273.72
Estimated average daily cost over 1000 runs: $3313.06 (std dev: $58.44)


## Deepseek R1

In [26]:
import numpy as np

def simulate_inventory(order_level_reg, order_level_exp, n_days=1000, initial_inv=40,
                       cost_reg=100.0, cost_exp=110.0, lead_reg=110, lead_exp=0,
                       holding_cost=5.0, penalty_cost=495.0, mu=30.0, st_dev=10.0,
                       seed=None):
    if seed is not None:
        np.random.seed(seed)
    
    on_hand = initial_inv
    # Track regular orders scheduled for each future day
    regular_orders = np.zeros(n_days + lead_reg + 1, dtype=float)
    total_holding = 0.0
    total_penalty = 0.0
    total_cost_reg = 0.0
    total_cost_exp = 0.0
    
    for day in range(n_days):
        # Receive regular orders arriving today
        on_hand += regular_orders[day]
        regular_orders[day] = 0  # Reset after receiving
        
        # Generate demand
        demand = max(0, np.random.normal(mu, st_dev))
        inv_after_demand = on_hand - demand
        
        # Calculate holding/penalty costs
        if inv_after_demand >= 0:
            total_holding += inv_after_demand * holding_cost
            on_hand = inv_after_demand
        else:
            total_penalty += -inv_after_demand * penalty_cost
            on_hand = 0
        
        # Expedited order (arrives immediately)
        expedited_order = max(0, order_level_exp - on_hand)
        total_cost_exp += expedited_order * cost_exp
        on_hand += expedited_order
        
        # Calculate regular inventory position (on-hand + orders in next lead_reg days)
        start = day + 1
        end = min(day + lead_reg + 1, len(regular_orders))
        regular_inv_position = on_hand + regular_orders[start:end].sum()
        
        # Place regular order
        regular_order = max(0, order_level_reg - regular_inv_position)
        if regular_order > 0:
            arrival_day = day + lead_reg
            if arrival_day < len(regular_orders):
                regular_orders[arrival_day] += regular_order
            total_cost_reg += regular_order * cost_reg
    
    total_cost = total_holding + total_penalty + total_cost_reg + total_cost_exp
    return total_cost / n_days

# Example validation with user's baseline
baseline_cost = simulate_inventory(80, 50, seed=42)
print(f"Baseline (80, 50) average daily cost: ${baseline_cost:.2f}")

# Optimization using grid search
def optimize(budget=1000):
    # Test plausible ranges based on problem parameters
    reg_values = np.linspace(80, 200, 5).astype(int)  # 80, 100, 150, 200
    exp_values = np.linspace(40, 60, 5).astype(int)   # 40, 50, 60
    best_cost = float('inf')
    best_reg, best_exp = 80, 50  # Initialize with baseline
    
    evaluations = 0
    for reg in reg_values:
        for exp in exp_values:
            if evaluations >= budget:
                break
            cost = simulate_inventory(reg, exp, n_days=1000, seed=42)
            evaluations += 1
            if cost < best_cost:
                best_cost = cost
                best_reg, best_exp = reg, exp
    return best_reg, best_exp, best_cost

optimal_reg, optimal_exp, avg_cost = optimize(budget=1000)
print(f"\nOptimal order_level_reg: {optimal_reg}")
print(f"Optimal order_level_exp: {optimal_exp}")
print(f"Average daily cost: ${avg_cost:.2f}")

# run the thing 1000 times to get mean and std deviation for the optimal order levels
mean_cost, std_dev_cost = run_multiple_simulations(system, optimal_reg, optimal_exp, runs=1000)
print(f"Estimated average daily cost over 1000 runs: ${mean_cost:.2f} (std dev: ${std_dev_cost:.2f})")


Baseline (80, 50) average daily cost: $3457.27

Optimal order_level_reg: 80
Optimal order_level_exp: 50
Average daily cost: $3457.27
Estimated average daily cost over 1000 runs: $4559.81 (std dev: $126.33)
