In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from gurobipy import Model, GRB
from sklearn.ensemble import RandomForestRegressor

### Data Preprocessing and Helper Functions

In [8]:
# Set page configuration
seed = 800
np.random.seed(seed)

In [9]:
df = pd.read_csv('data/med_inv_dataset.csv')

df.columns = df.columns.str.lower()
df = df.dropna()
df['dateofbill'] = pd.to_datetime(df['dateofbill'])
df['month_name'] = df['dateofbill'].dt.strftime('%B') # Extract month name
df['month_number'] = df['dateofbill'].dt.month  # Extract month number
df['week_number'] = df['dateofbill'].dt.isocalendar().week  # Extract week number

# Create a bi-weekly period column
df['bi_weekly'] = (df['dateofbill'].dt.day - 1) // 14 + 1

In [10]:
# Group by drug, subcat, month_name, month_number, and bi-weekly period
df_bi_weekly = df.groupby(['subcat', 'month_name', 'month_number', 'bi_weekly'], as_index=False).agg(
    {
        'quantity': 'sum',
        'returnquantity': 'sum',
        'final_cost': 'sum',
        'final_sales': 'sum',
        'rtnmrp': 'sum'
    }
)

# Step 1: Collect top 5 subcategories with the highest sum of quantity
top_5_subcats = df_bi_weekly.groupby('subcat')['quantity'].sum().nlargest(5).index

# Step 2: Filter the dataframe for only the top 5 subcategories
filtered_top_5_per_subcat = df_bi_weekly[df_bi_weekly['subcat'].isin(top_5_subcats)]

# filtered_df_bi_weekly = df_bi_weekly.merge(filtered_top_5_per_subcat[['subcat']], on=['subcat'])
filtered_df_bi_weekly = filtered_top_5_per_subcat.sort_values(by=['subcat', 'month_number', 'bi_weekly'])
# Add a biweekly index for every drugname in every subcat
filtered_df_bi_weekly['biweekly_index'] = (
    filtered_df_bi_weekly.groupby(['subcat'])
    .cumcount() + 1
)

In [11]:
# Filter rows where month_number is 4, 5, or 6 (2 and 3 is added just for their history)
filtered_months_df = filtered_df_bi_weekly[filtered_df_bi_weekly['month_number'].isin([2, 3, 4, 5, 6])]

# The rest of the DataFrame but will remove these months later
rest_df = filtered_df_bi_weekly


In [12]:
def calculate_last_three_cycles(df, subcat, horizon = 'biweekly_index', quanity= 'quantity', train=False):
    ml_df = df[(df['subcat'] == subcat)][[horizon,'subcat', quanity]]

    ml_df['quantity_lastcycle']=ml_df[quanity].shift(+1)
    ml_df['quantity_2cycleback']=ml_df[quanity].shift(+2)
    ml_df['quantity_3cycleback']=ml_df[quanity].shift(+3)
    ml_df['quantity_4cycleback']=ml_df[quanity].shift(+4)
    ml_df['quantity_5cycleback']=ml_df[quanity].shift(+5)

    if train:
        ml_df = ml_df[~ml_df[horizon].isin([9, 10, 11, 12, 13, 14, 15, 16, 17])]

    ml_df = ml_df.dropna() #dropping na is necessary to avoid model failure other option

    X = ml_df[['subcat', horizon,'quantity_lastcycle', 'quantity_2cycleback', 'quantity_3cycleback', 'quantity_4cycleback', 'quantity_5cycleback']]
    y = ml_df[['subcat', quanity]]
    return X, y

In [13]:
trainX = pd.DataFrame(columns=['subcat', 'quantity_lastcycle', 'quantity_2cycleback', 'quantity_3cycleback', 
                               'quantity_4cycleback','quantity_5cycleback'])
trainY = pd.DataFrame(columns=['subcat', 'quantity'])

trainX_rtn = pd.DataFrame(columns=['subcat', 'quantity_lastcycle', 'quantity_2cycleback', 'quantity_3cycleback', 
                               'quantity_4cycleback','quantity_5cycleback'])
trainY_rtn = pd.DataFrame(columns=['subcat', 'returnquantity'])

In [14]:
list_subcat = filtered_top_5_per_subcat['subcat'].unique().tolist()

subcat_dict = {
    list_subcat[0]: {'Capacity': 200, 'shelf_life': (3,8), 'unit_cost': (40.85,322.27), 'salvage_value': (1,617.76)},
    list_subcat[1]: {'Capacity': 400, 'shelf_life': (2,5), 'unit_cost': (40.00,3178.00), 'salvage_value': (1,8014.0)},
    list_subcat[2]: {'Capacity': 100, 'shelf_life': (1,4), 'unit_cost': (40.00,3719.00), 'salvage_value': (1,4462.8)},
    list_subcat[3]: {'Capacity': 100, 'shelf_life': (1,3), 'unit_cost': (42.95,594.95), 'salvage_value': (1,327.11)},
    list_subcat[4]: {'Capacity': 300, 'shelf_life': (5,10), 'unit_cost': (40.00,3491.09), 'salvage_value': (1,1226.0)}
}

# st.write(subcat_dict)

for key in list_subcat:
    X, y = calculate_last_three_cycles(rest_df, subcat = key, quanity = 'quantity', train=True)
    X_rtn, y_rtn = calculate_last_three_cycles(rest_df, subcat = key, quanity = 'returnquantity', train=True)
    trainX = pd.concat([X, trainX])
    trainY = pd.concat([y, trainY])
    trainX_rtn = pd.concat([X_rtn, trainX_rtn])
    trainY_rtn = pd.concat([y_rtn, trainY_rtn])


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



In [15]:
testX = pd.DataFrame(columns=['subcat', 'quantity_lastcycle', 'quantity_2cycleback', 'quantity_3cycleback',
                              'quantity_4cycleback','quantity_5cycleback'])
testY = pd.DataFrame(columns=['subcat', 'quantity'])

testX_rtn = pd.DataFrame(columns=['subcat', 'quantity_lastcycle', 'quantity_2cycleback', 'quantity_3cycleback', 
                               'quantity_4cycleback','quantity_5cycleback'])
testY_rtn = pd.DataFrame(columns=['subcat', 'returnquantity'])

In [16]:
for key in list_subcat:
    X, y = calculate_last_three_cycles(filtered_months_df, subcat = key, quanity = 'quantity')
    X_rtn, y_rtn = calculate_last_three_cycles(filtered_months_df, subcat = key, quanity = 'returnquantity')
    testX = pd.concat([X, testX])
    testY = pd.concat([y, testY])
    testX_rtn = pd.concat([X_rtn, testX_rtn])
    testY_rtn = pd.concat([y_rtn, testY_rtn])


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Model converted into a function for reusablilty

In [21]:
def optimize_inventory(selected_subcat, holding_cost, stockout_penalty, waste_penalty):
    model = RandomForestRegressor(random_state=seed)
    model_rtn = RandomForestRegressor(random_state=seed)

    # This is for demand
    tempx = trainX[(trainX['subcat'] == selected_subcat)].copy()
    t_x = tempx[['quantity_lastcycle', 'quantity_2cycleback', 'quantity_3cycleback','quantity_4cycleback','quantity_5cycleback']]
    t_y = trainY[(trainY['subcat']==selected_subcat)]['quantity']
    model.fit(t_x, t_y)
    test = testX[(testX['subcat'] == selected_subcat)][['quantity_lastcycle', 'quantity_2cycleback', 'quantity_3cycleback',
                                                                                                'quantity_4cycleback','quantity_5cycleback']]
    predicted_demand = model.predict(test)
    predicted_demand = np.ceil(predicted_demand).astype(int) 

    # This is for return qty
    tempx_rtn = trainX_rtn[(trainX_rtn['subcat'] == selected_subcat)].copy()
    t_x = tempx_rtn[['quantity_lastcycle', 'quantity_2cycleback', 'quantity_3cycleback','quantity_4cycleback','quantity_5cycleback']]
    t_y = trainY_rtn[(trainY_rtn['subcat']==selected_subcat)]['returnquantity']

    model_rtn.fit(t_x, t_y)
    
    test = testX_rtn[(testX_rtn['subcat'] == selected_subcat)][['quantity_lastcycle', 'quantity_2cycleback', 'quantity_3cycleback',
                                                                                                'quantity_4cycleback','quantity_5cycleback']]
    predicted_return = model_rtn.predict(test)
    predicted_return = np.ceil(predicted_return).astype(int) 
    
    test_index = testX[(testX['subcat'] == selected_subcat)]['biweekly_index']
    
    T = len(predicted_demand)-1

    temp_dict = subcat_dict[selected_subcat]

    # Create a DataFrame with biweekly_index and predicted_demand
    np.random.seed(seed)
    prediction_df = pd.DataFrame({
        'biweekly_index': test_index,
        'Predicted_Demand': predicted_demand,
        'Return_Prediction': predicted_return,
        "Unit_Cost": np.random.uniform(temp_dict['unit_cost'][0], temp_dict['unit_cost'][1], len(predicted_demand)),  
        "Salvage_Value": np.random.uniform(temp_dict['salvage_value'][0], temp_dict['salvage_value'][1], len(predicted_demand)),
        "Shelf_Life": np.random.randint(temp_dict['shelf_life'][0], temp_dict['shelf_life'][1], len(predicted_demand))  
    })

    # Create a temporary table with biweekly_index, month_name, and bi_weekly
    temp_table = filtered_df_bi_weekly[['biweekly_index', 'month_name', 'bi_weekly']].drop_duplicates()

    # Join prediction_df with the temp_table on biweekly_index
    prediction_df = prediction_df.merge(temp_table, on='biweekly_index', how='left')
    # Reorder columns to place month_name and bi_weekly at the beginning
    columns_order = ['month_name', 'bi_weekly'] + [col for col in prediction_df.columns if col not in ['month_name', 'bi_weekly']]
    prediction_df = prediction_df[columns_order]

    # Rename 'bi_weekly' to 'cycle_number'
    prediction_df.rename(columns={'bi_weekly': 'cycle_number'}, inplace=True)

    # Drop the 'biweekly_index' column
    prediction_df.drop(columns=['biweekly_index'], inplace=True)

    prediction_df = prediction_df.reset_index(drop=True)

    # Gurobi Model
    model = Model("Multi_Period_Medical_Inventory_Optimization")

    # Decision Variables
    Q = model.addVars(prediction_df.index, vtype=GRB.CONTINUOUS, name="OrderQty")  # Order quantity

    # State Variables
    I = model.addVars(prediction_df.index, vtype=GRB.CONTINUOUS, name="Inventory")  # Inventory level

    # Auxiliary Variables (Derived)
    Y = model.addVars(prediction_df.index, vtype=GRB.CONTINUOUS, name="Expired")   # Expired stock
    S = model.addVars(prediction_df.index, vtype=GRB.CONTINUOUS, name="Stockout")  # Stockout

    # Objective: Minimize Total Cost
    model.setObjective(
        sum(prediction_df.loc[i, "Unit_Cost"] * Q[i] + 
            holding_cost * (I[i]) +
            stockout_penalty * S[i] + 
            waste_penalty * Y[i] - 
            prediction_df.loc[i, "Salvage_Value"] * prediction_df.loc[i, "Return_Prediction"]
            for i in prediction_df.index),
        GRB.MINIMIZE
    )

    # Constraints:
    for i in prediction_df.index:
        # Safety Stock Constraint
        safety_stock = 0.2 * prediction_df["Predicted_Demand"]  # Example: 20% of demand as buffer
        
        # Inventory Balance Constraint
        if i >= T:  # Ensure we don't reference out-of-bounds indices
            continue
        
        # Starting Inventory at capacity
        model.addConstr(I[0] == subcat_dict[selected_subcat]['Capacity'], name=f"Initial_Inventory{i}")

        # Ensure inventory level meets safety stock requirements
        model.addConstr(I[i] >= safety_stock[i], name=f"SafetyStock_{i}")
        
        model.addConstr(I[i+1] == I[i] + Q[i] + prediction_df.loc[i, "Return_Prediction"] - prediction_df.loc[i, "Predicted_Demand"] - Y[i], name=f"Inventory_Balance_{i}")

        model.addConstr(I[i+1] <= subcat_dict[selected_subcat]['Capacity'], name=f"Space_constraint{i}")
        
        # Expired Inventory Constraint
        model.addConstr(Y[i] <= I[i], name=f"Expiry_{i}")

        # Stockout Constraint
        model.addConstr(S[i] >= prediction_df.loc[i, "Predicted_Demand"] - I[i] - Q[i], name=f"Stockout_{i}")
    
    # Solve Model
    model.optimize()

    prediction_df["Optimal_Order"] = [Q[i].x for i in prediction_df.index]
    prediction_df["Inventory_Level"] = [I[i].x for i in prediction_df.index]
    prediction_df["Expired_Stock"] = [Y[i].x for i in prediction_df.index]
    prediction_df["Stockouts"] = [S[i].x for i in prediction_df.index]
    return prediction_df

### User Selection to run Sensitivity Analysis

Please select a subcat and feel free to adjust different values for the penalities

In [22]:
i = 0
for subcate in list_subcat:
    print(f"Number {i}: {subcate}")
    i+=1

Number 0: INHALERS & RESPULES
Number 1: INJECTIONS
Number 2: IV FLUIDS, ELECTROLYTES, TPN
Number 3: LIQUIDS & SOLUTIONS
Number 4: TABLETS & CAPSULES


In [23]:
selected_subcat = list_subcat[1]
print(f"Selected subcategory: {selected_subcat}")

Selected subcategory: INJECTIONS


In [25]:
filtered_df = trainX[trainX['subcat'] == selected_subcat]
# selected_drugname = st.selectbox('Select a drug', filtered_df['drugname'].unique())

# Cost Coefficients_1
holding_cost_1 = 2 # Cost per unit held
stockout_penalty_1 = 50  # Cost per stockout
waste_penalty_1 = 10 # Cost for expired stock

# Cost Coefficients_2
holding_cost_2 = 2 # Cost per unit held
stockout_penalty_2 = 50  # Cost per stockout
waste_penalty_2 = 10 # Cost for expired stock

# Cost Coefficients_3
holding_cost_3 = 2 # Cost per unit held
stockout_penalty_3 = 50  # Cost per stockout
waste_penalty_3 = 10 # Cost for expired stock

In [26]:
# Predict based on sliders
prediction_df_1 = optimize_inventory(selected_subcat, holding_cost_1, stockout_penalty_1, waste_penalty_1)
prediction_df_2 = optimize_inventory(selected_subcat, holding_cost_2, stockout_penalty_2, waste_penalty_2)
prediction_df_3 = optimize_inventory(selected_subcat, holding_cost_3, stockout_penalty_3, waste_penalty_3)

Set parameter Username
Set parameter LicenseID to value 2611369
Academic license - for non-commercial use only - expires 2026-01-17
Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (mac64[x86] - Darwin 24.4.0 24E248)

CPU model: Intel(R) Core(TM) i7-8557U CPU @ 1.70GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 48 rows, 40 columns and 96 nonzeros
Model fingerprint: 0x36096da4
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [2e+00, 3e+03]
  Bounds range     [0e+00, 0e+00]
  RHS range        [3e+01, 5e+02]
Presolve removed 37 rows and 23 columns
Presolve time: 0.02s
Presolved: 11 rows, 17 columns, 31 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0   -1.0165153e+06   1.581000e+02   0.000000e+00      0s
       6    3.0855642e+05   0.000000e+00   0.000000e+00      0s

Solved in 6 iterations and 0.03 seconds (0.00 work units)
Optimal objective  3.085564199e+05
Gurobi Optimi

In [27]:
# Combine both sets into one DataFrame for easy plotting
prediction_df_1['Set'] = 'Set 1'
prediction_df_2['Set'] = 'Set 2'
prediction_df_3['Set'] = 'Set 3'

In [28]:
# Combine the two DataFrames
combined_df = pd.concat([prediction_df_1[['Optimal_Order', 'Set']], prediction_df_2[['Optimal_Order', 'Set']], prediction_df_3[['Optimal_Order', 'Set']]])

In [30]:
fig = px.bar(
    combined_df,
    x=combined_df.index,
    y='Optimal_Order',
    color='Set',
    barmode='group',
    labels={'Optimal_Order': 'Order Quantity', 'x': 'Cycle'},
    title=f"Order Quantity Comparison between Three Sets for {selected_subcat}",
)
# Update layout to show all x-axis labels
fig.update_layout(
    xaxis=dict(
        tickmode='linear'
    )
)