## Inventory Optimization with Data Science: Hands-On Tutorial with Python

### Part 2: A Gentle Introduction to Implementing the Markov Process for Inventory Optimization.

In [14]:
from typing import Dict

from rich import pretty
pretty.install()

MarkovRewProcessDict = {"Current State A":{"NextS1fromA": ("PNextS1fromA","Reward1")
                                           ,"NextS2fromA, from A": ("PNextS2fromA","Reward2")},
                    
                     "Current State B":{"NextS1fromB": ("PNextS1fromB","Reward3"),
                                        "NextS2fromB": ("PNextS2fromB","Reward4")}}

MarkovRewProcessDict

In [15]:
for (state, value) in MarkovRewProcessDict.items():
    
    print("The Current state is: {}".format(state))
    
    for (next_state, (trans_prob,reward)) in value.items():
        
        print("The Next State is {} \nwith Probability of: {} \
               \nand Reward: {}".format(next_state, trans_prob, reward))
    

The Current state is: Current State A
The Next State is NextS1fromA 
with Probability of: PNextS1fromA                
and Reward: Reward1
The Next State is NextS2fromA, from A 
with Probability of: PNextS2fromA                
and Reward: Reward2
The Current state is: Current State B
The Next State is NextS1fromB 
with Probability of: PNextS1fromB                
and Reward: Reward3
The Next State is NextS2fromB 
with Probability of: PNextS2fromB                
and Reward: Reward4


## Reward Modeling (two cases)

- Case for 
- $0 \leq i \leq \alpha + \beta -1$ 

$$R((\alpha , \beta) \to ((\alpha + \beta) -i, C-(\alpha + \beta) ) = -h\alpha$$

- For the case demand excedding $(\alpha + \beta)$

- $$R((\alpha , \beta) \to (0, C-(\alpha + \beta) ) = -h\alpha -p(\sum_{j=\alpha + \beta +1}^{\infin}f(j)(j-(\alpha +\beta)))$$
- $$=h\alpha-p(\lambda(1-F(\alpha+\beta-1)) - (\alpha+\beta)(1-F(\alpha+\beta)))$$

In [16]:
# need numpy to do some numeric calculation
import numpy as np

# poisson is used to find pdf of Poisson distribution 
from scipy.stats import poisson

In [17]:

MarkovRewProcessDict: Dict[tuple, Dict[tuple, tuple]] = {}

user_capacity = 2
user_poisson_lambda = 1.0

holding_cost = 1
stockout_cost = 10

In [18]:
# We are condiering all possible states
# That we can face in running this bike shop
for alpha in range(user_capacity+1):                            
                                                               
    for beta in range(user_capacity + 1 - alpha):
        
        # This is St, the current state
        state = (alpha, beta)                                   

        # This is initial inventory, total bike you have at 8AM 
        initial_inventory = alpha + beta                         
        
        # The beta1 is the beta in next state, irrespctive of current state (as the decsion policy is constant)
        beta1 = user_capacity - initial_inventory
        
        base_reward = -alpha* holding_cost
        # List of all possible demand you can get
        for i in range(initial_inventory +1):

            # if initial demand can meet the demand
            if i <= (initial_inventory-1):
                
                # probality of specifc demand can happen
                transition_prob = poisson.pmf(i,user_poisson_lambda)
                
                # If we already defined teh state in our data (MarkovProcessDict)
                if state in MarkovRewProcessDict:
                    
                    MarkovRewProcessDict[state][(initial_inventory - i, beta1)]= (transition_prob, base_reward)
                
                else:
                    
                    MarkovRewProcessDict[state] = {(initial_inventory - i, beta1):(transition_prob, base_reward)}
                         
            # if initial demand can not meet the demand
            else:
                
                transition_prob = 1- poisson.cdf(initial_inventory -1, user_poisson_lambda)
                # probability of not meeting the demands
                transition_prob1 = 1- poisson.cdf(initial_inventory -1, user_poisson_lambda)
                
                # probability of not meeting the demands
                transition_prob2 = 1- poisson.cdf(initial_inventory, user_poisson_lambda)
                
                # total reward
                
                reward = base_reward - stockout_cost
                (user_poisson_lambda*transition_prob1) - \
                        initial_inventory*transition_prob2
                
                if state in MarkovRewProcessDict:
                    
                    MarkovRewProcessDict[state][(0, beta1)]= (transition_prob,reward)
                    
                else:

                    MarkovRewProcessDict[state] = {(0, beta1 ):(transition_prob, reward)}

In [19]:
for (state, value) in MarkovRewProcessDict.items():
    
    print("The Current state is: {}".format(state))
    
    for (next_state, (trans_prob,reward)) in value.items():
        
        print("The Next State is {} \nwith Probability of: {} \
               \nand Reward: {}".format(next_state, trans_prob, reward))

The Current state is: (0, 0)
The Next State is (0, 2) 
with Probability of: 1.0                
and Reward: -10
The Current state is: (0, 1)
The Next State is (1, 1) 
with Probability of: 0.3678794411714424                
and Reward: 0
The Next State is (0, 1) 
with Probability of: 0.6321205588285576                
and Reward: -10
The Current state is: (0, 2)
The Next State is (2, 0) 
with Probability of: 0.3678794411714424                
and Reward: 0
The Next State is (1, 0) 
with Probability of: 0.3678794411714424                
and Reward: 0
The Next State is (0, 0) 
with Probability of: 0.26424111765711533                
and Reward: -10
The Current state is: (1, 0)
The Next State is (1, 1) 
with Probability of: 0.3678794411714424                
and Reward: -1
The Next State is (0, 1) 
with Probability of: 0.6321205588285576                
and Reward: -11
The Current state is: (1, 1)
The Next State is (2, 0) 
with Probability of: 0.3678794411714424                
and Reward

In [20]:
MarkovRewProcessDict

In [35]:
list_states = MarkovRewProcessDict.keys()

In [36]:
list_states

In [41]:
MarkovRewProcessDict[(0,0)][(0,2)][0]

In [37]:
for i, s in enumerate(list_states):
    for j, s1 in enumerate(list_states):

        

    print(i)

    print(s)
    #for j, s1 in enumerate(MarkovRewProcessDict.values()):
    #    print(j)
    #    print(s1)

0
(0, 0)
1
(0, 1)
2
(0, 2)
3
(1, 0)
4
(1, 1)
5
(2, 0)


In [22]:
for s, k in MarkovRewProcessDict.items():
    print(s)

(0, 0)
(0, 1)
(0, 2)
(1, 0)
(1, 1)
(2, 0)


In [23]:
import numpy as np

# Your dictionary
data_dict = {
    (0, 0): {(0, 2): (1.0, -10)},
    (0, 1): {
        (1, 1): (0.3678794411714424, 0),
        (0, 1): (0.6321205588285576, -10)
    },
    # ... (the rest of your dictionary)
}

# Extract unique row and column keys
row_keys = sorted(set(key[0] for key in data_dict.keys()))
col_keys = sorted(set(key[1] for key in data_dict.keys()))

# Initialize an empty matrix with NaN values
matrix = np.empty((len(row_keys), len(col_keys)), dtype=object)
matrix[:] = np.nan

# Populate the matrix with probabilities from the dictionary
for row_idx, row_key in enumerate(row_keys):
    for col_idx, col_key in enumerate(col_keys):
        if (row_key, col_key) in data_dict:
            probabilities = data_dict[(row_key, col_key)]
            matrix[row_idx, col_idx] = probabilities

print(matrix)


[[{(0, 2): (1.0, -10)}
  {(1, 1): (0.3678794411714424, 0), (0, 1): (0.6321205588285576, -10)}]]


In [24]:
import numpy as np

# Create a dictionary
my_dict = {
    (0, 0): 1,
    (0, 1): 2,
    (1, 0): 3,
    (1, 1): 4
}

# Extract keys and values from the dictionary
keys = list(my_dict.keys())
values = list(my_dict.values())

# Determine the dimensions of the matrix
n_rows = len(set(key[0] for key in keys))
n_cols = len(set(key[1] for key in keys))

# Create an empty matrix filled with zeros
matrix = np.zeros((n_rows, n_cols))

# Populate the matrix with values from the dictionary
for key, value in zip(keys, values):
    matrix[key[0], key[1]] = value

print(matrix)


[[1. 2.]
 [3. 4.]]
