### Simple example of conditional probability for sequential steps

In [1]:
import numpy as np
import random as rm

In [2]:
# The state space. The labels and their order must agree with the rows and
# columns of transitionName / transitionMatrix below and with the labels the
# simulation code compares against ("Facebook_Ad", "Email", "Product_Page").
states = ["Facebook_Ad","Email","Product_Page"]

# Possible sequences of events: transitionName[i][j] labels the move from
# state i to state j (F = Facebook_Ad, E = Email, P = Product_Page).
transitionName = [["FF","FE","FP"],
                  ["EF","EE","EP"],
                  ["PF","PE","PP"]]

# Probabilities matrix (transition matrix): transitionMatrix[i][j] is the
# probability of the move labelled transitionName[i][j]. Each row is a
# probability distribution over the next state, so each row sums to 1.
transitionMatrix = [[0.2,0.6,0.2],
                    [0.1,0.6,0.3],
                    [0.2,0.7,0.1]]

### This step checks that the probabilities in each row of the transition matrix add up to 1

In [3]:
# Every row of the transition matrix is a probability distribution over the
# next state, so each row must sum to 1 on its own (checking only the grand
# total would let errors in different rows cancel out). A small tolerance is
# used because sums of floats such as 0.1 + 0.6 + 0.3 are not always exact.
if any(abs(sum(row) - 1) > 1e-9 for row in transitionMatrix):
    print("Somewhere, something went wrong. Transition matrix, perhaps?")
else:
    print("All is gonna be okay, you should move on!! ;)")

All is gonna be okay, you should move on!! ;)


Now let's code the real thing. You will use the numpy.random.choice to generate a random sample from the set of transitions possible. While most of its arguments are self-explanatory, the p might not be. It is an optional argument that lets you enter the probability distribution for the sampling set, which is the transition matrix in this case.

In [4]:
# A function that implements the Markov model to forecast the customer's activity.
def activity_forecast(days, start_state="Email"):
    """
    Simulate one random customer journey and print it.

    Parameters
    ----------
    days: int
        The number of transitions (steps) to simulate. Zero or a negative
        value simulates no transitions.

    start_state: str
        The state the journey starts from. Must be one of "Facebook_Ad",
        "Email" or "Product_Page".

    Returns
    -------
    None
        The start state, the visited states, the end state and the
        probability of that exact sequence are printed.

    Raises
    ------
    ValueError
        If start_state is not one of the known states.
    """
    # State labels in the row/column order of transitionName and transitionMatrix.
    stateNames = ["Facebook_Ad", "Email", "Product_Page"]
    if start_state not in stateNames:
        raise ValueError("Unknown start state: " + str(start_state))

    # Choose the starting state
    activityToday = start_state
    print("Start state: " + activityToday)
    # Shall store the sequence of states taken. So, this only has the starting state for now.
    activityList = [activityToday]
    # To calculate the probability of the activityList
    prob = 1
    for _ in range(days):
        row = stateNames.index(activityToday)
        # Draw one transition label (e.g. "EP") using the current state's row as weights.
        change = np.random.choice(transitionName[row], replace=True, p=transitionMatrix[row])
        # The label's position in its row is the index of the next state, so the
        # probability is always read from the same cell that was sampled.
        col = transitionName[row].index(change)
        prob = prob * transitionMatrix[row][col]
        activityToday = stateNames[col]
        activityList.append(activityToday)
    print("Possible states: " + str(activityList))
    print("End state after "+ str(days) + " days: " + activityToday)
    print("Probability of the possible sequence of states: " + str(prob))

# Simulate one possible journey over the next 2 days; pass a larger number of days to simulate a longer customer journey
activity_forecast(2)

Start state: Email
Possible states: ['Email', 'Product_Page', 'Facebook_Ad']
End state after 2 days: Facebook_Ad
Probability of the possible sequence of states: 0.06


You get one randomly sampled sequence of transitions, along with the probability of that exact sequence occurring, starting from state: 'Email'. Extend the program further by running it many times (10,000 below) from a fixed starting state; the fraction of runs that end in a particular state then estimates the probability of ending there. Let's rewrite the function activity_forecast and add a fresh set of loops to do this...

In [5]:
def activity_forecast(days, start_state="Facebook_Ad"):
    """
    Simulate one random customer journey and return the visited states.

    The default start state is "Facebook_Ad" because the code that consumes
    the result reports the probability of "starting at state:'Facebook'".

    Parameters
    ----------
    days: int
        The number of transitions (steps) to simulate. Zero or a negative
        value simulates no transitions.

    start_state: str
        The state the journey starts from. Must be one of "Facebook_Ad",
        "Email" or "Product_Page".

    Returns
    -------
    list of str
        The start state followed by one state per simulated step, so the
        list has days + 1 entries.

    Raises
    ------
    ValueError
        If start_state is not one of the known states.
    """
    # State labels in the row/column order of transitionName and transitionMatrix.
    stateNames = ["Facebook_Ad", "Email", "Product_Page"]
    if start_state not in stateNames:
        raise ValueError("Unknown start state: " + str(start_state))

    # Choose the starting state
    activityToday = start_state
    # Shall store the sequence of states taken. So, this only has the starting state for now.
    activityList = [activityToday]
    for _ in range(days):
        row = stateNames.index(activityToday)
        # Draw one transition label (e.g. "FE") using the current state's row as weights.
        change = np.random.choice(transitionName[row], replace=True, p=transitionMatrix[row])
        # The label's position in its row is the index of the next state.
        activityToday = stateNames[transitionName[row].index(change)]
        activityList.append(activityToday)
    return activityList

# Number of simulated journeys. The same value is used for the loop and for the
# denominator below so that the two can never disagree.
n_runs = 10000

# To save every activityList: one simulated 2-step journey per run
list_activity = [activity_forecast(2) for _ in range(n_runs)]

# Count the journeys whose final state is 'Email'
count = sum(1 for smaller_list in list_activity if smaller_list[-1] == "Email")

# Estimate the probability of ending at state:'Email' as the fraction of runs
# that ended there. NOTE(review): the message below assumes activity_forecast
# starts every journey at the Facebook state - confirm against its definition.
percentage = (count/n_runs) * 100
print("The probability of starting at state:'Facebook' and ending at state:'Email'= " + str(percentage) + "%")

The probability of starting at state:'Facebook' and ending at state:'Email'= 60.39%


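How did we get so close to the expected 62%? Starting from Facebook_Ad, the exact probability of being at Email after two steps is the sum over the three possible intermediate states: $0.2 \times 0.6 + 0.6 \times 0.6 + 0.2 \times 0.7 = 0.62$.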

**Note:** This is the "law of large numbers", a principle of probability stating that the observed frequency of an event settles toward its true probability, but only if there are enough trials or instances. In other words, as the number of experiments increases, the actual ratio of outcomes will converge on the theoretical or expected ratio of outcomes.



## Markov Chains States Simulator

In [6]:
import numpy as np


# A Markov chain whose transition probabilities are stored in a nested dict

class MarkovChain:
    """Markov chain over the states named by the keys of a nested dict."""

    def __init__(self, transition_prob):
        """
        Store the transition probabilities and derive the list of states.

        Parameters
        ----------
        transition_prob: dict
            Maps every state to a dict that maps every possible next
            state to the probability of moving there, e.g.
                {'state1': {'state1': 0.1, 'state2': 0.4},
                 'state2': {...}}
        """
        self.transition_prob = transition_prob
        self.states = [state for state in transition_prob.keys()]

    # Randomly samples the next state; it does not pick the most likely one.
    def next_state(self, current_state):
        """
        Draw the state at the next time step at random.

        Parameters
        ----------
        current_state: str
            The state the system is in now.

        Returns
        -------
        One entry of self.states, drawn with the probabilities listed
        for current_state.
        """
        weights = []
        for candidate in self.states:
            weights.append(self.transition_prob[current_state][candidate])
        return np.random.choice(self.states, p=weights)

    # Randomly samples a journey of a given number of steps.
    def generate_states(self, current_state, no=10):
        """
        Draw a sequence of successive future states.

        Parameters
        ----------
        current_state: str
            The state the sequence starts from (not included in the
            result).

        no: int
            How many future states to draw.

        Returns
        -------
        list
            The drawn states in order, each one sampled from the state
            drawn before it.
        """
        trajectory = []
        state = current_state
        for _ in range(no):
            state = self.next_state(state)
            trajectory.append(state)
        return trajectory

### Create State Transition Probability matrix / nested dictionary

In [9]:
# Each outer key is a current state; its inner dict gives the probability of
# moving to each next state, so every inner dict sums to 1.
transition_prob = {
    'Facebook': {'Facebook': 0.8, 'Email': 0.19, 'Product_Page': 0.01},
    'Email': {'Facebook': 0.2, 'Email': 0.7, 'Product_Page': 0.1},
    'Product_Page': {'Facebook': 0.1, 'Email': 0.2, 'Product_Page': 0.7},
}

# NOTE(review): the variable name is weather-themed although the chain models
# customer-journey states; it is kept because later cells refer to it.
weather_chain = MarkovChain(transition_prob=transition_prob)

In [10]:
# Sample the next state given the current state (a random draw weighted by the 'Facebook' probabilities, not a fixed prediction)

weather_chain.next_state(current_state='Facebook')

'Facebook'

In [8]:
# Sample a 10-step journey that starts from the product page
weather_chain.generate_states(current_state='Product_Page', no=10)     

['Email',
 'Email',
 'Email',
 'Email',
 'Facebook',
 'Facebook',
 'Facebook',
 'Facebook',
 'Facebook',
 'Facebook']

### Same class, using a probability matrix instead of a nested dictionary

In [26]:
import numpy as np
 
class MarkovChain:
    """Markov chain described by a transition matrix and a list of state labels."""

    def __init__(self, transition_matrix, states):
        """
        Store the matrix and build the label <-> index lookups.

        Parameters
        ----------
        transition_matrix: 2-D array
            Row i holds the probabilities of moving from state i to
            each state.

        states: 1-D array
            The state labels, listed in the same order as the rows and
            columns of transition_matrix.
        """
        self.transition_matrix = np.atleast_2d(transition_matrix)
        self.states = states
        # label -> row/column position
        self.index_dict = {}
        for position in range(len(self.states)):
            self.index_dict[self.states[position]] = position
        # row/column position -> label
        self.state_dict = {}
        for position in range(len(self.states)):
            self.state_dict[position] = self.states[position]

    def next_state(self, current_state):
        """
        Draw the state at the next time step at random.

        Parameters
        ----------
        current_state: str
            The state the system is in now.

        Returns
        -------
        One entry of self.states, drawn with the probabilities in the
        matrix row that belongs to current_state.
        """
        row_index = self.index_dict[current_state]
        row_probabilities = self.transition_matrix[row_index, :]
        return np.random.choice(self.states, p=row_probabilities)

    def generate_states(self, current_state, no=10):
        """
        Draw a sequence of successive future states.

        Parameters
        ----------
        current_state: str
            The state the sequence starts from (not included in the
            result).

        no: int
            How many future states to draw.

        Returns
        -------
        list
            The drawn states in order, each one sampled from the state
            drawn before it.
        """
        trajectory = []
        state = current_state
        for _ in range(no):
            state = self.next_state(state)
            trajectory.append(state)
        return trajectory

In [27]:
# Rows and columns follow the order of `states`: Facebook, Email, Product_page.
# (Interactive `>>>` prompts removed so the cell is valid Python on its own.)
transition_matrix = [[0.8, 0.19, 0.01],
                     [0.2,  0.7,  0.1],
                     [0.1,  0.2,  0.7]]
weather_chain = MarkovChain(transition_matrix=transition_matrix,
                            states=['Facebook', 'Email', 'Product_page'])
weather_chain.next_state(current_state='Facebook')



'Facebook'

In [28]:
# Sample the state that follows 'Email'
weather_chain.next_state(current_state='Email')

'Email'

In [29]:
# Sample a 7-step journey that starts from the product page
weather_chain.generate_states(current_state='Product_page', no=7)

['Product_page',
 'Product_page',
 'Email',
 'Email',
 'Email',
 'Facebook',
 'Facebook']

### How many actions does it take, on average, to reach conversion?

In [30]:
# Supposing the answer is 5, sample one possible 5-step journey starting from the product page

weather_chain.generate_states(current_state='Product_page', no=5)

['Product_page', 'Facebook', 'Facebook', 'Facebook', 'Email']