# Markov Chain

In [1]:
import pandas as pd
import numpy as np
import random

![](weather.png)

In [2]:
# Observed weather sequence; the cells below treat neighbouring entries as consecutive days.
data = 'cold cold hot cold cold hot cold hot hot'.split()

### 1. Convert the data to a DataFrame
with a single column `weather`

In [3]:
# One row per observation, in the original order, under a single `weather` column.
states = pd.DataFrame(data, columns=['weather'])
states

Unnamed: 0,weather
0,cold
1,cold
2,hot
3,cold
4,cold
5,hot
6,cold
7,hot
8,hot


### 2. Identify transitions
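Create a second column holding the following day's weather, so that each row describes one observed transition:

* `weather` – $Y_t$, the current state
* `weather_tomorrow` – $Y_{t+1}$, the state one day later (missing for the last row, which has no successor)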

In [4]:
# Shifting by -1 moves every value one row up, so each day sits next to its
# successor; the final row has no successor and is left as NaN.
states['weather_tomorrow'] = states['weather'].shift(periods=-1)
states

Unnamed: 0,weather,weather_tomorrow
0,cold,cold
1,cold,hot
2,hot,cold
3,cold,cold
4,cold,hot
5,hot,cold
6,cold,hot
7,hot,hot
8,hot,


### 3. Calculate a transition matrix


In [11]:
# Transition matrix: cross-tabulate (today, tomorrow) pairs and normalise by row.
# normalize='index' divides each row by its row total, so every row sums to 1;
# the default (normalize=False) would return the raw counts instead.
P = pd.crosstab(
    index=states['weather'],
    columns=states['weather_tomorrow'],
    normalize='index',
)
P

weather_tomorrow,cold,hot
weather,Unnamed: 1_level_1,Unnamed: 2_level_1
cold,0.4,0.6
hot,0.666667,0.333333


In [12]:
# Same cross-tabulation without `normalize`: the raw number of observed
# transitions from each state (rows) to each next state (columns).
pd.crosstab(index=states['weather'], columns=states['weather_tomorrow'])

weather_tomorrow,cold,hot
weather,Unnamed: 1_level_1,Unnamed: 2_level_1
cold,2,3
hot,2,1


### 4. Calculate probabilities for the next day

In [13]:
# Probability vector over today's state, ordered like P's rows/columns:
# position 0 is cold, position 1 is hot. [0, 1] means "today is hot" for certain.
initial_state = np.array([0, 1])

# One step ahead: row vector times the transition matrix.
np.dot(initial_state, P.values)

array([0.66666667, 0.33333333])

### 5. Calculate probabilities two days ahead

In [14]:
# Two steps ahead: apply the transition matrix twice, left to right.
np.dot(np.dot(initial_state, P), P)

array([0.48888889, 0.51111111])

### 6. Calculate the probabilities many days ahead

In [15]:
# Number of transitions to look ahead; change this to explore other horizons.
N_DAYS = 11

# The distribution after n steps is initial_state · P^n. Raising P to a power
# replaces a long chain of repeated `.dot(P)` calls and makes the horizon explicit.
# As N_DAYS grows the result settles towards a fixed distribution for this matrix.
initial_state.dot(np.linalg.matrix_power(np.asarray(P), N_DAYS))

array([0.52631604, 0.47368396])

### 7. Weighted Random Choices with probabilities

In [35]:
# Convert the transition matrix into a nested dictionary keyed by the current
# state: {current_state: {next_state: probability}}.
# The inner keys follow P's column order, i.e. the probability of moving to
# cold comes first and the probability of moving to hot second.
probs = P.to_dict('index')
probs

{'cold': {'cold': 0.4, 'hot': 0.6},
 'hot': {'cold': 0.6666666666666666, 'hot': 0.3333333333333333}}

In [42]:
# Map every state to a plain list of its transition probabilities, ordered like
# P's columns (cold first, then hot).
# The mapping is rebuilt from P rather than converted in place, so running this
# cell more than once is safe (an in-place conversion fails on the second run
# because the values are already lists and have no `.values()`).
probs = {state: row.tolist() for state, row in P.iterrows()}

In [43]:
# Display the converted mapping: each state now maps to a list of probabilities.
probs

{'cold': [0.4, 0.6], 'hot': [0.6666666666666666, 0.3333333333333333]}

In [44]:
# Possible states, taken from P's column labels so that their order always
# matches the order of the weights stored in `probs`.
# NOTE(review): this rebinds `states`, which earlier held the observations
# DataFrame; cells that expect the DataFrame will fail if re-run after this one.
states = P.columns.tolist()

# State the chain starts from when sampling.
current_state = 'cold'


In [47]:
# Draw one next state, weighting each candidate by its transition probability
# from the current state. The result is a one-element list.
# No seed is set in the cells shown here, so the outcome varies between runs.
random.choices(population=states, weights=probs[current_state], k=1)

['cold']

In [49]:
# `random.choices` always returns a list, so take its only element to obtain
# the state label itself as a string.
random.choices(population=states, weights=probs[current_state], k=1)[0]

'cold'