# Markov Chain

In [10]:
import pandas as pd
import numpy as np
import seaborn as sns

![](weather.png)

In [2]:
data = ['cold', 'cold', 'hot', 'cold', 'cold', 'hot', 'cold', 'hot', 'hot']

### 1. Convert the data to a DataFrame
with a single column `weather`

In [4]:
df = pd.DataFrame(data, columns=['weather'])
df

Unnamed: 0,weather
0,cold
1,cold
2,hot
3,cold
4,cold
5,hot
6,cold
7,hot
8,hot


### 2. Identify transitions
create another column so that we have the columns:

* $Y_t$ – the current state
* $Y_{t-1}$ –  the state before

In [5]:
df['before']=df['weather'].shift(1)
df

Unnamed: 0,weather,before
0,cold,
1,cold,cold
2,hot,cold
3,cold,hot
4,cold,cold
5,hot,cold
6,cold,hot
7,hot,cold
8,hot,hot


### 3. Count transitions
Count the absolute number of each possible transition

In [9]:
(df['before']+' --> '+ df['weather']).value_counts()

cold --> hot     3
cold --> cold    2
hot --> cold     2
hot --> hot      1
dtype: int64

In [17]:
df['dummy'] = 1 # adds a dummy column to allow to group by two columns


In [18]:
df.groupby(['before', 'weather'])['dummy'].count()

before  weather
cold    cold       2
        hot        3
hot     cold       2
        hot        1
Name: dummy, dtype: int64

In [21]:
df2=df.groupby(['before', 'weather'])['dummy'].count().unstack()
df2

weather,cold,hot
before,Unnamed: 1_level_1,Unnamed: 2_level_1
cold,2,3
hot,2,1


### 4. Calulate a transition matrix
The transition matrix $P$ has the element $p_{ij}$, with rows $i$ and columns $j$, such that:

$$
p_{ij} = P(Y_t = y_j | Y_{t-1} = y_i)
$$

For example $p_{0,1} = p_{cold, hot}$ is the probability of a hot day when it was cold the day before. 

In [33]:
P=(df2.T/df2.sum(axis=1)).T
P

weather,cold,hot
before,Unnamed: 1_level_1,Unnamed: 2_level_1
cold,0.4,0.6
hot,0.666667,0.333333


### 5. Calulate probabilities for the next day

In [41]:
day0 = np.array([0.0, 1.0]) # 0 items in cold and one in hot

day1 = np.dot(day0, P)
day1

array([0.66666667, 0.33333333])

In [43]:
day = pd.DataFrame()
day['0'] = day0


Unnamed: 0,0
0,0.0
1,1.0


In [48]:
day['1'] = ((day['0'].T).dot(P)).T


ValueError: matrices are not aligned

In [None]:
or i in range (1,20):
    day[str(i)] = day[str(i-1)].dot(P)

### 6. Calulate probabilities two days ahead

In [39]:
day2=np.dot(day1, P)
day2

array([0.48888889, 0.51111111])

### 7. Calculate the probabilities many days ahead

In [40]:
day2.dot(P).dot(P).dot(P).dot(P)

array([0.52612653, 0.47387347])