<a href="https://colab.research.google.com/github/AnuragBarthwal/AnuragBarthwal/blob/main/Transition_Matrix_Markov-Chain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd

from google.colab import drive
drive.mount('/content/drive')

df = pd.read_excel('/content/drive/My Drive/MarkovChainAnalysis1.xlsx')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
ls_raw_time_series = df.loc[:,"AQI"].dropna().tolist()

In [16]:
import pandas as pd

def get_transition_tuples(ls):
    ''' Converts a time series into a list of transition tuples'''
    return [(ls[i-1], ls[i]) for i in range(1, len(ls))]

def get_transition_event(tup):
    ''' Converts a tuple into a discrete transition event '''
    if tup[1] < 100:
        transition_event = 'S'
    elif 100 <= tup[1] < 200:
        transition_event = 'M'
    elif 200 <= tup[1] < 300:
        transition_event = 'P'
    elif tup[1] >= 300:
        transition_event = 'H'
    return transition_event

# Derive single-step state transition tuples
ls_transitions = get_transition_tuples(ls_raw_time_series)

# Convert raw time series data into discrete events
ls_events = [get_transition_event(tup) for tup in ls_transitions]
ls_event_transitions = get_transition_tuples(ls_events)

# Create an index (list) of unique event types
ls_index = ['S', 'M', 'P', 'H']

# Initialize Markov transition matrix with zeros
df = pd.DataFrame(0, index=ls_index, columns=ls_index)

# Derive transition matrix (or co-occurrence matrix)
for i, j in ls_event_transitions:
    df.loc[i, j] += 1  # Update (i, j) element

''' Derive row-normalized transition matrix:
- Elements are normalized by row sum (fill NAs/NaNs with 0s)
- df.sum(axis=1) sums up each row, df.div(..., axis=0) then divides each column element
'''
df_rnorm = df.div(df.sum(axis=1), axis=0).fillna(0.00)

''' Derive column-normalized transition matrix:
- Elements are normalized by column sum (fill NAs/NaNs with 0s)
- df.sum(axis=0) sums up each col, df.div(..., axis=1) then divides each row element
'''
df_cnorm = df.div(df.sum(axis=0), axis=1).fillna(0.00)

print("Transition Matrix:")
print(df)
print("\nRow-normalized Transition Matrix:")
print(df_rnorm)
print("\nColumn-normalized Transition Matrix:")
print(df_cnorm)


Transition Matrix:
   S  M   P   H
S  1  0   2   0
M  0  2   4   1
P  1  3  17  12
H  1  2  10  96

Row-normalized Transition Matrix:
          S         M         P         H
S  0.333333  0.000000  0.666667  0.000000
M  0.000000  0.285714  0.571429  0.142857
P  0.030303  0.090909  0.515152  0.363636
H  0.009174  0.018349  0.091743  0.880734

Column-normalized Transition Matrix:
          S         M         P         H
S  0.333333  0.000000  0.060606  0.000000
M  0.000000  0.285714  0.121212  0.009174
P  0.333333  0.428571  0.515152  0.110092
H  0.333333  0.285714  0.303030  0.880734
