In [2]:
import numpy as np
import pandas as pd

def transition_matrix(seq, sort_states: bool = True, normalize: bool = False) -> pd.DataFrame:
    """
    Build a first-order transition matrix from a sequence of states.

    Every pair of consecutive elements (seq[k], seq[k + 1]) counts as one
    transition from the row state to the column state.

    Parameters:
    - seq: iterable of hashable states (e.g., ['a', 'b', 'c']); any iterable
           is accepted, including generators, because it is materialized
           into a list once before use
    - sort_states: if True, states are ordered with sorted()
                   if False, the order of first appearance is preserved
    - normalize: if True, returns a row-normalized probability matrix;
                 rows with no outgoing transitions (e.g. a state that only
                 occurs as the last element) are all zeros rather than NaN

    Returns:
    - pandas.DataFrame indexed by "from" state (rows) and "to" state
      (columns), holding integer counts or, with normalize=True, float
      probabilities

    Raises:
    - ValueError: if the sequence has fewer than two elements
    - TypeError: if sort_states is True and the states cannot be compared
                 with each other (raised by sorted())
    """
    # Materialize once: len() and slicing below need a real sequence, and a
    # one-shot iterator would otherwise be exhausted by the first pass.
    items = list(seq)

    if len(items) < 2:
        raise ValueError("Sequence must have at least two elements")

    # Get unique states in desired order
    if sort_states:
        states = sorted(set(items))
    else:
        states = list(dict.fromkeys(items))  # preserves first occurrence order

    idx_map = {state: i for i, state in enumerate(states)}
    n = len(states)

    # Initialize zero matrix
    count_matrix = np.zeros((n, n), dtype=int)

    # Fill transition counts. Every element of `items` is a key of `idx_map`
    # by construction, so no membership check is needed here.
    for current, next_ in zip(items, items[1:]):
        count_matrix[idx_map[current], idx_map[next_]] += 1

    # Create DataFrame
    df = pd.DataFrame(count_matrix, index=states, columns=states)

    if normalize:
        # Convert counts to probabilities row-wise; a row summing to zero
        # yields NaN from the division, which fillna(0) maps back to 0.
        df = df.div(df.sum(axis=1), axis=0).fillna(0)

    return df

# ── Example usage ─────────────────────────────────────
# Demo sequence of observed states; consecutive elements form transitions.
x = ['a', 'b', 'd', 'a', 'c', 'b', 'a', 'd']

# Build both views of the same transitions up front:
# raw counts first, then the row-normalized probabilities.
transition_counts = transition_matrix(x, sort_states=True, normalize=False)
transition_probs = transition_matrix(x, sort_states=True, normalize=True)

# Report the counts, then the probabilities rounded to two decimals.
print("🔢 Transition Counts:")
print(transition_counts)
print("\n📊 Transition Probabilities:")
print(transition_probs.round(2))


🔢 Transition Counts:
   a  b  c  d
a  0  1  1  1
b  1  0  0  1
c  0  1  0  0
d  1  0  0  0

📊 Transition Probabilities:
     a     b     c     d
a  0.0  0.33  0.33  0.33
b  0.5  0.00  0.00  0.50
c  0.0  1.00  0.00  0.00
d  1.0  0.00  0.00  0.00
