In [1]:
import numpy as np

# ----------------------------------------
# 1. Vocabulary: three output labels and one blank symbol
# ----------------------------------------
blank = "_"                # the extra "blank" symbol
labels = ["a", "b", "c"]   # the three real labels


In [2]:
# Full symbol inventory: labels first, blank last, so that list position
# gives a=0, b=1, c=2, blank=3.
symbols = [*labels, blank]
print(symbols)

['a', 'b', 'c', '_']


In [3]:
# Lookup table: symbol -> its position in `symbols`.
symbol_to_idx = dict(zip(symbols, range(len(symbols))))
print(symbol_to_idx)

{'a': 0, 'b': 1, 'c': 2, '_': 3}


In [4]:
# Inverse lookup table: integer index -> symbol.
idx_to_symbol = dict(zip(symbol_to_idx.values(), symbol_to_idx.keys()))
print(idx_to_symbol)

{0: 'a', 1: 'b', 2: 'c', 3: '_'}


In [5]:
# ----------------------------------------
# 2. Target sequence (labels only, no blanks)
#    Blanks are not written here; they are inserted later, when the
#    extended target is built from y.
# ----------------------------------------
y = ["a", "b"]   # students will compute CTC alignments over this

In [6]:
# ----------------------------------------
# 3. Simulated softmax output of a neural net, stored as element-wise logs
#    One row per time step, one column per symbol (a, b, c, blank):
#    T=4 time steps, V=4 symbols.
# ----------------------------------------
log_probs = np.log([
    [0.6, 0.2, 0.1, 0.1],   # t=0
    [0.1, 0.7, 0.1, 0.1],   # t=1
    [0.1, 0.1, 0.7, 0.1],   # t=2
    [0.1, 0.6, 0.1, 0.2],   # t=3
])
T, V = np.shape(log_probs)
print("log_probs shape =", log_probs.shape)

log_probs shape = (4, 4)


In [7]:
# ----------------------------------------
# 4. Build extended target with blanks: e.g. y=["a","b"] -> ["_","a","_","b","_"]
# ----------------------------------------
# Put a blank in front of every label, then close with one final blank.
extended = []
for ch in y:
    extended += [blank, ch]
    print("Adding symbol", ch, "-> extended target =", extended)
extended += [blank]

print("Extended target =", extended)

# Integer form of the extended target (one symbol index per position).
target_idx = np.array([symbol_to_idx[sym] for sym in extended])
S = len(extended)
print("Extended target indices =", target_idx)
print("S =", S)

Adding symbol a -> extended target = ['_', 'a']
Adding symbol b -> extended target = ['_', 'a', '_', 'b']
Extended target = ['_', 'a', '_', 'b', '_']
Extended target indices = [3 0 3 1 3]
S = 5


In [8]:
# ----------------------------------------
# 5. STUDENTS MUST IMPLEMENT THIS:
# ----------------------------------------

def ctc_forward(log_probs, target_idx):
    """
    Compute the CTC forward variables (alpha) for one extended target.

    alpha[t, s] accumulates the probability of every alignment prefix that
    has consumed time steps 0..t and currently sits at position s of the
    extended target (blank, y1, blank, y2, ..., blank).

    log_probs: (T, V) matrix of log probabilities
    target_idx: list or 1-D integer array of symbol indices for the
                extended target sequence, length S = 2*len(y) + 1

    Return:
        alpha: (T, S) matrix of forward probabilities in NORMAL space (not log)
               (students may choose log-space, but normal-space is fine here).
               If T == 0 or S == 0 the (empty) zero matrix of shape (T, S)
               is returned instead of raising IndexError.

    For S > 1 the total probability of the target is
    alpha[T-1, S-1] + alpha[T-1, S-2] (end on the last blank or last label).
    """
    T, _ = log_probs.shape  # unpacking also enforces a 2-D input
    target_idx = np.asarray(target_idx, dtype=int)
    S = len(target_idx)

    # Create alpha matrix initialized to zeros
    alpha = np.zeros((T, S))

    # ----- YOUR CODE HERE -----
    # Nothing to fill in when there are no time steps or no target positions.
    if T == 0 or S == 0:
        return alpha

    # Emission probability of extended-target position s at time t, gathered
    # once up front: emit[t, s] = exp(log_probs[t, target_idx[s]]).
    emit = np.exp(log_probs[:, target_idx])

    # Initialize base cases (t=0): a path may start on the leading blank
    # (s=0) or directly on the first label (s=1); every other state stays 0.
    alpha[0, 0] = emit[0, 0]
    if S > 1:
        alpha[0, 1] = emit[0, 1]

    # Fill the alpha matrix
    for t in range(1, T):
        for s in range(S):
            # Stay in the same state
            incoming = alpha[t - 1, s]
            # Advance from the previous state
            if s >= 1:
                incoming += alpha[t - 1, s - 1]
            # Jump over the state in between. The symbol inequality rules
            # this out for blanks (s-2 is also a blank) and for a label
            # repeated across the blank (the blank must then be visited).
            if s >= 2 and target_idx[s] != target_idx[s - 2]:
                incoming += alpha[t - 1, s - 2]
            alpha[t, s] = incoming * emit[t, s]

    return alpha

In [9]:
# ----------------------------------------
# 6. Sanity check
#    Run the forward pass on the toy example and verify the result has one
#    row per time step and one column per extended-target position.
# ----------------------------------------
alpha_test = ctc_forward(log_probs, target_idx)
print("alpha shape should be (T,S) =", alpha_test.shape)
# Enforce the expectation instead of only printing it, so a wrong
# implementation fails loudly in this cell.
assert alpha_test.shape == (log_probs.shape[0], len(target_idx)), (
    f"expected {(log_probs.shape[0], len(target_idx))}, got {alpha_test.shape}"
)
alpha_test

alpha shape should be (T,S) = (4, 5)


array([[1.00e-01, 6.00e-01, 0.00e+00, 0.00e+00, 0.00e+00],
       [1.00e-02, 7.00e-02, 6.00e-02, 4.20e-01, 0.00e+00],
       [1.00e-03, 8.00e-03, 1.30e-02, 5.50e-02, 4.20e-02],
       [2.00e-04, 9.00e-04, 4.20e-03, 4.56e-02, 1.94e-02]])