# CTC implementation

This notebook implements a Connectionist Temporal Classification (CTC) layer for a character-level model using NumPy.

In [1]:
import numpy as np

## Utils

First, let's define some utility functions: one that pads a target transcript with blank symbols, one that generates a random CTC output, and the softmax function.

In [34]:
def _pad_tgt_seq(tgt: str):
    return f"^{'^'.join(tgt.lower())}^"

def _random_ctc_output(T, N=28):
    return np.random.random((T, N))

def softmax(in_arr):
    """Row-wise softmax of a 2-D array.

    Args:
        in_arr: array-like of shape (rows, classes) holding unnormalised scores.

    Returns:
        An array of the same shape whose rows are non-negative and sum to 1.
    """
    scores = np.asarray(in_arr)

    # Softmax is invariant to adding a constant to every entry of a row, so
    # subtract the row maximum first: the largest exponent becomes exp(0) = 1
    # and np.exp can no longer overflow to inf (which would produce nan rows).
    row_max = np.max(scores, axis=1, keepdims=True) if scores.size else 0.0
    exp_arr = np.exp(scores - row_max)

    return exp_arr / np.sum(exp_arr, axis=1, keepdims=True)

## Build Vocabulary

In [89]:
from typing import List

class CharModel:
    """Character vocabulary: 26 lower-case letters, the space, and the CTC blank.

    Indices follow the order letters (0-25), space (26), blank '^' (27).
    """

    chars = "abcdefghijklmnopqrstuvwxyz"
    SPACE = " "
    BLANK_SYMBOL = '^'

    # Index -> symbol table, then its inverse for symbol -> index lookups.
    ind_to_char = dict(enumerate(chars + SPACE + BLANK_SYMBOL))
    char_to_ind = {symbol: index for index, symbol in ind_to_char.items()}

    Vocab_Size = len(chars) + len(SPACE) + len(BLANK_SYMBOL)

    @staticmethod
    def encode(padded_seq: str):
        """Lower-case `padded_seq` and return its symbol indices as an int64 array.

        Raises KeyError for any character that is not in the vocabulary.
        """
        indices = (CharModel.char_to_ind[symbol] for symbol in padded_seq.lower())
        return np.fromiter(indices, dtype=np.int64)

    @staticmethod
    def decode(ind_seq: List[int]):
        """Return the list of symbols corresponding to the indices in `ind_seq`."""
        return [CharModel.ind_to_char[index] for index in ind_seq]

In [90]:
# Encode a sample sentence: the input is lower-cased, letters map to 0-25 and the space to 26.
CharModel.encode("I live")

array([ 8, 26, 11,  8, 21,  4], dtype=int64)

## CTC Layer

Let's build the [CTC](https://www.cs.toronto.edu/~graves/icml_2006.pdf) layer.

In [129]:
class CTC:
    """Forward/backward CTC computation for a single (output, target) pair.

    Follows Graves et al., 2006. The target is extended with blanks
    (`padded_tgt`), and three tables are filled in:

    * `alpha[t, s]` - forward variable, formula (6)
    * `beta[t, s]`  - backward variable, formula (10)
    * `gd[t, k]`    - d log p(target | output) / d output[t, k], formula (15)

    Args:
        output: array of shape (T, vocab) with the per-frame label scores;
            the recursions treat `output[t, k]` as the probability of label
            `k` at time `t`.
        tgt: the target transcript (un-padded string).

    Call `forward()` and `backward()` before `gradient()`.
    """

    def __init__(self, output, tgt):
        self.tgt = tgt
        self.output = output

        # Blank-extended target as an array of vocabulary indices.
        self.padded_tgt = CharModel.encode(_pad_tgt_seq(tgt))

        # `padded_tgt` holds integer indices, so the blank must be compared
        # by index; comparing an index against the '^' string is never true.
        self.blank_ind = CharModel.char_to_ind[CharModel.BLANK_SYMBOL]

        n_frames = self.output.shape[0]
        n_states = self.padded_tgt.shape[0]

        self.alpha = np.zeros((n_frames, n_states))  # forward variable alpha

        self.beta = np.zeros((n_frames, n_states))  # backward variable beta

        self.gd = np.zeros_like(output)  # gradient

    def _alpha(self, t, s):
        """formula (6) in Graves et al., 2006

        Returns alpha_t(s) given that row `t - 1` of `self.alpha` is already
        filled in. Out-of-range `s` yields 0.
        """
        if s < 0 or s >= self.padded_tgt.shape[0]:
            return 0

        curr_char = self.padded_tgt[s]
        curr_score = self.output[t, curr_char]

        if t == 0:
            # A valid path can only start in the leading blank or the first label.
            if s == 0:
                return self.output[0, self.blank_ind]
            if s == 1:
                return curr_score
            return 0

        total = self.alpha[t - 1, s]
        if s >= 1:
            total = total + self.alpha[t - 1, s - 1]

        # The skip transition s-2 -> s is only allowed into a non-blank label
        # that differs from the label two positions back.
        can_skip = (
            curr_char != self.blank_ind
            and s >= 2
            and self.padded_tgt[s - 2] != curr_char
        )
        if can_skip:
            total = total + self.alpha[t - 1, s - 2]

        return total * curr_score

    def _beta(self, t, s):
        """formula (10) in Graves et al., 2006

        Returns beta_t(s) given that row `t + 1` of `self.beta` is already
        filled in. Out-of-range `s` yields 0.
        """
        if s < 0 or s >= self.padded_tgt.shape[0]:
            return 0

        curr_char = self.padded_tgt[s]
        curr_score = self.output[t, curr_char]

        last_time_step = self.output.shape[0] - 1
        last_char_ind = self.padded_tgt.shape[0] - 1

        if t == last_time_step:
            # A valid path can only end in the trailing blank or the last label.
            if s == last_char_ind:
                return self.output[t, self.blank_ind]
            if s == last_char_ind - 1:
                return curr_score
            return 0

        total = self.beta[t + 1, s]
        if s + 1 <= last_char_ind:
            total = total + self.beta[t + 1, s + 1]

        # Mirror image of the forward skip rule. The s+2 term is added as a
        # separate step so that a missing s+2 state only drops that one term
        # instead of zeroing the whole sum.
        can_skip = (
            curr_char != self.blank_ind
            and s + 2 <= last_char_ind
            and self.padded_tgt[s + 2] != curr_char
        )
        if can_skip:
            total = total + self.beta[t + 1, s + 2]

        return total * curr_score

    def forward(self):
        """Fill `self.alpha` row by row, from the first frame to the last."""
        for t in range(self.alpha.shape[0]):
            for s in range(self.alpha.shape[1]):
                self.alpha[t, s] = self._alpha(t, s)

    def backward(self):
        """Fill `self.beta` row by row, from the last frame to the first."""
        for t in range(self.beta.shape[0] - 1, -1, -1):
            for s in range(self.beta.shape[1] - 1, -1, -1):
                self.beta[t, s] = self._beta(t, s)

    def gradient(self):
        """formula (15) in Graves et al., 2006

        Writes d log p / d output[t, k] into `self.gd[t, k]` for every label
        `k` that occurs in the padded target; entries for labels that do not
        occur are left untouched. Requires `forward()` and `backward()` to
        have been run.
        """
        # p(target | output): paths may end in the final blank or the final label.
        seq_prob = self.alpha[-1, -1] + self.alpha[-1, -2]
        alpha_beta = self.alpha * self.beta

        for k in range(self.output.shape[1]):
            # Positions of label k in the padded target; independent of t,
            # so computed once per label rather than once per (t, k).
            at_label = self.padded_tgt == k
            if not at_label.any():
                continue

            d_p_d_yk = alpha_beta[:, at_label].sum(axis=1) / self.output[:, k] ** 2
            self.gd[:, k] = d_p_d_yk / seq_prob

In [130]:
# Seed NumPy's global RNG so the demo gives the same numbers on every Restart & Run All.
np.random.seed(0)

# The CTC recursions treat each row of the output as label probabilities, so
# normalise the raw random scores with softmax before handing them to the layer.
arr = softmax(_random_ctc_output(10))
tgt = "I am"
ctc = CTC(arr, tgt)

In [131]:
# Fill the forward (alpha) and backward (beta) tables; gradient() reads both.
ctc.forward()
ctc.backward()

In [132]:
# Populate ctc.gd from the alpha and beta tables computed above.
ctc.gradient()