In [1]:
import numpy as np

In [2]:
def connectionist_temporal_classification(X, Y, num_labels):
    # Input:
    # X: Input sequence (shape: (input_seq_length, input_feature_dim))
    # Y: Output sequence (shape: (output_seq_length,))
    # num_labels: Total number of labels (including the blank label)
    
    # Step 1: Initialization
    T, input_dim = X.shape
    output_seq_length = len(Y)
    blank_label = num_labels - 1

    # Step 2: Forward Algorithm
    forward_probs = np.zeros((T, num_labels))
    forward_probs[0, Y[0]] = 1

    for t in range(1, T):
        for label in range(num_labels):
            prob = forward_probs[t-1, label]
            if label == Y[t]:
                prob += forward_probs[t-1, label]
            if Y[t] == blank_label:
                prob += forward_probs[t-1, blank_label]
            forward_probs[t, label] = prob

    # Step 3: Backward Algorithm
    backward_probs = np.zeros((T, num_labels))
    backward_probs[T-1, Y[-1]] = 1

    for t in range(T-2, -1, -1):
        for label in range(num_labels):
            prob = backward_probs[t+1, label]
            if label == Y[t+1]:
                prob += backward_probs[t+1, label]
            if Y[t+1] == blank_label:
                prob += backward_probs[t+1, blank_label]
            backward_probs[t, label] = prob

    # Step 4: Compute Aligned Probabilities
    aligned_probs = forward_probs * backward_probs
    aligned_probs /= np.sum(aligned_probs, axis=1, keepdims=True)

    # Step 5: Remove Repetitions and Blank Labels
    non_blank_probs = aligned_probs[:, :-1]
    non_blank_probs = np.maximum(non_blank_probs, np.zeros_like(non_blank_probs))

    # Step 6: Compute Loss
    loss = -np.log(np.sum(non_blank_probs, axis=1)).sum()

    return loss

In [3]:
# Example Usage:
input_seq = np.random.rand(100, 20)  # Example input sequence of length 100 and 20 features
output_seq = [1, 3, 2]  # Example output sequence
num_labels = 5  # Including the blank label


In [4]:
ctc_loss = connectionist_temporal_classification(input_seq, output_seq, num_labels)
print("CTC Loss:", ctc_loss)

IndexError: list index out of range

In [5]:
import numpy as np

def connectionist_temporal_classification(X, Y, num_labels):
    # Input:
    # X: Input sequence (shape: (input_seq_length, input_feature_dim))
    # Y: Output sequence (shape: (output_seq_length,))
    # num_labels: Total number of labels (including the blank label)
    
    # Step 1: Initialization
    T, input_dim = X.shape
    output_seq_length = len(Y)
    blank_label = num_labels - 1

    # Step 2: Forward Algorithm
    forward_probs = np.zeros((T, num_labels))
    forward_probs[0, Y[0]] = 1

    for t in range(1, T):
        for label in range(num_labels):
            prob = forward_probs[t-1, label]
            if label == Y[t]:
                prob += forward_probs[t-1, label]
            if Y[t] == blank_label:
                prob += forward_probs[t-1, blank_label]
            forward_probs[t, label] = prob

    # Step 3: Backward Algorithm
    backward_probs = np.zeros((T, num_labels))
    backward_probs[T-1, Y[output_seq_length - 1]] = 1

    for t in range(T-2, -1, -1):
        for label in range(num_labels):
            prob = backward_probs[t+1, label]
            if label == Y[t + 1]:
                prob += backward_probs[t+1, label]
            if Y[t + 1] == blank_label:
                prob += backward_probs[t+1, blank_label]
            backward_probs[t, label] = prob

    # Step 4: Compute Aligned Probabilities
    aligned_probs = forward_probs * backward_probs
    aligned_probs /= np.sum(aligned_probs, axis=1, keepdims=True)

    # Step 5: Remove Repetitions and Blank Labels
    non_blank_probs = aligned_probs[:, :-1]
    non_blank_probs = np.maximum(non_blank_probs, np.zeros_like(non_blank_probs))

    # Step 6: Compute Loss
    loss = -np.log(np.sum(non_blank_probs, axis=1)).sum()

    return loss

# Example Usage:
input_seq = np.random.rand(100, 20)  # Example input sequence of length 100 and 20 features
output_seq = [1, 3, 2]  # Example output sequence
num_labels = 5  # Including the blank label

ctc_loss = connectionist_temporal_classification(input_seq, output_seq, num_labels)
print("CTC Loss:", ctc_loss)


IndexError: list index out of range

In [6]:
# Example Usage:
input_seq = np.random.rand(100, 20)  # Example input sequence of length 100 and 20 features
output_seq = [1, 3, 2]  # Example output sequence

# Ensure the output sequence contains valid label indices (within the range of num_labels)
num_labels = 5  # Including the blank label
valid_label_indices = list(range(num_labels))
output_seq = [label for label in output_seq if label in valid_label_indices]

# Ensure the length of the output sequence is less than or equal to the length of the input sequence
input_seq_length = input_seq.shape[0]
output_seq_length = len(output_seq)
if output_seq_length > input_seq_length:
    output_seq = output_seq[:input_seq_length]

# Call the CTC algorithm
ctc_loss = connectionist_temporal_classification(input_seq, output_seq, num_labels)
print("CTC Loss:", ctc_loss)

IndexError: list index out of range

In [8]:
# Example Usage:
X = np.random.rand(100, 20)  # Example input sequence of length 100 and 20 features
Y = [1, 3, 2]  # Example output sequence
num_labels = 5  # Including the blank label

# Step 1: Validate input data and output sequence
if len(X.shape) != 2 or len(Y) == 0:
    raise ValueError("Invalid input data or output sequence")

# Step 2: Ensure num_labels includes the blank label and is greater than max label index in Y
valid_label_indices = list(range(num_labels))
if not all(label in valid_label_indices for label in Y):
    raise ValueError("Invalid label indices in output sequence")

# Step 3: Ensure the output sequence length is less than or equal to the input sequence length
input_seq_length = X.shape[0]
output_seq_length = len(Y)
if output_seq_length > input_seq_length:
    output_seq = output_seq[:input_seq_length]

# Call the CTC algorithm with the corrected input data and output sequence
ctc_loss = connectionist_temporal_classification(input_seq, output_seq, num_labels)
print("CTC Loss:", ctc_loss)

IndexError: list index out of range

In [21]:
X = np.random.rand(4 , 20)
Y = [1,2]
num_labels = 8
T, input_dim = X.shape # 4 , 20
output_seq_length = len(Y) # 3
blank_label = num_labels - 1 # 7

#######################################

# Step 2: Forward Algorithm
forward_probs = np.zeros((T, num_labels))
forward_probs[0, Y[0]] = 1

""" array([[0., 1., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0.]])  
"""

for t in range(1, T): # (1,4)
    for label in range(num_labels): (8)
        prob = forward_probs[t-1, label]
        if label == Y[t]:
            prob += forward_probs[t-1, label]
        if Y[t] == blank_label:
            prob += forward_probs[t-1, blank_label]
        forward_probs[t, label] = prob

In [22]:
a  = np.zeros((T, num_labels))
a[0, Y[0]] = 1
a

array([[0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.]])

In [None]:


import numpy as np

def connectionist_temporal_classification(X, Y, num_labels):
    # Input:
    # X: Input sequence (shape: (input_seq_length, input_feature_dim))
    # Y: Output sequence (shape: (output_seq_length,))
    # num_labels: Total number of labels (including the blank label)
    
    # Step 1: Initialization
    T, input_dim = X.shape
    output_seq_length = len(Y)
    blank_label = num_labels - 1

    # Step 2: Forward Algorithm
    forward_probs = np.zeros((T, num_labels))
    forward_probs[0, Y[0]] = 1

    for t in range(1, T):
        for label in range(num_labels):
            prob = forward_probs[t-1, label]
            if label == Y[t]:
                prob += forward_probs[t-1, label]
            if Y[t] == blank_label:
                prob += forward_probs[t-1, blank_label]
            forward_probs[t, label] = prob

    # Step 3: Backward Algorithm
    backward_probs = np.zeros((T, num_labels))
    backward_probs[T-1, Y[output_seq_length - 1]] = 1

    for t in range(T-2, -1, -1):
        for label in range(num_labels):
            prob = backward_probs[t+1, label]
            if label == Y[t + 1]:
                prob += backward_probs[t+1, label]
            if Y[t + 1] == blank_label:
                prob += backward_probs[t+1, blank_label]
            backward_probs[t, label] = prob

    # Step 4: Compute Aligned Probabilities
    aligned_probs = forward_probs * backward_probs
    aligned_probs /= np.sum(aligned_probs, axis=1, keepdims=True)

    # Step 5: Remove Repetitions and Blank Labels
    non_blank_probs = aligned_probs[:, :-1]
    non_blank_probs = np.maximum(non_blank_probs, np.zeros_like(non_blank_probs))

    # Step 6: Compute Loss
    loss = -np.log(np.sum(non_blank_probs, axis=1)).sum()

    return loss

# Example Usage:
input_seq = np.random.rand(100, 20)  # Example input sequence of length 100 and 20 features
output_seq = [1, 3, 2]  # Example output sequence
num_labels = 5  # Including the blank label

ctc_loss = connectionist_temporal_classification(input_seq, output_seq, num_labels)
print("CTC Loss:", ctc_loss)


In [1]:
def find_subwords(word_list, word_str):
    n = len(word_str)
    dp = [[] for _ in range(n + 1)]
    dp[0] = [[]]

    for end in range(1, n + 1):
        for start in range(end):
            subword = word_str[start:end]
            if subword in word_list and dp[start]:
                for prev_comb in dp[start]:
                    dp[end].append(prev_comb + [subword])

    return dp[n]

# Test
input_list = ['ad', 'a', 'd', 'n', 'd', 'an', 'nd']
input_word = 'and'
output = find_subwords(input_list, input_word)
print(output) 

[['a', 'nd'], ['an', 'd'], ['a', 'n', 'd']]


In [2]:
n = 5
dp = [[] for _ in range(n + 1)]
dp[0] = [[]]

In [3]:
dp

[[[]], [], [], [], [], []]