In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found beneath it.
for root, _, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, name) for name in names)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from itertools import groupby

import numpy as np
import tensorflow as tf
# `tf` is only a local alias, not an importable package name, so a
# `from ... import` statement has to spell out the real package: `tensorflow`.
from tensorflow.keras.layers import StringLookup

# --- 1. Define Character Set and Mappings ---
# This must be the same character set the model was trained on:
# the 26 lowercase letters followed by the space character.
CHARACTERS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
              'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ']
# The CTC 'blank' token is not an element of CHARACTERS; it is the first index
# after the last character (characters are 0..len-1, blank is len).
BLANK_INDEX = len(CHARACTERS)

# num_oov_indices=0 removes the "[UNK]" slot that StringLookup reserves at
# index 0 by default. With that slot present the characters would be numbered
# 1..len(CHARACTERS), so the space character would share its index with
# BLANK_INDEX (and be dropped while decoding) and index 0 would decode to
# "[UNK]". Note: with no OOV slot, char_to_num rejects characters that are not
# in CHARACTERS instead of mapping them to an unknown token.
char_to_num = StringLookup(
    vocabulary=CHARACTERS, num_oov_indices=0, mask_token=None
)
# Inverse mapping (index -> character) built from the same vocabulary.
num_to_char = StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    num_oov_indices=0,
    mask_token=None,
    invert=True,
)

# --- 2. Define the Model Architecture ---
# This should be the exact same architecture that was used for training.
def build_model(input_shape, vocab_size):
    """Assemble the Conv2D -> bidirectional LSTM -> softmax network.

    Args:
        input_shape: Shape of a single input example (without the batch
            axis); handed directly to ``tf.keras.Input``.
        vocab_size: Number of characters. The output layer has
            ``vocab_size + 1`` units, i.e. one class more than the vocabulary.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping the input to per-step
        softmax probabilities.
    """
    layers = tf.keras.layers

    spectrogram_in = tf.keras.Input(shape=input_shape)

    # Convolutional frontend: one 3x3 conv followed by 2x2 max pooling
    conv = layers.Conv2D(32, (3, 3), activation="relu", padding="same")
    pool = layers.MaxPooling2D((2, 2))
    feature_map = pool(conv(spectrogram_in))

    # Merge the last two axes so every step along axis 1 becomes one
    # feature vector the recurrent layer can consume
    _, n_steps, n_bins, n_channels = feature_map.shape
    to_sequence = layers.Reshape((n_steps, n_bins * n_channels))
    sequence = to_sequence(feature_map)

    # Recurrent backend: keeps one output vector per step
    recurrent = layers.Bidirectional(layers.LSTM(128, return_sequences=True))
    sequence = recurrent(sequence)

    # Per-step class probabilities
    classifier = layers.Dense(units=vocab_size + 1, activation="softmax")
    probabilities = classifier(sequence)

    return tf.keras.Model(inputs=spectrogram_in, outputs=probabilities)

# --- 3. CTC Decoding Function ---
def ctc_greedy_decode(predictions):
    """Decode the output of a CTC model for one example with a greedy algorithm.

    Args:
        predictions: Array-like of shape (time_steps, num_classes) holding the
            per-time-step class scores of a single example (no batch axis).

    Returns:
        The decoded text. An empty string is returned when every time step
        is predicted as the blank token.

    Raises:
        ValueError: If ``predictions`` is not 2-D (for example when a whole
            batch is passed instead of a single item).
    """
    predictions = np.asarray(predictions)
    if predictions.ndim != 2:
        raise ValueError(
            "predictions must have shape (time_steps, num_classes); "
            f"got shape {predictions.shape}"
        )

    # Use argmax to get the most likely character index at each time step
    predicted_indices = np.argmax(predictions, axis=-1).tolist()

    # Collapse runs of the same index first, THEN drop blanks. The order
    # matters: a blank between two identical characters keeps them distinct.
    decoded_indices = [
        index for index, _ in groupby(predicted_indices) if index != BLANK_INDEX
    ]

    if not decoded_indices:
        # Nothing to look up. This also avoids tf.constant([]), which would
        # produce a float32 tensor rather than an integer index tensor.
        return ""

    # Convert the integer sequence back to a string
    decoded_text_tensor = num_to_char(tf.constant(decoded_indices, dtype=tf.int64))
    return tf.strings.reduce_join(decoded_text_tensor).numpy().decode("utf-8")

# --- 4. Putting It All Together ---

# a) Stand-in input: random values laid out like a log Mel spectrogram,
#    (time_steps, num_mels, channels=1). Replace this with your actual data.
dummy_spectrogram = np.random.rand(150, 80, 1).astype(np.float32)

# b) Build the model. In this example the weights stay at their random
#    initialization; in a real scenario trained weights would be loaded, e.g.
#    model.load_weights('path/to/your/trained_weights.h5')
input_shape = dummy_spectrogram.shape
model = build_model(input_shape, len(CHARACTERS))

print("--- Model Summary ---")
model.summary()

# c) Run inference. The model expects a batch, so a leading batch axis of
#    size 1 is added in front of the single spectrogram.
input_spectrogram = dummy_spectrogram[np.newaxis, ...]
predictions = model.predict(input_spectrogram)

# d) Decode the first (and only) item of the batch into the final text
decoded_text = ctc_greedy_decode(predictions[0])

print(f"\nInput Spectrogram Shape: {dummy_spectrogram.shape}")
print(f"Model Output Shape (Probabilities): {predictions.shape}")
print(f"Decoded Text: '{decoded_text}'")
print("\nNote: The output is gibberish because the model has not been trained.")