In [4]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [32]:
img_width = 128
img_height = 64
num_classes = 36

# The issue is in the input shape configuration
# Our model expects (img_height, img_width, 1) but images are loaded as (img_width, img_height, 1)
# Let's correct the model definition:

inputs = layers.Input(shape=(img_height, img_width, 1))

# CNN for feature extraction
x = layers.Conv2D(64, (3,3), activation="relu", padding="same")(inputs)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Conv2D(128, (3,3), activation="relu", padding="same")(x)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Conv2D(256, (3,3), activation="relu", padding="same")(x)

# RNN for sequence processing - adjusted dimensions
x = layers.Reshape((img_height//4, img_width//4 * 256))(x)
x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)

# Dense layer for character classification
x = layers.Dense(num_classes + 1, activation="softmax")(x)  # +1 for CTC blank token

model = Model(inputs, x)

model.summary()

In [33]:
data_dir = 'dades'

# Crear diccionario para mapear caracteres a índices
characters = []
for folder in sorted(os.listdir(data_dir)):
    if os.path.isdir(os.path.join(data_dir, folder)):
        characters.append(folder)

# Asegurar que tenemos el número esperado de clases
len(characters) <= num_classes, f"Encontrados {len(characters)} caracteres, modelo configurado para {num_classes}"

(True, 'Encontrados 36 caracteres, modelo configurado para 36')

In [34]:
# Cargar imágenes y etiquetas
images = []
labels = []

for idx, char_folder in enumerate(characters):
    char_path = os.path.join(data_dir, char_folder)
    for img_file in os.listdir(char_path):
        if img_file.endswith(('.png', '.jpg', '.jpeg')):
            img_path = os.path.join(char_path, img_file)
            try:
                # Cargar imagen en escala de grises
                img = load_img(img_path, color_mode='grayscale', target_size=(img_width, img_height))
                img_array = img_to_array(img) / 255.0  # Normalizar
                images.append(img_array)
                labels.append(idx)  # Usar índice como etiqueta
            except Exception as e:
                print(f"Error al cargar {img_path}: {e}")

In [35]:
# Convertir a arrays de numpy
X = np.array(images)
y = tf.keras.utils.to_categorical(labels, num_classes=num_classes)

# Dividir en conjuntos de entrenamiento y validación
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Compilar modelo
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [53]:
# Entrenar modelo
history = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val, y_val),
    verbose=1
)

ValueError: Unrecognized data type: x=[[[[0.9137255 ]
   [0.9529412 ]
   [0.9647059 ]
   ...
   [0.99215686]
   [0.9843137 ]
   [0.9647059 ]]

  [[0.9411765 ]
   [0.972549  ]
   [0.9764706 ]
   ...
   [0.9882353 ]
   [0.9843137 ]
   [0.9764706 ]]

  [[0.972549  ]
   [0.99215686]
   [0.9882353 ]
   ...
   [0.98039216]
   [0.98039216]
   [0.98039216]]

  ...

  [[0.99607843]
   [0.99607843]
   [0.99607843]
   ...
   [0.9882353 ]
   [0.9882353 ]
   [0.98039216]]

  [[0.96862745]
   [0.972549  ]
   [0.9764706 ]
   ...
   [0.96862745]
   [0.96862745]
   [0.9647059 ]]

  [[0.95686275]
   [0.9607843 ]
   [0.96862745]
   ...
   [0.9882353 ]
   [0.9764706 ]
   [0.9607843 ]]]


 [[[0.99607843]
   [0.9843137 ]
   [0.9882353 ]
   ...
   [0.9607843 ]
   [0.9764706 ]
   [0.99215686]]

  [[1.        ]
   [0.98039216]
   [0.972549  ]
   ...
   [0.9882353 ]
   [0.9882353 ]
   [0.9882353 ]]

  [[0.98039216]
   [0.95686275]
   [0.9490196 ]
   ...
   [0.9647059 ]
   [0.9529412 ]
   [0.94509804]]

  ...

  [[0.9529412 ]
   [0.972549  ]
   [0.9843137 ]
   ...
   [0.9647059 ]
   [0.9764706 ]
   [0.9882353 ]]

  [[0.9490196 ]
   [0.9764706 ]
   [0.9843137 ]
   ...
   [0.972549  ]
   [0.9843137 ]
   [0.99215686]]

  [[0.9607843 ]
   [0.98039216]
   [0.98039216]
   ...
   [0.972549  ]
   [0.9607843 ]
   [0.93333334]]]


 [[[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  ...

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]]


 ...


 [[[0.9411765 ]
   [0.972549  ]
   [0.9843137 ]
   ...
   [0.99215686]
   [0.9843137 ]
   [0.9764706 ]]

  [[0.9764706 ]
   [0.9764706 ]
   [0.972549  ]
   ...
   [0.9764706 ]
   [0.9843137 ]
   [0.9882353 ]]

  [[0.9647059 ]
   [0.9764706 ]
   [0.9843137 ]
   ...
   [0.98039216]
   [0.9647059 ]
   [0.9529412 ]]

  ...

  [[0.9647059 ]
   [0.972549  ]
   [0.98039216]
   ...
   [0.9764706 ]
   [0.9843137 ]
   [0.99607843]]

  [[0.9529412 ]
   [0.9607843 ]
   [0.9764706 ]
   ...
   [0.94509804]
   [0.9529412 ]
   [0.9607843 ]]

  [[0.9411765 ]
   [0.9529412 ]
   [0.96862745]
   ...
   [0.9843137 ]
   [0.9882353 ]
   [0.99215686]]]


 [[[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  ...

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]

  [[1.        ]
   [1.        ]
   [1.        ]
   ...
   [1.        ]
   [1.        ]
   [1.        ]]]


 [[[0.9764706 ]
   [0.9843137 ]
   [0.9882353 ]
   ...
   [0.9843137 ]
   [0.98039216]
   [0.9764706 ]]

  [[0.972549  ]
   [0.9764706 ]
   [0.9843137 ]
   ...
   [0.9764706 ]
   [0.9764706 ]
   [0.972549  ]]

  [[0.9647059 ]
   [0.96862745]
   [0.972549  ]
   ...
   [0.98039216]
   [0.9843137 ]
   [0.9882353 ]]

  ...

  [[0.99215686]
   [0.9843137 ]
   [0.98039216]
   ...
   [0.9490196 ]
   [0.9411765 ]
   [0.95686275]]

  [[0.99607843]
   [0.9882353 ]
   [0.98039216]
   ...
   [0.95686275]
   [0.94509804]
   [0.96862745]]

  [[0.98039216]
   [0.972549  ]
   [0.96862745]
   ...
   [0.9490196 ]
   [0.9411765 ]
   [0.96862745]]]] (of type <class 'numpy.ndarray'>)

In [None]:
# Visualizar resultados del entrenamiento
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Precisión entrenamiento')
plt.plot(history.history['val_accuracy'], label='Precisión validación')
plt.title('Precisión del modelo')
plt.xlabel('Época')
plt.ylabel('Precisión')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Pérdida entrenamiento')
plt.plot(history.history['val_loss'], label='Pérdida validación')
plt.title('Pérdida del modelo')
plt.xlabel('Época')
plt.ylabel('Pérdida')
plt.legend()

plt.tight_layout()
plt.show()