# Entrenamiento OCR una letra

Entrena el modelo de detección con las letras pares

In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import string

In [23]:
# Parámetros
DATASET_DIR = 'train_ocr/min'
IMG_SIZE = 32
LETTERS = sorted([folder for folder in os.listdir(DATASET_DIR) if os.path.isdir(os.path.join(DATASET_DIR, folder))])
LETTER_TO_IDX = {letter: idx for idx, letter in enumerate(LETTERS)}
IDX_TO_LETTER = {idx: letter for letter, idx in LETTER_TO_IDX.items()}

# Cargar y etiquetar datos
def load_data_old():
    X, y = [], []
    for letter in LETTERS:
        folder_path = os.path.join(DATASET_DIR, letter)
        for filename in os.listdir(folder_path):
            if filename.endswith(".png"):
                img_path = os.path.join(folder_path, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                    X.append(img)
                    y.append(LETTER_TO_IDX[letter])
    return np.array(X), np.array(y)

def load_data():
    X, y = [], []
    for letter in LETTERS:
        folder_path = os.path.join(DATASET_DIR, letter)
        for filename in os.listdir(folder_path):
            if filename.endswith(".png"):
                # Verifica si el nombre (sin extensión) termina en un número par
                name_no_ext = os.path.splitext(filename)[0]
                if name_no_ext and name_no_ext[-1].isdigit() and int(name_no_ext[-1]) % 2 == 0:
                    img_path = os.path.join(folder_path, filename)
                    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                    if img is not None:
                        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                        X.append(img)
                        y.append(LETTER_TO_IDX[letter])
    return np.array(X), np.array(y)

In [24]:
# Preprocesamiento
X, y = load_data()
X = X / 255.0
X = X.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y_cat = to_categorical(y, num_classes=len(LETTERS))

# División
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.2, random_state=42)


In [25]:
# Modelo
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(LETTERS), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m554/554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.3324 - loss: 2.2160 - val_accuracy: 0.9066 - val_loss: 0.3117
Epoch 2/10
[1m554/554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.9464 - loss: 0.2002 - val_accuracy: 0.9711 - val_loss: 0.1011
Epoch 3/10
[1m554/554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9766 - loss: 0.0856 - val_accuracy: 0.9806 - val_loss: 0.0653
Epoch 4/10
[1m554/554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9877 - loss: 0.0490 - val_accuracy: 0.9831 - val_loss: 0.0551
Epoch 5/10
[1m554/554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9876 - loss: 0.0425 - val_accuracy: 0.9810 - val_loss: 0.0580
Epoch 6/10
[1m554/554[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9920 - loss: 0.0303 - val_accuracy: 0.9833 - val_loss: 0.0542
Epoch 7/10
[1m554/554[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x1a39818b740>

In [26]:
# Guardar el modelo
model.save("ocr_model_letras_min.h5")



In [33]:
from tensorflow.keras.models import load_model

def predict_letter(img_path, model, idx_to_letter):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img / 255.0
    img = img.reshape(1, IMG_SIZE, IMG_SIZE, 1)
    prediction = model.predict(img)
    predicted_index = np.argmax(prediction)
    return idx_to_letter[predicted_index]

# Ejemplo de uso
model = load_model("ocr_model_letras_min.h5")
pred = predict_letter("train_ocr/min/n/0001.png", model, IDX_TO_LETTER)
print("Letra predicha:", pred)








[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Letra predicha: n


# OCR palabra

In [19]:
import os
import cv2
import numpy as np
from tensorflow.keras.models import load_model

In [20]:
IMG_SIZE = 32
IMG_DIR = 'test_ocr_words_plain'
LABEL_FILE = 'test_ocr_words_plain/gt.txt'
MODEL_PATH = 'ocr_model_letras_min.h5'

model = load_model(MODEL_PATH)
LETTERS = sorted(os.listdir('train_ocr/min'))
IDX_TO_LETTER = {i: letter for i, letter in enumerate(LETTERS)}



In [21]:
def deskew(image):
    coords = np.column_stack(np.where(image > 0))
    if coords.size == 0:
        return image
    angle = cv2.minAreaRect(coords)[-1]
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    (h, w) = image.shape
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

def segment_letters_robust(img_path):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    _, thresh = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    thresh = deskew(thresh)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    dilated = cv2.dilate(thresh, kernel, iterations=1)

    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = [cv2.boundingRect(c) for c in contours]
    boxes = sorted(boxes, key=lambda x: x[0])  # sort left to right

    letters = []
    for x, y, w, h in boxes:
        letter_img = thresh[y:y+h, x:x+w]
        resized = cv2.resize(letter_img, (IMG_SIZE, IMG_SIZE))
        norm = resized / 255.0
        norm = norm.reshape(1, IMG_SIZE, IMG_SIZE, 1)
        letters.append(norm)
    return letters

def predict_word(img_path):
    letter_imgs = segment_letters_robust(img_path)
    predicted = ""
    for img in letter_imgs:
        pred = model.predict(img)
        idx = np.argmax(pred)
        predicted += IDX_TO_LETTER[idx]
    return predicted

# Evaluación
def evaluar_dataset():
    total = 0
    correctas = 0

    with open(LABEL_FILE, 'r', encoding='utf-8') as f:
        for linea in f:
            nombre_archivo, *_, etiqueta = linea.strip().split(';')
            ruta_img = os.path.join(IMG_DIR, nombre_archivo)
            palabra_real = etiqueta.strip().lower()
            palabra_predicha = predict_word(ruta_img).lower()

            print(f"{nombre_archivo} | Real: {palabra_real} | Predicha: {palabra_predicha}")
            if palabra_real == palabra_predicha:
                correctas += 1
            total += 1

    precision = (correctas / total) * 100
    print(f"\n✅ Precisión total: {precision:.2f}% ({correctas}/{total})")


In [22]:
# Ejecutar evaluación
evaluar_dataset()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
0000.png | Real: campamento | Predicha: lmtummjumv
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m 