In [3]:
!pip install tensorflow keras pillow numpy



In [4]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pickle
from PIL import Image

In [5]:
model = keras.models.load_model("captcha_prediction_model.keras")
print("Prediction model loaded!")

Prediction model loaded!


In [18]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pickle
from PIL import Image

# ---------- 1. Load preprocessing components ----------
with open("preprocessing_components.pkl", "rb") as f:
    preprocessing = pickle.load(f)

# Extract required values
characters = preprocessing["characters"]
max_len = preprocessing["max_length"]
img_width = preprocessing["img_width"]
img_height = preprocessing["img_height"]
char_to_num_vocab = preprocessing.get("char_to_num_vocab")  # optional

# Check if we need to add CTC blank token
model_output_classes = model.output_shape[-1]
if len(characters) < model_output_classes:
    print(f"[INFO] Model expects {model_output_classes} classes, but only {len(characters)} characters found.")
    print("[INFO] Adding CTC blank token...")
    characters = characters + [""]  # Add blank token for CTC
    
print(f"[INFO] Characters: {characters}")
print(f"[INFO] Character count: {len(characters)}, Model output classes: {model_output_classes}")
print(f"[INFO] Max length: {max_len}, Image size: {img_width}x{img_height}")

# ---------- 2. Load prediction model ----------
model = keras.models.load_model("captcha_prediction_model.keras")
print("[INFO] Prediction model loaded successfully!")

# Print model input shape for debugging
print(f"[DEBUG] Model input shape: {model.input_shape}")
print(f"[DEBUG] Model output shape: {model.output_shape}")

# ---------- 3. Preprocess input image ----------
def preprocess_image(image_input):
    """
    Preprocess image to match model's expected input format
    """
    try:
        # Load image
        if isinstance(image_input, str):
            image = Image.open(image_input)
        else:
            image = image_input
        
        print(f"[DEBUG] Original image size: {image.size}")
        print(f"[DEBUG] Original image mode: {image.mode}")
        
        # Get model's expected input shape
        expected_shape = model.input_shape
        expected_height = expected_shape[1]
        expected_width = expected_shape[2]
        expected_channels = expected_shape[3] if len(expected_shape) == 4 else 1
        
        print(f"[DEBUG] Expected shape: {expected_shape}")
        print(f"[DEBUG] Expected channels: {expected_channels}")
        
        # Resize image to expected dimensions
        image = image.resize((expected_width, expected_height))
        
        # Convert to appropriate color mode
        if expected_channels == 1:
            # Grayscale
            image = image.convert("L")
            image_array = np.array(image)
            # Normalize to [0,1]
            image_array = image_array.astype(np.float32) / 255.0
            # Add channel dimension: (H, W) -> (H, W, 1)
            image_array = np.expand_dims(image_array, axis=-1)
        elif expected_channels == 3:
            # RGB
            image = image.convert("RGB")
            image_array = np.array(image)
            # Normalize to [0,1]
            image_array = image_array.astype(np.float32) / 255.0
            # Already has 3 channels: (H, W, 3)
        else:
            raise ValueError(f"Unsupported number of channels: {expected_channels}")
        
        # Add batch dimension: (H, W, C) -> (1, H, W, C)
        image_array = np.expand_dims(image_array, axis=0)
        
        print(f"[DEBUG] Preprocessed image shape: {image_array.shape}")
        print(f"[DEBUG] Image value range: [{image_array.min():.3f}, {image_array.max():.3f}]")
        
        return image_array
        
    except Exception as e:
        print(f"[ERROR] Error in image preprocessing: {e}")
        raise

# ---------- 4. Prediction + Decoding ----------
def predict_captcha(image_path):
    """
    Preprocess, predict, and decode a CAPTCHA image.
    """
    try:
        print(f"[INFO] Processing image: {image_path}")
        
        # Preprocess
        processed_image = preprocess_image(image_path)
        
        # Predict
        print("[INFO] Making prediction...")
        prediction = model.predict(processed_image, verbose=0)
        
        print(f"[DEBUG] Prediction shape: {prediction.shape}")
        print(f"[DEBUG] Prediction sample: {prediction[0][:2]}")  # Show first 2 timesteps
        
        # Decode prediction (argmax per timestep)
        predicted_indices = [np.argmax(p) for p in prediction[0]]
        print(f"[DEBUG] Predicted indices: {predicted_indices}")
        
        # Handle CTC decoding (remove blanks and consecutive duplicates)
        decoded_text = ""
        prev_idx = -1
        
        for idx in predicted_indices:
            # Skip blank tokens (usually the last index)
            if idx == len(characters) - 1 and characters[-1] == "":
                continue
            # Skip consecutive duplicates (CTC rule)
            if idx != prev_idx:
                if idx < len(characters):
                    decoded_text += characters[idx]
                else:
                    print(f"[WARNING] Index {idx} out of range for characters list")
            prev_idx = idx
        
        return decoded_text
        
    except Exception as e:
        print(f"[ERROR] Error in prediction: {e}")
        raise

# ---------- 5. Alternative decoding methods ----------
def predict_captcha_with_confidence(image_path, confidence_threshold=0.5):
    """
    Predict CAPTCHA with confidence filtering
    """
    try:
        processed_image = preprocess_image(image_path)
        prediction = model.predict(processed_image, verbose=0)
        
        predicted_text = ""
        confidences = []
        prev_idx = -1
        
        for timestep_probs in prediction[0]:
            max_prob = np.max(timestep_probs)
            max_idx = np.argmax(timestep_probs)
            
            confidences.append(max_prob)
            
            # Skip blank tokens
            if max_idx == len(characters) - 1 and characters[-1] == "":
                continue
                
            # Skip consecutive duplicates and low confidence
            if max_idx != prev_idx and max_prob >= confidence_threshold:
                if max_idx < len(characters):
                    predicted_text += characters[max_idx]
                else:
                    predicted_text += "?"
            elif max_prob < confidence_threshold:
                # Only add ? for non-blank, non-duplicate low confidence predictions
                if max_idx != len(characters) - 1 and max_idx != prev_idx:
                    predicted_text += "?"
                    
            prev_idx = max_idx
        
        avg_confidence = np.mean(confidences)
        
        return predicted_text, avg_confidence, confidences
        
    except Exception as e:
        print(f"[ERROR] Error in confident prediction: {e}")
        raise

# ---------- 6. Test with error handling ----------
def test_captcha_prediction(image_path):
    """
    Test function with comprehensive error handling
    """
    try:
        # Check if file exists
        import os
        if not os.path.exists(image_path):
            print(f"[ERROR] Image file not found: {image_path}")
            return None
        
        # Basic prediction
        print("="*50)
        print("BASIC PREDICTION")
        print("="*50)
        predicted_text = predict_captcha(image_path)
        print(f"[RESULT] Predicted CAPTCHA: '{predicted_text}'")
        
        # Confidence-based prediction
        print("\n" + "="*50)
        print("CONFIDENCE-BASED PREDICTION")
        print("="*50)
        pred_text, avg_conf, confidences = predict_captcha_with_confidence(image_path)
        print(f"[RESULT] Predicted CAPTCHA: '{pred_text}'")
        print(f"[RESULT] Average confidence: {avg_conf:.3f}")
        print(f"[RESULT] Per-character confidence: {[f'{c:.3f}' for c in confidences]}")
        
        return predicted_text
        
    except Exception as e:
        print(f"[ERROR] Test failed: {e}")
        print("[DEBUG] Checking model and preprocessing compatibility...")
        
        # Debug model architecture
        print(f"[DEBUG] Model summary:")
        model.summary()
        
        return None

# ---------- 7. Example usage ----------
if __name__ == "__main__":
    captcha_path = "test_captcha.png"  # Change to your test file
    
    # Test the prediction
    result = test_captcha_prediction(captcha_path)
    
    if result is None:
        print("\n[INFO] Troubleshooting tips:")
        print("1. Check if the image file exists")
        print("2. Verify image dimensions match training data")
        print("3. Ensure image is in correct format (RGB/Grayscale)")
        print("4. Check if model was trained with same preprocessing")

[INFO] Model expects 20 classes, but only 19 characters found.
[INFO] Adding CTC blank token...
[INFO] Characters: ['c', '7', 'p', 'm', '6', 'b', 'n', '3', '8', 'y', 'w', 'e', 'x', '4', '5', '2', 'g', 'd', 'f', '']
[INFO] Character count: 20, Model output classes: 20
[INFO] Max length: 5, Image size: 200x50
[INFO] Prediction model loaded successfully!
[DEBUG] Model input shape: (None, 200, 50, 1)
[DEBUG] Model output shape: (None, 50, 20)
BASIC PREDICTION
[INFO] Processing image: test_captcha.png
[DEBUG] Original image size: (310, 60)
[DEBUG] Original image mode: RGB
[DEBUG] Expected shape: (None, 200, 50, 1)
[DEBUG] Expected channels: 1
[DEBUG] Preprocessed image shape: (1, 200, 50, 1)
[DEBUG] Image value range: [0.000, 0.988]
[INFO] Making prediction...
[DEBUG] Prediction shape: (1, 50, 20)
[DEBUG] Prediction sample: [[6.9475288e-07 2.0918046e-06 1.7906518e-07 5.1070015e-07 2.4167886e-07
  4.2985803e-08 2.7748973e-07 2.0843301e-07 5.6973153e-07 1.5583750e-07
  1.6884169e-07 2.0708154