# KYC Document Validator - Quick Test Notebook

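This notebook provides a quick way to test the KYC Document Validator components: model creation, OCR utilities, and fake detection. The first cell adds the `src` directory next to the current working directory's parent to `sys.path`, so start the kernel from a folder that sits alongside `src/` (for example a `notebooks/` directory).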


In [None]:
import sys
import os
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))

import numpy as np
import cv2
import matplotlib.pyplot as plt
from models import create_ensemble_model, compile_model
from ocr_utils import extract_document_info, validate_aadhaar_format, validate_pan_format
from fake_detector import comprehensive_fake_detection


## 1. Test Model Creation


In [None]:
# Build the ensemble for 150x150x3 inputs and 4 classes, then hand it to
# compile_model; the returned object is what the later cells use as `model`.
print("Creating ensemble model...")
model = compile_model(
    create_ensemble_model(input_shape=(150, 150, 3), num_classes=4)
)

# Print the layer-by-layer summary of the resulting model.
print("\nModel Summary:")
model.summary()


In [None]:
# Smoke-test a forward pass on a single random 150x150x3 float32 input.
# A seeded generator keeps the input identical across "Restart & Run All".
# NOTE(review): the printed scores can still differ between runs if the
# model's weights are not fixed -- confirm against create_ensemble_model.
rng = np.random.default_rng(42)
test_input = rng.random((1, 150, 150, 3), dtype=np.float32)
predictions = model.predict(test_input, verbose=0)

# The prediction is indexed as two outputs: [0] is reported as the
# classification head and [1] as the authenticity head.
print(f"Classification output shape: {predictions[0].shape}")
print(f"Authenticity output shape: {predictions[1].shape}")
print(f"\nClassification probabilities: {predictions[0][0]}")
print(f"Authenticity score: {predictions[1][0][0]:.4f}")


## 2. Test OCR Utilities


In [None]:
# Start from an all-white 300x500, 3-channel uint8 canvas.
test_image = np.full((300, 500, 3), 255, dtype=np.uint8)

# Render each text line in black at x=20, one line every 50 pixels.
for text_line, baseline_y in (
    ("Aadhaar: 1234 5678 9012", 50),
    ("PAN: ABCDE1234F", 100),
    ("Name: John Doe", 150),
):
    cv2.putText(test_image, text_line, (20, baseline_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)

# Preview the canvas; it is converted from BGR to RGB before display.
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB))
ax.set_title("Test Document Image")
ax.axis('off')
plt.show()


In [None]:
# Run the extraction helper on the synthetic image and report each field
# it returned, followed by a 200-character preview of the raw text.
info = extract_document_info(test_image)

print("Extracted Information:")
aadhaar_number = info['aadhaar_number']
print(f"Aadhaar Number: {aadhaar_number}")
pan_number = info['pan_number']
print(f"PAN Number: {pan_number}")
text_length = info['text_length']
print(f"Text Length: {text_length}")
raw_preview = info['raw_text'][:200]
print(f"\nRaw Text (first 200 chars):\n{raw_preview}")


In [None]:
# Sample identifiers passed to the two format validators.
test_aadhaar = "123456789012"
test_pan = "ABCDE1234F"

# Validate and report each identifier in turn.
aadhaar_ok = validate_aadhaar_format(test_aadhaar)
print(f"Aadhaar '{test_aadhaar}' valid: {aadhaar_ok}")
pan_ok = validate_pan_format(test_pan)
print(f"PAN '{test_pan}' valid: {pan_ok}")


## 3. Test Fake Detection


In [None]:
# Start from an all-white 400x600, 3-channel uint8 canvas as a mock document.
doc_image = np.full((400, 600, 3), 255, dtype=np.uint8)

# Draw a black border, then a heading and a number line at x=200.
cv2.rectangle(doc_image, (20, 20), (580, 380), (0, 0, 0), 2)
for text_line, baseline_y in (
    ("AADHAAR CARD", 100),
    ("1234 5678 9012", 200),
):
    cv2.putText(doc_image, text_line, (200, baseline_y),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)

# Preview the mock document; it is converted from BGR to RGB before display.
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(cv2.cvtColor(doc_image, cv2.COLOR_BGR2RGB))
ax.set_title("Test Document")
ax.axis('off')
plt.show()


In [None]:
# Extract the raw text from the mock document, then pass the image, the
# document type 'aadhaar' and that text to the fake detector.
doc_info = extract_document_info(doc_image)
ocr_text = doc_info['raw_text']
fake_result = comprehensive_fake_detection(doc_image, 'aadhaar', ocr_text)

# Headline verdict.
print("Fake Detection Results:")
print(f"Is Fake: {fake_result['is_fake']}")
print(f"Authenticity Score: {fake_result['authenticity_score']:.4f}")
print(f"Issues Detected: {fake_result['issues']}")

# Per-check breakdown; each entry is read with .get('issues', []), so an
# entry without an 'issues' key prints an empty list.
print("\nDetailed Results:")
detailed_results = fake_result['detailed_results']
for check_name, check_result in detailed_results.items():
    check_issues = check_result.get('issues', [])
    print(f"  {check_name}: {check_issues}")


## 4. Test with Real Image (if available)

Uncomment the cell below and set `image_path` to a real document image to run the model on it and display the predicted class:


In [None]:
# Uncomment to test with real image
# image_path = "../data/test/aadhaar/sample.jpg"
# if os.path.exists(image_path):
#     real_image = cv2.imread(image_path)
#     real_image = cv2.cvtColor(real_image, cv2.COLOR_BGR2RGB)
#     
#     # Preprocess for model
#     preprocessed = cv2.resize(real_image, (150, 150))
#     preprocessed = preprocessed.astype(np.float32) / 255.0
#     
#     # Predict
#     pred = model.predict(np.expand_dims(preprocessed, axis=0), verbose=0)
#     class_names = ['Aadhaar', 'PAN', 'Fake', 'Other']
#     predicted_class = class_names[np.argmax(pred[0][0])]
#     
#     print(f"Predicted Class: {predicted_class}")
#     print(f"Confidence: {pred[0][0][np.argmax(pred[0][0])]:.4f}")
#     print(f"Authenticity: {pred[1][0][0]:.4f}")
#     
#     plt.figure(figsize=(10, 6))
#     plt.imshow(real_image)
#     plt.title(f"Predicted: {predicted_class}")
#     plt.axis('off')
#     plt.show()
# else:
#     print(f"Image not found at {image_path}")


## 5. Model Architecture Visualization


In [None]:
# Visualize model architecture
from tensorflow.keras.utils import plot_model

# Render the model graph (with shapes and layer names) to a PNG file.
arch_path = 'model_architecture.png'
failure = None
try:
    diagram = plot_model(model, to_file=arch_path, show_shapes=True, show_layer_names=True)
except Exception as e:
    failure = e
else:
    # Inside an IPython kernel, plot_model can report a missing
    # pydot/graphviz by printing a message and returning None instead of
    # raising, so "no exception" alone does not mean the file was written.
    # `os` comes from the notebook's first (imports) cell.
    if diagram is None and not os.path.exists(arch_path):
        failure = "plot_model did not write the output file"

if failure is None:
    print(f"Model architecture saved to {arch_path}")
else:
    print(f"Could not generate architecture diagram: {failure}")
    print("Install graphviz and pydot: pip install graphviz pydot")
