In [1]:
# Imports and Setup
import os
import numpy as np
from datetime import datetime
from typing import Dict, List, Optional
import json
from pathlib import Path


In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# Import custom components
from src.workflow import (
    create_workflow,
    run_fraud_detection,
    FraudDetectionConfig,
    WorkflowState
)
from src.nodes.receiver import TextPreprocessor, TextInput
from src.nodes.encoder import TextEncoder
from src.nodes.curator import CuratorAgent, FraudAnalysis
from src.nodes.utils import EmbeddingStorage, FraudTypeRegistry

In [4]:
# 2. Initial configuration
def setup_environment():
    """Prepare the working directories and confirm credentials are available.

    Creates ``data/embeddings`` and ``data/fraud_types`` (existing
    directories are left alone), then checks that the ``HUGGINGFACE_TOKEN``
    environment variable holds a non-empty value.

    Returns:
        str: A confirmation message once the environment is ready.

    Raises:
        ValueError: If ``HUGGINGFACE_TOKEN`` is unset or empty. The
            directories have already been created by that point.
    """
    for required_dir in ("data/embeddings", "data/fraud_types"):
        os.makedirs(required_dir, exist_ok=True)

    # The Hugging Face token is checked only after the directories exist
    token = os.environ.get("HUGGINGFACE_TOKEN")
    if token:
        return "Entorno configurado correctamente"
    raise ValueError("HUGGINGFACE_TOKEN no encontrado en variables de entorno")

In [5]:
# Create the data directories and fail fast if HUGGINGFACE_TOKEN is not set
setup_environment()

'Entorno configurado correctamente'

In [6]:
# Test Data
# Sample messages shared by every test function in this notebook.
# Each tuple is (message text, expected fraud flag, expected fraud label);
# the label is None for the legitimate message.
test_cases = [
    {"text": text, "expected_fraud": expected_fraud, "type": fraud_type}
    for text, expected_fraud, fraud_type in (
        (
            "Hello, I am a Nigerian prince and I need your help to transfer $10 million. Please send your bank details.",
            True,
            "FINANCIAL_SCAM",
        ),
        (
            "Your account has been compromised. Click here to verify your identity: http://fake-bank.com",
            True,
            "PHISHING",
        ),
        (
            "Thank you for your purchase. Your order #12345 will arrive tomorrow.",
            False,
            None,
        ),
    )
]


In [7]:
# Initialize Components
def initialize_components():
    """Build one fresh instance of every pipeline component used by the tests.

    Returns:
        dict: The new instances keyed by ``"preprocessor"``, ``"encoder"``,
        ``"curator"``, ``"embedding_storage"`` and ``"type_registry"``.
    """
    # Dict values are evaluated top to bottom, so the components are
    # constructed in the order listed here.
    return {
        "preprocessor": TextPreprocessor(),
        "encoder": TextEncoder(),
        "curator": CuratorAgent(),
        "embedding_storage": EmbeddingStorage(),
        "type_registry": FraudTypeRegistry(),
    }


In [8]:
# Smoke check: construct every component once and display the resulting dict.
# The returned instances are not stored here.
initialize_components()

{'preprocessor': <src.nodes.receiver.TextPreprocessor at 0x24a121dbe80>,
 'encoder': <src.nodes.encoder.TextEncoder at 0x24a38173160>,
 'curator': <src.nodes.curator.CuratorAgent at 0x24a3945af50>,
 'embedding_storage': <src.nodes.utils.EmbeddingStorage at 0x24a394b3880>,
 'type_registry': <src.nodes.utils.FraudTypeRegistry at 0x24a394b2950>}

In [9]:
# Test Text Preprocessing
def test_preprocessor(preprocessor: TextPreprocessor):
    """Run every sample in ``test_cases`` through the preprocessor and print the outcome.

    For each sample the original text, the processed text and a validity
    flag are printed. A falsy result is shown as ``Invalid input``; the
    validity flag reports whether the result is not ``None``.

    Args:
        preprocessor: Object providing
            ``process_text(text, source=..., metadata=...)``.
    """
    print("Testing Text Preprocessor...")
    divider = "-" * 50

    for sample in test_cases:
        raw_text = sample["text"]
        processed = preprocessor.process_text(
            raw_text,
            source="test",
            metadata={"expected_fraud": sample["expected_fraud"]},
        )

        print(f"\nInput text: {raw_text}")
        if processed:
            print(f"Processed text: {processed.text}")
        else:
            print("Processed text: Invalid input")
        print(f"Valid input: {processed is not None}")
        print(divider)

In [10]:
# Test Embedding Storage
def test_storage(storage: EmbeddingStorage, encoder: TextEncoder, preprocessor: TextPreprocessor):
    """Round-trip each sample's embedding through storage and print the comparison.

    Every sample in ``test_cases`` is preprocessed and encoded, saved under
    the id ``test_case_<index>`` together with its metadata, then loaded
    again. Samples whose preprocessing result is falsy are skipped silently.

    Args:
        storage: Provides ``save_embedding(embedding, metadata, case_id)`` and
            ``load_embedding(case_id)`` returning ``(embedding, metadata)``.
        encoder: Provides ``encode_text(text_input)``.
        preprocessor: Provides ``process_text(text, source=...)``.
    """
    print("Testing Embedding Storage...")
    divider = "-" * 50

    for index, sample in enumerate(test_cases):
        text_input = preprocessor.process_text(sample["text"], source="test")
        if not text_input:
            continue

        embedding = encoder.encode_text(text_input)

        # Persist the embedding together with the expectations for this sample
        case_id = f"test_case_{index}"
        metadata = {
            "text": text_input.text,
            "expected_fraud": sample["expected_fraud"],
            "fraud_type": sample["type"],
        }
        storage.save_embedding(embedding, metadata, case_id)

        # Read it back and compare against what was just written
        loaded_embedding, loaded_metadata = storage.load_embedding(case_id)
        print(f"\nCase {index}:")
        print(f"Original text: {sample['text']}")
        embeddings_match = np.allclose(embedding, loaded_embedding)
        print(f"Embedding matches: {embeddings_match}")
        print(f"Loaded metadata: {loaded_metadata}")
        print(divider)

In [11]:
# Test Text Encoder
def test_encoder(encoder: TextEncoder, preprocessor: TextPreprocessor):
    """Encode every sample in ``test_cases`` and print the embedding's shape and norm.

    Samples whose preprocessing result is falsy are skipped silently.

    Args:
        encoder: Provides ``encode_text(text_input)``; the result must expose
            ``.shape`` and be accepted by ``np.linalg.norm``.
        preprocessor: Provides ``process_text(text, source=...)``.
    """
    print("Testing Text Encoder...")
    divider = "-" * 50

    for sample in test_cases:
        raw_text = sample["text"]
        text_input = preprocessor.process_text(raw_text, source="test")
        if not text_input:
            continue

        vector = encoder.encode_text(text_input)
        print(f"\nInput text: {raw_text}")
        print(f"Embedding shape: {vector.shape}")
        print(f"Embedding norm: {np.linalg.norm(vector)}")
        print(divider)

In [12]:
# Test Complete Workflow
def test_workflow(config: Optional[FraudDetectionConfig] = None):
    """Run the complete fraud detection workflow on every sample in ``test_cases``.

    For each sample the workflow result is printed next to the expected
    values. A failure on one sample is reported and does not stop the
    remaining samples from running.

    Args:
        config: Configuration passed to ``run_fraud_detection``. When omitted,
            a fresh ``FraudDetectionConfig()`` is created for this call.
    """
    # A default written as ``FraudDetectionConfig()`` in the signature would be
    # evaluated once, when the function is defined, and then shared by every
    # call. Creating it here gives each call its own instance.
    if config is None:
        config = FraudDetectionConfig()

    print("Testing Complete Workflow...")

    for case in test_cases:
        # Only the first 50 characters are shown to keep the log readable
        print(f"\nTesting case: {case['text'][:50]}...")

        try:
            result = run_fraud_detection(case["text"], config)

            print("Results:")
            print(f"Is fraud: {result['is_fraud']} (Expected: {case['expected_fraud']})")
            print(f"Fraud type: {result['fraud_type']} (Expected: {case['type']})")
            print(f"Explanation: {result['explanation']}")
            print(f"Similar cases found: {result['similar_cases_count']}")
            print(f"Should alert: {result['should_alert']}")

        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the next sample
            print(f"Error processing case: {e}")

        print("-" * 50)

In [13]:
# Debug Utilities
def inspect_state(state: Dict):
    """Print a short, human-readable summary of a workflow state.

    Args:
        state: Raw state mapping. It is validated with
            ``WorkflowState.model_validate`` before any field is read, so a
            validation failure is raised before anything is printed.
    """
    snapshot = WorkflowState.model_validate(state)

    print("Current State:")

    text_input = snapshot.text_input
    print(f"Text input: {text_input.text if text_input else None}")
    print(f"Has embeddings: {snapshot.embeddings is not None}")

    # A missing or empty list of similar cases is reported as zero
    similar_cases = snapshot.similar_cases or []
    print(f"Similar cases: {len(similar_cases)}")

    analysis = snapshot.analysis
    print(f"Has analysis: {analysis is not None}")
    if analysis:
        print("Analysis results:")
        print(f"- Is fraud: {analysis.is_fraud}")
        print(f"- Fraud type: {analysis.fraud_type}")

    print(f"Should alert: {snapshot.should_alert}")


In [14]:
# Run all tests
if __name__ == "__main__":
    # Build a fresh set of components for this run
    components = initialize_components()
    
    # Exercise the components individually: preprocessor, then encoder, then
    # storage (the storage test also needs the encoder and the preprocessor)
    test_preprocessor(components["preprocessor"])
    test_encoder(components["encoder"], components["preprocessor"])
    test_storage(components["embedding_storage"], components["encoder"], components["preprocessor"])
    
    # End-to-end check; called without arguments, so test_workflow uses its
    # default configuration
    test_workflow()

Testing Text Preprocessor...

Input text: Hello, I am a Nigerian prince and I need your help to transfer $10 million. Please send your bank details.
Processed text: Hello, I am a Nigerian prince and I need your help to transfer 10 million. Please send your bank details.
Valid input: True
--------------------------------------------------

Input text: Your account has been compromised. Click here to verify your identity: http://fake-bank.com
Processed text: Your account has been compromised. Click here to verify your identity
Valid input: True
--------------------------------------------------

Input text: Thank you for your purchase. Your order #12345 will arrive tomorrow.
Processed text: Thank you for your purchase. Your order 12345 will arrive tomorrow.
Valid input: True
--------------------------------------------------
Testing Text Encoder...

Input text: Hello, I am a Nigerian prince and I need your help to transfer $10 million. Please send your bank details.
Embedding shape: (384