# In [1]:
from diagrams import Diagram, Cluster, Edge
from diagrams.programming.language import Python
from diagrams.programming.framework import Flask
from diagrams.onprem.database import PostgreSQL
from diagrams.onprem.analytics import Spark
from diagrams.aws.storage import S3
from diagrams.aws.ml import Sagemaker
from diagrams.aws.compute import Lambda
from diagrams.generic.compute import Rack
from diagrams.generic.storage import Storage
from diagrams.generic.place import Datacenter

# Create the diagram
with Diagram("ML Backtesting Framework for Crypto Trading", show=False, filename="ml_backtesting_framework"):
    
    # External Data Sources
    with Cluster("External Data Sources"):
        cryptoquant = Datacenter("CryptoQuant")
        glassnode = Datacenter("Glassnode")
        coinglass = Datacenter("Coinglass")
    
    # Cybotrade API
    with Cluster("Cybotrade API"):
        api = Flask("API Gateway")
        cryptoquant >> Edge(label="Data") >> api
        glassnode >> Edge(label="Data") >> api
        coinglass >> Edge(label="Data") >> api
    
    # Data Pipeline
    with Cluster("Data Pipeline"):
        data_retrieval = Python("Data Retrieval")
        data_preprocessing = Python("Data Preprocessing")
        feature_engineering = Python("Feature Engineering")
        
        api >> Edge(label="API Key Auth") >> data_retrieval
        data_retrieval >> data_preprocessing >> feature_engineering
    
    # Data Storage
    with Cluster("Data Storage"):
        raw_data = Storage("Raw Data")
        processed_data = Storage("Processed Data")
        feature_store = Storage("Feature Store")
        
        data_retrieval >> raw_data
        data_preprocessing >> processed_data
        feature_engineering >> feature_store
    
    # ML Model
    with Cluster("ML Model"):
        hmm_model = Python("HMM Model")
        nlp_component = Python("NLP Component (Optional)")
        model_training = Python("Model Training")
        model_evaluation = Python("Model Evaluation")
        
        feature_store >> hmm_model
        feature_store >> nlp_component
        hmm_model >> model_training
        nlp_component >> model_training
        model_training >> model_evaluation
    
    # Backtesting Engine
    with Cluster("Backtesting Engine"):
        strategy_definition = Python("Strategy Definition")
        signal_generation = Python("Signal Generation")
        performance_metrics = Python("Performance Metrics")
        
        model_evaluation >> strategy_definition
        strategy_definition >> signal_generation
        signal_generation >> performance_metrics
    
    # Visualization
    with Cluster("Visualization"):
        dashboard = Python("Trading Dashboard")
        performance_report = Python("Performance Report")
        
        performance_metrics >> dashboard
        performance_metrics >> performance_report