# Running advisor

Basic usage example for DeepSight Advisor.

This example demonstrates how to use the advisor with minimal configuration
to run a complete ML analysis pipeline.

In [1]:
from deepsight.core.advisor import AdvisorConfig, MLflowConfig
from deepsight.core.advisor import DeepSightAdvisor, run_analysis
from deepsight.utils.logging import setup_logging, get_logger

# Configure DeepSight logging at INFO level, then create the logger used by
# the cells below. `setup_logging` runs first so the logger is requested only
# after logging has been configured.
setup_logging(level="INFO")
logger = get_logger(__name__)


pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.



In [2]:
# MLflow tracking server and the run whose artifacts the advisor analyses.
# Both values are reused by Example 1 and Example 2 below.
tracking_uri = "http://localhost:5000"
run_id = "07c04cc42fd9461e98f7eb0bf42444fb"

### Example 1: Using the convenience function

In [None]:
# Example 1: Using the convenience function
result = run_analysis(tracking_uri=tracking_uri, run_id=run_id)

# Report the outcome fields read from the returned result object.
for summary_line in (
    f"Analysis completed: {result.success}",
    f"Artifacts loaded: {len(result.artifacts_loaded)}",
    f"Execution time: {result.execution_time:.2f}s",
):
    logger.info(summary_line)

2025-09-21 15:48:42,094 - deepsight.core.advisor.orchestrator - INFO - Configuration loaded and validated successfully
2025-09-21 15:48:42,149 - deepsight.core.advisor.orchestrator - INFO - MLflow manager initialized: http://localhost:5000
2025-09-21 15:48:42,214 - deepsight.core.advisor.orchestrator - INFO - Artifact manager initialized
2025-09-21 15:48:42,215 - deepsight.core.query.builders.generator - INFO - QueryGenerator initialized successfully
2025-09-21 15:48:42,216 - deepsight.core.advisor.orchestrator - INFO - Query generator initialized
2025-09-21 15:48:42,217 - deepsight.core.advisor.orchestrator - INFO - Intelligence client initialized
2025-09-21 15:48:42,217 - deepsight.core.advisor.orchestrator - INFO - DeepSight Advisor initialized successfully
2025-09-21 15:48:42,218 - deepsight.core.advisor.orchestrator - INFO - Starting analysis for run_id: 07c04cc42fd9461e98f7eb0bf42444fb
2025-09-21 15:48:42,218 - deepsight.core.advisor.orchestrator - INFO - Step 1: Loading artifact

Downloading artifacts:   0%|          | 0/2 [00:00<?, ?it/s]

2025-09-21 15:48:42,362 - deepsight.core.advisor.orchestrator - INFO - Successfully loaded artifact: training_artifacts
2025-09-21 15:48:42,363 - deepsight.core.advisor.orchestrator - INFO - Loading artifact: deepchecks


Downloading artifacts:   0%|          | 0/2 [00:00<?, ?it/s]

2025-09-21 15:48:42,737 - deepsight.core.advisor.orchestrator - INFO - Successfully loaded artifact: deepchecks
2025-09-21 15:48:42,738 - deepsight.core.advisor.orchestrator - INFO - Artifact loading completed in 0.52 seconds
2025-09-21 15:48:42,739 - deepsight.core.advisor.orchestrator - INFO - Step 2: Generating query...
2025-09-21 15:48:42,754 - deepsight.core.advisor.orchestrator - INFO - Query generated successfully in 0.02 seconds
2025-09-21 15:48:42,755 - deepsight.core.advisor.orchestrator - INFO - Prompt length: 8735 characters
2025-09-21 15:48:42,756 - deepsight.core.advisor.orchestrator - INFO - Step 3: Executing intelligence query...


Output()

### Example 2: Using full configuration

In [None]:
# Build the MLflow section first, then wrap it in the advisor configuration.
mlflow_settings = MLflowConfig(run_id=run_id, tracking_uri=tracking_uri)
config = AdvisorConfig(mlflow=mlflow_settings)

# Drive the analysis through an explicit advisor instance.
advisor = DeepSightAdvisor(config)
result = advisor.run_analysis()

logger.info(f"Analysis completed: {result.success}")
logger.info(f"Summary: {advisor.get_summary(result)}")

# Train image classifier

This example demonstrates how to train a classification model using DeepSight's
training framework with comprehensive monitoring and validation.

Features demonstrated:
- Dataset loading with embeddings
- Model configuration and training
- MLflow integration for experiment tracking
- Deepchecks integration for data validation
- Lightning integration for training orchestration

Requirements:
- MLflow server running on localhost:5000
- Required datasets and embeddings available
- CUDA-compatible GPU (optional; training falls back to CPU when none is available)

Steps:
1. Loads the dataset
2. Configures the training parameters
3. Sets up the model
4. Configures monitoring and validation
5. Runs the training process


In [None]:
from pathlib import Path
from typing import Optional

# DeepSight imports
from deepsight.zoo.trainers.classification import (
    ClassificationTrainer,
    ClassificationTrainerConfig,
)
from deepsight.zoo.timm_models import TimmClassificationModel, ClassifierHead
from deepsight.utils.feature_extractor import FeatureExtractor
from deepsight.integrations.lightning import DeepSightCallback
from deepsight.utils.config import DeepchecksConfig
from deepsight.utils.logging import setup_logging, get_logger

# Configure DeepSight logging at INFO level, then create the logger that the
# training cells below use for progress messages.
setup_logging(level="INFO")
logger = get_logger(__name__)

# Configuration
# ROOT anchors the checkpoint and Deepchecks output directories used below.
# `__file__` is not defined inside a Jupyter kernel, so when it is missing we
# fall back to the parent of the current working directory (this assumes the
# notebook lives one level below the project root, mirroring the script
# layout — adjust if your layout differs).
try:
    ROOT = Path(__file__).parents[1]
except NameError:
    ROOT = Path.cwd().parent

# Forwarded to `trainer.run(debug=...)` in the training cell.
DEBUG = True

In [None]:
def load_food_dataset(
    embedding_model: Optional[FeatureExtractor] = None,
    split_size: str = ":500",
):
    """
    Build the Food-101 train and validation datasets.

    Args:
        embedding_model: Optional feature extractor forwarded to the dataset
            loader; ``None`` loads the datasets without one.
        split_size: Forwarded unchanged to ``load_train_and_val_datasets``.
            Presumably a slice spec that limits the number of samples
            (default ``":500"``) — confirm against the loader.

    Returns:
        Tuple of (train_dataset, val_dataset)
    """
    # Function-local import: the dataset module is only loaded when called.
    from deepsight.zoo.datasets.food import load_train_and_val_datasets

    # Fixed loader settings; only the embedding model and split size vary.
    loader_kwargs = dict(
        embedding_model=embedding_model,
        num_workers=4,
        split_size=split_size,
        device="cpu",
        image_size=518,
    )
    train_split, val_split = load_train_and_val_datasets(**loader_kwargs)

    logger.info(
        f"Loaded Food-101 dataset: {len(train_split)} train, {len(val_split)} val samples"
    )
    return train_split, val_split


In [None]:
logger.info("Starting DeepSight training example...")

# No embedding extractor by default. To attach one, assign for example
# FeatureExtractor(model_name="timm/vit_base_patch14_reg4_dinov2.lvd142m")
embedding_model = None

# Load the train/validation datasets with the default split size.
train_dataset, val_dataset = load_food_dataset(embedding_model=embedding_model)

In [None]:
# Assemble the trainer configuration. Keyword arguments are grouped by concern.
config = ClassificationTrainerConfig(
    # --- dataset: class count and label mapping come from the train split
    label_to_class_map=train_dataset.label_to_class_map,
    num_classes=train_dataset.num_classes,
    # --- data loading
    num_workers=4,
    batch_size=8,
    pin_memory=False,
    # --- hardware: let the framework pick GPU/CPU; bf16 mixed precision
    precision="bf16-mixed",
    accelerator="auto",
    # --- validation cadence (presumably once per epoch — confirm the unit
    #     against ClassificationTrainerConfig)
    val_check_interval=1,
    # --- optimisation hyperparameters
    epochs=5,
    lr=1e-3,  # initial learning rate
    lrf=1e-2,  # final learning-rate setting (absolute vs. factor — confirm)
    weight_decay=5e-4,
    label_smoothing=0.0,
    reweight_classes=False,
    # --- early-stopping monitor; note patience (10) exceeds epochs (5)
    monitor="val_f1score",
    mode="max",
    min_delta=1e-3,
    patience=10,
    # --- MLflow experiment tracking
    tracking_uri="http://localhost:5000",
    experiment_name="foodwaste_classification",
    run_name="default",
    log_best_model=True,
    # --- checkpoints are written below ROOT/checkpoints
    dirpath=str(ROOT / "checkpoints"),
    filename="best-{epoch:02d}",
    save_weights_only=True,
)

logger.info(
    f"Training configuration: {config.num_classes} classes, {config.epochs} epochs"
)

In [None]:
# Build the timm-backed classifier: a pretrained backbone (kept frozen) with a
# small classification head sized to the dataset's class count.
model = TimmClassificationModel(
    model_name="timm/mobilenetv4_hybrid_large.e600_r384_in1k",
    freeze_backbone=True,
    num_classes=train_dataset.num_classes,
    # Head settings
    num_layers=2,
    hidden_dim=128,
    dropout=0.2,
)

In [None]:
# Alternative: Use classifier head with embeddings
# model = ClassifierHead(
#     input_dim=embedding_model.feature_dim,
#     num_classes=train_dataset.num_classes,
#     num_layers=2,
#     hidden_dim=384,
#     dropout=0.2
# )

In [None]:
logger.info(f"Initialized model: {type(model).__name__}")

# Deepchecks settings: which suites run and how their results are stored.
deepchecks_config = DeepchecksConfig(
    # Suites to run
    data_integrity=True,
    train_test_validation=True,
    model_evaluation=True,
    # Sampling / batching
    batch_size=8,
    max_samples=1000,
    random_state=42,
    # Result handling: persist results and displays under ROOT, then parse them
    output_dir=str(ROOT / "deepchecks_results"),
    save_results=True,
    save_display=True,
    parse_results=True,
)

# Monitoring callback: receives the dumped trainer config, both datasets and
# the Deepchecks settings defined above.
deepsight_callback = DeepSightCallback(
    config.model_dump(),
    deepchecks_config=deepchecks_config,
    dataset_name="food101",
    val_dataset=val_dataset,
    train_dataset=train_dataset,
)

In [None]:
# Minimal alternative: attach the DeepSight callback to a plain Lightning
# Trainer instead of going through ClassificationTrainer.
from lightning import Trainer
from deepsight.integrations.lightning import DeepSightCallback
from deepsight.utils.config import DeepchecksConfig

# Use names of their own so this demo does not overwrite `deepsight_callback`,
# the fully configured callback that the ClassificationTrainer cell passes to
# `trainer.run(...)`.
minimal_callback = DeepSightCallback(
    dataset_name="my-dataset",
    deepchecks_config=DeepchecksConfig(),
    train_dataset=train_dataset,
    val_dataset=val_dataset,
)
lightning_trainer = Trainer(
    max_epochs=50,
    # Lightning expects callback instances, so pass the object itself rather
    # than calling it.
    callbacks=[minimal_callback],
)
# NOTE(review): Trainer.fit documents its 2nd/3rd arguments as dataloaders;
# confirm these dataset objects are accepted as-is or wrap them in DataLoaders.
lightning_trainer.fit(model, train_dataset, val_dataset)

In [None]:
# Create the DeepSight trainer from the configuration built earlier.
trainer = ClassificationTrainer(config)
logger.info("Starting training...")

# Hand over the model, both dataset splits and the monitoring callback.
# DEBUG is forwarded as the trainer's `debug` flag.
trainer.run(
    model=model,
    val_dataset=val_dataset,
    train_dataset=train_dataset,
    debug=DEBUG,
    deepsight_callback=deepsight_callback,
)

logger.info("Training completed successfully!")

# Creating prompts

DeepSight Query Generation Example

This example demonstrates how to generate prompts from artifacts
using DeepSight's query generation system.

Features demonstrated:
- Artifact loading and management
- Query generation from multiple artifacts
- Integration with AI providers
- Prompt building for analysis and insights

Requirements:
- MLflow server running on localhost:5000
- Valid run_id with Deepchecks and Training artifacts

Steps:
1. Connects to MLflow and loads artifacts
2. Generates intelligent prompts from artifact data
3. Optionally executes queries using AI providers
4. Displays generated prompts and responses

In [None]:
import logging
from pathlib import Path

# DeepSight imports
from deepsight.core.query import QueryGenerator
from deepsight.core import ArtifactsManager
from deepsight.integrations.mlflow import MLflowManager
from deepsight.core.artifacts import ArtifactPaths
from deepsight.utils.logging import setup_logging, get_logger

In [None]:
# Configure DeepSight logging at INFO level, then create the logger used by
# the query-generation cells below.
setup_logging(level="INFO")
logger = get_logger(__name__)

In [None]:
logger.info("Starting DeepSight query generation example...")

# Must be a plain string: a trailing comma after the literal would make
# `run_id` a 1-tuple, and that tuple would be handed to MLflow below.
run_id = "07c04cc42fd9461e98f7eb0bf42444fb"  # Replace with your run_id

# Initialize MLflow manager (artifacts are downloaded into the local "tmp" dir)
mlflow_mgr = MLflowManager(
    tracking_uri="http://localhost:5000",
    run_id=run_id,
    dwnd_dir="tmp",
)

logger.info(f"Connected to MLflow at: {mlflow_mgr.tracking_uri}")
logger.info(f"Using run_id: {mlflow_mgr.run_id}")

# Initialize artifact manager backed by a local SQLite file
artifacts_mgr = ArtifactsManager(
    sqlite_path="tmp/artifacts.db",
    mlflow_manager=mlflow_mgr,
)

logger.info("Initialized artifact manager")

# Load the Deepchecks and training artifacts, in that order; each one is
# downloaded first when it is not available locally.
logger.info("Loading Deepchecks and training artifacts...")
artifacts = [
    artifacts_mgr.load_artifact(
        run_id=run_id,
        artifact_key=key,
        download_if_missing=True,
    )
    for key in (ArtifactPaths.DEEPCHECKS, ArtifactPaths.TRAINING)
]

logger.info("Artifacts loaded successfully!")



In [None]:
# Generate query from artifacts: build a single prompt from the list of
# artifacts loaded in the previous cell. The resulting `prompt` is displayed
# in the next cell.
logger.info("Generating query from artifacts...")
query_generator = QueryGenerator()
prompt = query_generator.build_prompt(artifacts=artifacts)

In [None]:
# Bare expression: the notebook renders the generated prompt as cell output.
prompt

# Artifact management

This example demonstrates how to manage and retrieve artifacts from MLflow
using DeepSight's artifact management system.

Features demonstrated:
- MLflow integration for artifact tracking
- Artifact registration and retrieval
- Local caching and download management
- Artifact metadata and content access

Usage:
    python examples/run_artifact_manager.py

Requirements:
- MLflow server running on localhost:5000
- Valid run_id with artifacts
- SQLite database for local caching

Steps:
1. Connects to MLflow tracking server
2. Initializes artifact manager with local caching
3. Demonstrates artifact registration and retrieval
4. Shows how to access artifact content and metadata

In [None]:
from deepsight.core import ArtifactsManager
from deepsight.integrations.mlflow import MLflowManager
from deepsight.core.artifacts import ArtifactPaths
from deepsight.utils.logging import setup_logging, get_logger

# Configure DeepSight logging at INFO level, then create the logger used by
# the artifact-management cells below.
setup_logging(level="INFO")
logger = get_logger(__name__)

In [None]:
logger.info("Starting DeepSight artifact management example...")

# Must be a plain string: a trailing comma after the literal would make
# `run_id` a 1-tuple, and that tuple would be handed to MLflow below.
run_id = "07c04cc42fd9461e98f7eb0bf42444fb"  # Replace with your run_id

# Initialize MLflow manager
mlflow_mgr = MLflowManager(
    tracking_uri="http://localhost:5000",
    run_id=run_id,
    dwnd_dir="tmp",  # Local download directory
)

logger.info(f"Connected to MLflow at: {mlflow_mgr.tracking_uri}")
logger.info(f"Using run_id: {mlflow_mgr.run_id}")

# Initialize artifact manager with local SQLite cache
artifacts_mgr = ArtifactsManager(
    sqlite_path="tmp/artifacts.db",
    mlflow_manager=mlflow_mgr,
)

logger.info("Initialized artifact manager with local caching")

In [None]:
# Key of the artifact that the cells below register, load and display.
artifact_key = ArtifactPaths.TRAINING  # Replace with actual artifact key

In [None]:
# Example 1: register the chosen artifact for the run held by the MLflow
# manager (presumably this also triggers the download — confirm against
# ArtifactsManager.register_artifact).
artifacts_mgr.register_artifact(artifact_key=artifact_key, run_id=mlflow_mgr.run_id)

In [None]:
# Example 2: load the artifact, downloading it first if it is not available.
logger.info("Loading training artifacts...")
artifact = artifacts_mgr.load_artifact(
    artifact_key=artifact_key,
    run_id=mlflow_mgr.run_id,
    download_if_missing=True,
)

In [None]:
# Convert the artifact to its dictionary form and summarise it in the log.
artifact_dict = artifact.to_dict()
logger.info("Training artifact loaded successfully!")
logger.info(f"Artifact type: {type(artifact).__name__}")

# Only list keys when the conversion really produced a dict.
if isinstance(artifact_dict, dict):
    key_summary = list(artifact_dict.keys())
else:
    key_summary = 'N/A'
logger.info(f"Artifact keys: {key_summary}")

# Dump the full content between two 50-character rulers
# (be careful with large artifacts).
ruler = "=" * 50
print("\n" + ruler)
print("ARTIFACT CONTENT:")
print(ruler)
print(artifact_dict)

logger.info("Artifact management example completed successfully!")

# Run suite of checks

This example demonstrates how to run **comprehensive data validation suites**
using Deepchecks integration with DeepSight.

Features demonstrated:
- Data integrity validation
- Train/test validation
- Model evaluation (optional)
- Result parsing and analysis
- Integration with CLIP models for vision tasks


Requirements:
- Required datasets available
- Deepchecks installed
- Optional: CLIP model for advanced validation

Steps:
1. Loads the dataset and label mappings
2. Configures Deepchecks validation
3. Optionally sets up a CLIP model for advanced validation
4. Runs validation suites
5. Returns parsed results

In [None]:
# DeepSight imports
from deepsight.zoo.datasets.foodwaste import (
    get_label_mapping,
    translations_de_en,
    load_train_and_val_datasets,
)
from deepsight.zoo.timm_models import CLIPModel
from deepsight.core.data import ClassificationVisionDataLoader
from deepsight.integrations import DeepchecksRunner
from deepsight.utils import DeepchecksConfig
from deepsight.utils.logging import setup_logging, get_logger

# Configure DeepSight logging at INFO level, then create the logger used by
# the validation cells below.
setup_logging(level="INFO")
logger = get_logger(__name__)

In [None]:
logger.info("Starting DeepSight data validation example...")

# Label mappings first, then the train/validation datasets at 1024px.
ing2name, ing2label = get_label_mapping()
train_dataset, val_dataset = load_train_and_val_datasets(image_size=1024)

# English class prompts ("a <name>") for the optional CLIP model: each name
# in ing2name is looked up in the German-to-English translation table.
ingredients_en = ["a " + translations_de_en[name] for name in ing2name.values()]

logger.info(
    f"Loaded dataset: {len(train_dataset)} train, {len(val_dataset)} val samples"
)
logger.info(f"Number of classes: {len(ing2name)}")

# Deepchecks settings for a data-only run: integrity and train/test suites
# enabled, model evaluation disabled.
config = DeepchecksConfig(
    # Suites
    data_integrity=True,
    train_test_validation=True,
    model_evaluation=False,
    # Output: JSON results under "results", no saved displays, parsed results
    output_dir="results",
    save_results=True,
    save_results_format="json",
    save_display=False,
    parse_results=True,
)

# Create the runner that executes the configured suites.
runner = DeepchecksRunner(config)
logger.info("Initialized Deepchecks runner")

In [None]:
# Optional model for model-aware checks. Leave as None for data-only checks,
# or assign e.g. CLIPModel('PE-Core-T-16-384', ingredients_en).
model = None
if model is None:
    logger.info("Running validation without model (data-only checks)")
else:
    logger.info("Using CLIP model for advanced validation")

# Wrap both datasets for Deepchecks with identical loader settings
# (train first, then validation).
vision_train_data, vision_test_data = (
    ClassificationVisionDataLoader.load_from_dataset(
        dataset, batch_size=8, shuffle=True, model=model
    )
    for dataset in (train_dataset, val_dataset)
)

In [None]:
logger.info("Running validation suites...")

# Execute the configured suites on the wrapped train and validation data.
results = runner.run_suites(test_data=vision_test_data, train_data=vision_train_data)

logger.info("Validation completed successfully!")
logger.info(f"Results saved to: {config.output_dir}")