# Google Colab Setup for DSPy Evaluation

This notebook provides setup instructions and utilities for running DSPy evaluation experiments in Google Colab.

## Features
- Automatic environment setup
- GPU detection and configuration
- Model loading and configuration
- Evaluation pipeline setup
- Results visualization

## Owner
**Zheng Kai** - Responsible for Colab integration and GPU optimization.


In [None]:
# Install required packages
%pip install dspy-ai
%pip install openai
%pip install together
%pip install transformers
%pip install torch
%pip install datasets
%pip install mlflow
%pip install fastapi
%pip install uvicorn
%pip install pydantic
%pip install pyyaml
%pip install jsonlines
%pip install matplotlib
%pip install seaborn
%pip install pandas
%pip install numpy
%pip install scikit-learn
%pip install tqdm
%pip install wandb


In [None]:
# Clone the repository
!git clone https://github.com/your-org/dspy-eval.git
%cd dspy-eval


In [None]:
# Check GPU availability
import torch
import os

# Ask torch once whether CUDA is usable, then report either the device
# details or the CPU fallback.
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")

if not cuda_ready:
    print("No GPU available. Using CPU.")
else:
    device_count = torch.cuda.device_count()
    print(f"CUDA device count: {device_count}")
    active_device = torch.cuda.current_device()
    print(f"Current CUDA device: {active_device}")
    device_name = torch.cuda.get_device_name()
    print(f"CUDA device name: {device_name}")
    # total_memory is reported in bytes; shown here in units of 1e9 bytes.
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"CUDA memory: {total_gb:.1f} GB")


In [None]:
# Environment setup
# Relies on `torch`, which the GPU-check cell imports.
import sys
import os
from pathlib import Path

# Add project root to Python path.
# Remove any earlier copies first so re-running this cell does not pile up
# duplicate entries, then put the root at the front so it takes precedence.
project_root = Path.cwd()
project_root_str = str(project_root)
sys.path[:] = [entry for entry in sys.path if entry != project_root_str]
sys.path.insert(0, project_root_str)

# Set environment variables for Colab
os.environ['COLAB'] = 'true'
# NOTE(review): torch has already queried CUDA in the GPU-check cell, so this
# presumably only affects child processes started later — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0' if torch.cuda.is_available() else ''

print(f"Project root: {project_root}")
print(f"Python path: {sys.path[0]}")


In [None]:
# Import project modules
# These imports are resolved through sys.path, whose first entry is the
# repository root inserted by the environment-setup cell.
try:
    from src.core.config import load_config
    from src.core.logging import setup_logging
    from src.core.artifacts import ArtifactManager
    from src.core.telemetry import TelemetryManager
    # NOTE(review): presumably the repository's own `datasets` package, found
    # ahead of the pip-installed `datasets` distribution because the project
    # root is first on sys.path — confirm there is no name clash.
    from datasets.registry import get_dataset
    print("✓ Project modules imported successfully")
except ImportError as e:
    # Best-effort: report the failure and let the notebook continue. Names from
    # the failing import onward stay undefined; the data-loading cell catches
    # the resulting error and falls back to built-in sample data.
    print(f"✗ Import error: {e}")
    print("Make sure the project structure is correct")


In [None]:
# Configure models for Colab
# Relies on `torch`, which the GPU-check cell imports.
import os

import dspy
import openai
import together

# Placeholders used only when no real key can be found; API calls made with
# them will be rejected by the provider.
PLACEHOLDER_OPENAI_KEY = "your-openai-key-here"
PLACEHOLDER_TOGETHER_KEY = "your-together-key-here"


def _resolve_api_key(name, placeholder):
    """Return the API key called ``name`` without hard-coding it in the notebook.

    Lookup order:
      1. the process environment variable ``name``;
      2. Colab's secrets manager (``google.colab.userdata``), when available;
      3. ``placeholder`` if neither source provides a non-empty value.

    Args:
        name: Name of the environment variable / Colab secret.
        placeholder: Value returned when no key is found.

    Returns:
        The resolved key string, or ``placeholder``.
    """
    env_value = os.environ.get(name)
    if env_value:
        return env_value
    try:
        from google.colab import userdata
    except ImportError:
        # Not running inside Colab.
        return placeholder
    try:
        return userdata.get(name) or placeholder
    except Exception:
        # The secret is missing or this notebook was not granted access to it;
        # fall back rather than abort the setup cell.
        return placeholder


openai.api_key = _resolve_api_key("OPENAI_API_KEY", PLACEHOLDER_OPENAI_KEY)
together.api_key = _resolve_api_key("TOGETHER_API_KEY", PLACEHOLDER_TOGETHER_KEY)

if openai.api_key == PLACEHOLDER_OPENAI_KEY:
    print("⚠ OPENAI_API_KEY not found in the environment or Colab secrets; "
          "using a placeholder key, so API calls will fail")

# Configure DSPy LM
# Both models are served through the OpenAI API, so the GPU check does not
# change where inference runs; it only selects the model name and token budget.
if torch.cuda.is_available():
    model_name, max_tokens = "gpt-4o-mini", 1000
else:
    model_name, max_tokens = "gpt-3.5-turbo", 500

# NOTE(review): recent DSPy releases appear to use dspy.LM(...) in place of
# dspy.OpenAI — confirm against the installed (unpinned) dspy-ai version.
lm = dspy.OpenAI(
    model=model_name,
    api_key=openai.api_key,
    max_tokens=max_tokens,
    temperature=0.7
)

dspy.settings.configure(lm=lm)
# Report the name we configured rather than relying on the client object
# exposing a `.model` attribute.
print(f"✓ DSPy configured with {model_name}")


In [None]:
# Load sample data
# Built-in records used when a dataset cannot be loaded from the repository.
SAMPLE_SHARKTANK_DATA = [
    {
        "sample_id": "st_001",
        "product_facts": "AI-powered fitness app",
        "guidelines": "Focus on market opportunity",
        "pitch": "Sample pitch for fitness app"
    }
]

SAMPLE_ANEETA_DATA = [
    {
        "sample_id": "an_001",
        "question": "What is machine learning?",
        "context": "ML is a subset of AI",
        "answer": "Machine learning is..."
    }
]


def _load_or_sample(dataset_name, label, fallback):
    """Load ``dataset_name`` via ``get_dataset``; fall back to sample records.

    Each dataset is loaded on its own so that a failure in one does not throw
    away another that loaded successfully.

    Args:
        dataset_name: Registry name passed to ``get_dataset``.
        label: Human-readable name used in the progress messages.
        fallback: List of sample dicts returned (as copies) when loading fails.

    Returns:
        The loaded dataset, or a copy of ``fallback`` if loading raised.
    """
    try:
        loaded = get_dataset(dataset_name, split="test", num_samples=5)
        print(f"✓ Loaded {len(loaded)} {label} samples")
        return loaded
    except Exception as e:
        # Deliberately broad: this also covers the NameError raised when the
        # project-import cell failed and `get_dataset` was never defined.
        print(f"✗ Error loading {label} data: {e}")
        print(f"Creating {label} sample data...")
        sample_rows = [dict(row) for row in fallback]
        print(f"✓ Created {label} sample data")
        return sample_rows


sharktank_data = _load_or_sample("sharktank", "SharkTank", SAMPLE_SHARKTANK_DATA)
aneeta_data = _load_or_sample("aneeta", "ANEETA", SAMPLE_ANEETA_DATA)


In [None]:
# Display sample data
import pandas as pd
import json

# Show the first record of each dataset as indented JSON, one heading per
# dataset (the second heading carries the blank separator line).
previews = (
    ("SharkTank Sample Data:", sharktank_data),
    ("\nANEETA Sample Data:", aneeta_data),
)
for heading, samples in previews:
    print(heading)
    first_sample = samples[0]
    print(json.dumps(first_sample, indent=2))


In [None]:
# Run a simple evaluation
def run_simple_eval(data, project_name):
    """Build one placeholder evaluation record per sample in ``data``.

    No model is called: the prediction, quality score and latency are fixed
    values chosen by ``project_name``.

    Args:
        data: Sequence of sample dicts. Each needs a ``"sample_id"`` key plus
            ``"product_facts"`` (for ``"sharktank"``) or ``"question"`` (for
            any other project name).
        project_name: ``"sharktank"`` selects the pitch settings; every other
            value selects the question-answering settings.

    Returns:
        List of result dicts with keys ``sample_id``, ``input``,
        ``prediction``, ``quality_score`` and ``latency_ms``. Samples that
        raise while being processed are reported and left out.
    """
    # Pick the per-project settings once instead of branching per sample.
    if project_name == "sharktank":
        input_field = "product_facts"
        placeholder_prediction = "Generated pitch placeholder"
        fixed_score, fixed_latency = 8.0, 1000
    else:
        input_field = "question"
        placeholder_prediction = "Generated answer placeholder"
        fixed_score, fixed_latency = 8.5, 800

    collected = []
    for position, sample in enumerate(data, start=1):
        try:
            collected.append({
                "sample_id": sample["sample_id"],
                "input": sample[input_field],
                "prediction": placeholder_prediction,
                "quality_score": fixed_score,
                "latency_ms": fixed_latency,
            })
            print(f"✓ Processed sample {position}/{len(data)}")
        except Exception as e:
            # Best-effort: report the bad sample and carry on with the rest.
            print(f"✗ Error processing sample {position}: {e}")

    return collected

# Run evaluations
# Each entry: (banner printed before the run, project name, samples to score).
evaluation_plan = (
    ("Running SharkTank evaluation...", "sharktank", sharktank_data),
    ("\nRunning ANEETA evaluation...", "aneeta", aneeta_data),
)

results_by_project = {}
for banner, project, samples in evaluation_plan:
    print(banner)
    results_by_project[project] = run_simple_eval(samples, project)

# Keep the per-project names that the following cells read.
sharktank_results = results_by_project["sharktank"]
aneeta_results = results_by_project["aneeta"]

print("\n✓ Completed evaluations:")
print(f"  SharkTank: {len(sharktank_results)} samples")
print(f"  ANEETA: {len(aneeta_results)} samples")


In [None]:
# Visualize results
# Relies on `pd` (imported in the display cell) and on the result lists
# produced by the evaluation cell.
import matplotlib.pyplot as plt
import seaborn as sns

# Create results DataFrames
sharktank_df = pd.DataFrame(sharktank_results)
aneeta_df = pd.DataFrame(aneeta_results)

# (label, frame, histogram colour) for each project, in panel order.
panels = [
    ("SharkTank", sharktank_df, "blue"),
    ("ANEETA", aneeta_df, "green"),
]

# Plot quality scores, one histogram per project
fig, axes = plt.subplots(1, len(panels), figsize=(12, 5))

for ax, (label, frame, color) in zip(axes, panels):
    if frame.empty:
        # An empty result list yields a frame with no columns, so indexing
        # 'quality_score' would raise KeyError; show a note in the panel instead.
        ax.text(0.5, 0.5, "No results", ha="center", va="center",
                transform=ax.transAxes)
    else:
        ax.hist(frame['quality_score'], bins=10, alpha=0.7, color=color)
    ax.set_title(f'{label} Quality Scores')
    ax.set_xlabel('Quality Score')
    ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

# Print summary statistics
print("\nSummary Statistics:")
for label, frame, color in panels:
    if frame.empty:
        print(f"{label} - no results to summarize")
        continue
    print(f"{label} - Mean Quality: {frame['quality_score'].mean():.2f}")
    print(f"{label} - Mean Latency: {frame['latency_ms'].mean():.0f}ms")


In [None]:
# Save results
# Relies on `Path`, which the environment-setup cell imports.
import json
from datetime import datetime

# Create results directory
results_dir = Path("colab_results")
results_dir.mkdir(exist_ok=True)

# One timestamp shared by both files so a run's outputs sort together.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# (label for the report, file name, records to write)
outputs = [
    ("SharkTank", f"sharktank_results_{timestamp}.json", sharktank_results),
    ("ANEETA", f"aneeta_results_{timestamp}.json", aneeta_results),
]

for label, file_name, records in outputs:
    with (results_dir / file_name).open('w') as f:
        f.write(json.dumps(records, indent=2))

print(f"✓ Results saved to {results_dir}")
for label, file_name, records in outputs:
    print(f"  {label}: {file_name}")


## Next Steps

1. **Set up API keys** in Colab secrets
2. **Load actual datasets** from the repository
3. **Run full evaluations** using the CLI tools
4. **Compare results** across different models
5. **Export results** for further analysis

## GPU Optimization Tips

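These tips apply when you run models locally on the Colab GPU; the OpenAI-hosted models configured in this notebook run remotely and do not use it.

- Use smaller batch sizes for GPU memory efficiency
- Enable mixed precision (e.g. fp16/bf16) when training or running local models
- Monitor GPU memory usage (e.g. with `!nvidia-smi`)
- Use gradient checkpointing when fine-tuning large models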

## Troubleshooting

- If imports fail, check the project structure
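- If the GPU is not detected, select a GPU runtime (Runtime → Change runtime type), then restart the runtime and re-run the notebook from the top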
- If memory issues occur, reduce batch size
- If API errors occur, check your API keys
