In [None]:
# Cell 1: Environment Check
import sys, os
sys.path.append('app')

from app.core.settings import settings
from datetime import datetime

# Banner followed by the non-secret settings read from `settings`.
print("🔍 Environment Check")
print(f"📅 Current Time: {datetime.now()}")
print(f"📁 Data Directory: {settings.DATA_DIR}")
print(f"🤖 Default Model: {settings.MODEL_NAME}")
print(f"🔍 Embedding Provider: {settings.EMBEDDING_PROVIDER}")

# Keys reported as required. Only presence is shown; the values themselves
# are never printed.
required_keys = {
    "OPENAI_API_KEY": settings.OPENAI_API_KEY,
    "GOOGLE_API_KEY": settings.GOOGLE_API_KEY,
}

print("\nRequired API Keys:")
for key, value in required_keys.items():
    if value:
        status, label = "✅", "Set"
    else:
        status, label = "❌", "Missing"
    print(f"   {status} {key}: {label}")

# Keys reported as optional: a falsy value is flagged with a warning icon
# instead of a cross.
optional_keys = {
    "GROQ_API_KEY": settings.GROQ_API_KEY,
    "FIRECRAWL_API_KEY": settings.FIRECRAWL_API_KEY,
    "LANGCHAIN_PROJECT": settings.LANGCHAIN_PROJECT,
}

print("\nOptional API Keys:")
for key, value in optional_keys.items():
    if value:
        status, label = "✅", "Set"
    else:
        status, label = "⚠️", "Not set"
    print(f"   {status} {key}: {label}")

In [None]:
# Cell 2: Initialize Services
import sys
sys.path.append('app')

from app.core.dependencies import service_container

# Configuration
ORGANIZATION_ID = "demo-org-2024"
ORGANIZATION_NAME = "Demo Organization"

# Resolve each service once from the shared container.
document_service = service_container.get_document_service()
campaign_service = service_container.get_campaign_service()
reddit_service = service_container.get_reddit_service()
llm_service = service_container.get_llm_service()
analytics_service = service_container.get_analytics_service()

# Fetch the demo organization, creating it when it does not exist yet.
organization = document_service.get_or_create_organization(ORGANIZATION_ID, ORGANIZATION_NAME)

print("🏢 Organization Created/Retrieved")
# Attributes are read one at a time, in display order.
for caption, attribute in (
    ("Name", "name"),
    ("ID", "id"),
    ("Documents", "documents_count"),
    ("Created", "created_at"),
    ("Active", "is_active"),
):
    print(f"   {caption}: {getattr(organization, attribute)}")

print("\n🔧 Services Initialized")
for service_label in ("Document", "Campaign", "Reddit", "LLM", "Analytics"):
    print(f"   {service_label} Service: ✅")

In [None]:
# Cell 3: Ingest Direct Content
import sys, os
sys.path.append('app')

from app.core.dependencies import service_container

# Configuration
ORGANIZATION_ID = "demo-org-2024"

# Service that performs the ingestion.
document_service = service_container.get_document_service()

# A single in-memory document: title, raw text body and free-form metadata.
documents = [{
    "title": "Python Best Practices",
    "content": """
    Python Best Practices for Clean Code
    
    1. Follow PEP 8 Style Guide
    - Use 4 spaces for indentation
    - Keep lines under 79 characters
    - Use descriptive variable names
    
    2. Write Docstrings
    - Document all functions and classes
    - Use triple quotes for docstrings
    
    3. Use Type Hints
    - Add type hints to function parameters
    - Use typing module for complex types
    
    4. Error Handling
    - Use specific exception types
    - Handle exceptions gracefully
    
    5. Testing
    - Write unit tests for all functions
    - Use pytest for testing framework
    """,
    "metadata": {"category": "programming", "language": "python"}
}]

# The service hands back a (success, message, document_ids) triple.
success, message, document_ids = document_service.ingest_documents(
    documents=documents,
    org_id=ORGANIZATION_ID,
)

print("📄 Direct Content Ingestion")
for caption, value in (
    ("Success", success),
    ("Message", message),
    ("Document IDs", document_ids),
):
    print(f"   {caption}: {value}")

# A falsy result (None or empty) counts as zero ingested documents.
ingested_count = len(document_ids) if document_ids else 0
print(f"   Documents Ingested: {ingested_count}")

In [None]:
# Cell 4: Ingest from URL
import sys, os
import asyncio
sys.path.append('app')

from app.core.dependencies import service_container

# Configuration
ORGANIZATION_ID = "demo-org-2024"
URL_TO_SCRAPE = "https://docs.python.org/3/tutorial/introduction.html"

# Get document service
document_service = service_container.get_document_service()

# Ingest from URL
async def ingest_url():
    success, message, document_id = await document_service.ingest_document_from_url(
        url=URL_TO_SCRAPE,
        organization_id=ORGANIZATION_ID,
        title="Python Tutorial Introduction",
        scraping_method="auto"
    )
    
    print(f"🌐 URL Ingestion")
    print(f"   URL: {URL_TO_SCRAPE}")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    print(f"   Document ID: {document_id}")

# Run the async function
await ingest_url()

In [None]:
# Cell 5: Query Documents
import sys, os
sys.path.append('app')

from app.core.dependencies import service_container
from app.models.document import DocumentQuery

# Configuration
ORGANIZATION_ID = "demo-org-2024"
SEARCH_QUERY = "python best practices"

# Service that runs the search.
document_service = service_container.get_document_service()

# Semantic search limited to the three best matches.
query = DocumentQuery(query=SEARCH_QUERY, organization_id=ORGANIZATION_ID, method="semantic", top_k=3)

response = document_service.query_documents(query)

print("🔍 Document Query")
print(f"   Query: {response.query}")
print(f"   Method: {response.method}")
print(f"   Results: {response.total_results}")
print(f"   Processing Time: {response.processing_time_ms:.2f}ms")

print("\n📄 Found Documents:")
for rank, hit in enumerate(response.documents, start=1):
    print(f"   {rank}. {hit.title} (Score: {hit.score:.3f})")
    print(f"      Document ID: {hit.document_id}")
    # Only the first 100 characters of the content are shown.
    preview = hit.content[:100]
    print(f"      Content: {preview}...")

In [None]:
# Cell 6: Create Campaign
import sys, os
import asyncio
sys.path.append('app')

from app.core.dependencies import service_container
from app.models.campaign import CampaignCreateRequest, ResponseTone

# Configuration
ORGANIZATION_ID = "demo-org-2024"

# Get campaign service
campaign_service = service_container.get_campaign_service()

# Create campaign
async def create_campaign():
    request = CampaignCreateRequest(
        name="Python Community Outreach",
        description="Engage with Python learning communities",
        response_tone=ResponseTone.HELPFUL,
        max_responses_per_day=5
    )
    
    success, message, campaign = await campaign_service.create_campaign(
        organization_id=ORGANIZATION_ID,
        request=request
    )
    
    print(f"🎯 Campaign Creation")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    if campaign:
        print(f"   Campaign ID: {campaign.id}")
        print(f"   Name: {campaign.name}")
        print(f"   Status: {campaign.status}")
        print(f"   Tone: {campaign.response_tone}")
        return campaign.id
    
    await campaign_service.cleanup()
    return None

campaign_id = await create_campaign()
print(f"\n📝 Campaign ID for next steps: {campaign_id}")

In [None]:
# Cell 7: Discover Topics
import sys, os
sys.path.append('app')

from app.core.dependencies import service_container
from app.models.campaign import SubredditDiscoveryRequest

# Configuration - Replace with actual IDs from previous cells
CAMPAIGN_ID = campaign_id  # From previous cell
DOCUMENT_IDS = ["doc-1", "doc-2"]  # Replace with actual document IDs from ingestion

# Get campaign service
campaign_service = service_container.get_campaign_service()

# Discover topics
async def discover_topics():
    request = SubredditDiscoveryRequest(document_ids=DOCUMENT_IDS)
    
    success, message, data = await campaign_service.discover_topics(
        campaign_id=CAMPAIGN_ID,
        request=request
    )
    
    print(f"🔍 Topic Discovery")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    if data and "topics" in data:
        print(f"   Topics Found: {len(data['topics'])}")
        for i, topic in enumerate(data["topics"], 1):
            print(f"      {i}. {topic}")
        return data["topics"]
    
    await campaign_service.cleanup()
    return []

topics = await discover_topics()
print(f"\n📝 Topics for next steps: {topics[:5]}...")  # Show first 5 topics

In [None]:
# Cell 8: Discover Subreddits
import sys, os
import asyncio
sys.path.append('app')

from app.core.dependencies import service_container
from app.models.campaign import SubredditDiscoveryByTopicsRequest

# Configuration
CAMPAIGN_ID = campaign_id  # From previous cell
TOPICS = topics[:4] if topics else ["python", "programming", "coding", "software development"]

# Get campaign service
campaign_service = service_container.get_campaign_service()

# Discover subreddits
async def discover_subreddits():
    request = SubredditDiscoveryByTopicsRequest(topics=TOPICS)
    
    success, message, data = await campaign_service.discover_subreddits(
        campaign_id=CAMPAIGN_ID,
        request=request
    )
    
    print(f"🎯 Subreddit Discovery")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    if data and "subreddits" in data:
        print(f"   Subreddits Found: {len(data['subreddits'])}")
        for i, subreddit in enumerate(data["subreddits"], 1):
            print(f"      {i}. r/{subreddit}")
        return data["subreddits"]
    
    await campaign_service.cleanup()
    return []

subreddits = await discover_subreddits()
print(f"\n📝 Subreddits for next steps: {subreddits[:3]}...")  # Show first 3 subreddits

In [None]:
# Cell 9: Search Subreddits
import sys, os
import asyncio
sys.path.append('app')

from app.core.dependencies import service_container

# Configuration
SEARCH_QUERY = "python programming"

# Get reddit service
reddit_service = service_container.get_reddit_service()

# Search subreddits
async def search_subreddits():
    success, message, results = await reddit_service.search_subreddits(SEARCH_QUERY, limit=5)
    
    print(f"🔍 Subreddit Search")
    print(f"   Query: {SEARCH_QUERY}")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    print(f"   Results: {len(results) if results else 0}")
    
    if results:
        print(f"\n🎯 Found Subreddits:")
        for i, subreddit in enumerate(results, 1):
            print(f"   {i}. r/{subreddit['name']} ({subreddit['subscribers']:,} subscribers)")
            print(f"      Description: {subreddit['description'][:80]}...")
    
    await reddit_service.cleanup()

await search_subreddits()

In [None]:
# Cell 10: Generate LLM Response
import sys, os
import asyncio
sys.path.append('app')

from app.core.dependencies import service_container

# Configuration
PROMPT = "Explain the benefits of using Python for web development"

# Get LLM service
llm_service = service_container.get_llm_service()

# Generate response
async def generate_response():
    response = await llm_service.generate_completion(
        prompt=PROMPT,
        response_format="text",
        temperature=0.7
    )
    
    print(f"🤖 LLM Response Generation")
    print(f"   Prompt: {PROMPT}")
    print(f"   Response Length: {len(str(response))} characters")
    print(f"\n📝 Generated Response:")
    print(f"   {str(response)[:200]}...")

await generate_response()

In [None]:
# Cell 11: Extract Topics from Content
import sys, os
import asyncio
sys.path.append('app')

from app.core.dependencies import service_container

# Configuration
CONTENT = """
Python is a versatile programming language that's great for web development, 
data science, machine learning, and automation. It has frameworks like Django 
and Flask for web development, pandas and numpy for data analysis, and 
scikit-learn for machine learning. Python is also popular for DevOps and 
system administration tasks.
"""

# Get LLM service
llm_service = service_container.get_llm_service()

# Extract topics
async def extract_topics():
    success, message, topics = await llm_service.extract_topics_from_content(CONTENT)
    
    print(f"🔍 Topic Extraction")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    print(f"   Topics Found: {len(topics) if topics else 0}")
    
    if topics:
        print(f"\n📋 Extracted Topics:")
        for i, topic in enumerate(topics, 1):
            print(f"   {i}. {topic}")

await extract_topics()

In [None]:
# Cell 12: Get Analytics
import sys, os
sys.path.append('app')

from app.core.dependencies import service_container

# Configuration
ORGANIZATION_ID = "demo-org-2024"

# Service that produces the metrics.
analytics_service = service_container.get_analytics_service()

# Per-organization stats first, then platform-wide metrics.
quick_stats = analytics_service.get_quick_stats(ORGANIZATION_ID)
platform_overview = analytics_service.get_overall_platform_metrics()

print("📊 Analytics Dashboard")
print(f"   Organization: {ORGANIZATION_ID}")

# A section is skipped entirely when its payload contains an "error" key.
if "error" not in quick_stats:
    print("\n📈 Quick Stats:")
    for caption, stat_key in (
        ("Total Campaigns", "total_campaigns"),
        ("Active Campaigns", "active_campaigns"),
        ("Total Documents", "total_documents"),
    ):
        print(f"   {caption}: {quick_stats.get(stat_key, 0)}")
    print(f"   Success Rate: {quick_stats.get('success_rate', 0):.1f}%")

if "error" not in platform_overview:
    campaign_stats = platform_overview.get("campaign_stats", {})
    print("\n🌐 Platform Overview:")
    for caption, stat_key in (
        ("Total Campaigns", "total_campaigns"),
        ("Total Organizations", "total_organizations"),
        ("Active Campaigns", "active_campaigns"),
    ):
        print(f"   {caption}: {campaign_stats.get(stat_key, 0)}")
    print(f"   Platform Insights: {len(platform_overview.get('platform_insights', []))}")

In [None]:
# Cell 13: List Organizations
import sys, os
sys.path.append('app')

from app.core.dependencies import service_container

# Service that owns the organization records.
document_service = service_container.get_document_service()

organizations = document_service.list_organizations()

print("🏢 Organizations List")
print(f"   Total Organizations: {len(organizations)}")

if not organizations:
    print("   No organizations found")
else:
    print("\n📋 Organizations:")
    for index, org in enumerate(organizations, start=1):
        print(f"   {index}. {org.name} ({org.id})")
        # Remaining attributes are read one at a time, in display order.
        for caption, attribute in (
            ("Documents", "documents_count"),
            ("Created", "created_at"),
            ("Active", "is_active"),
        ):
            print(f"      {caption}: {getattr(org, attribute)}")

In [None]:
# Cell 14: List Campaigns
import sys, os
import asyncio
sys.path.append('app')

from app.core.dependencies import service_container

# Configuration
ORGANIZATION_ID = "demo-org-2024"

# Get campaign service
campaign_service = service_container.get_campaign_service()

# List campaigns
async def list_campaigns():
    success, message, campaigns = await campaign_service.list_campaigns(ORGANIZATION_ID)
    
    print(f"📋 Campaigns List")
    print(f"   Organization: {ORGANIZATION_ID}")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    print(f"   Total Campaigns: {len(campaigns) if campaigns else 0}")
    
    if campaigns:
        print(f"\n🎯 Campaigns:")
        for i, campaign in enumerate(campaigns, 1):
            print(f"   {i}. {campaign.name} ({campaign.status})")
            print(f"      ID: {campaign.id}")
            print(f"      Created: {campaign.created_at}")
            print(f"      Tone: {campaign.response_tone}")
    
    await campaign_service.cleanup()

await list_campaigns()

In [None]:
# Cell 15: Web Scraping Test
import sys, os
sys.path.append('app')

from app.services.scraper_service import WebScraperService

# Configuration
TEST_URL = "https://httpbin.org/html"  # Simple test URL
SCRAPING_METHOD = "requests"  # Use requests method for reliability

# The scraper is built directly here instead of being taken from the service container.
web_scraper = WebScraperService()

scraped_content = web_scraper.scrape_url(TEST_URL, method=SCRAPING_METHOD)

print("🌐 Web Scraping Test")
print(f"   URL: {TEST_URL}")
print(f"   Method: {SCRAPING_METHOD}")
# "Success" means a non-None result; an empty result still takes the failure branch below.
print(f"   Success: {scraped_content is not None}")

if not scraped_content:
    print("   Failed to scrape content")
else:
    print(f"   Content Length: {len(scraped_content)} characters")
    print("\n📄 Content Preview:")
    print(f"   {scraped_content[:200]}...")