In [None]:
# Cell 1: Environment Check
import sys, os
sys.path.append('app')

from app.core.settings import settings
from datetime import datetime

# Summarise the runtime configuration the rest of the notebook depends on.
# Only presence/absence of each key is printed, never the value itself.
print("🔍 Environment Check")
print(f"📅 Current Time: {datetime.now()}")
print(f"📁 Data Directory: {settings.DATA_DIR}")
print(f"🤖 Default Model: {settings.MODEL_NAME}")
print(f"🔍 Embedding Provider: {settings.EMBEDDING_PROVIDER}")


def _report_keys(heading, configured, absent_icon, absent_label):
    """Print ``heading`` followed by one status line per entry of ``configured``.

    A truthy value is reported as "✅ ... Set"; a falsy one is reported with
    ``absent_icon`` and ``absent_label``.
    """
    print(heading)
    for name, secret in configured.items():
        if secret:
            icon, state = "✅", "Set"
        else:
            icon, state = absent_icon, absent_label
        print(f"   {icon} {name}: {state}")


required_keys = {
    "OPENAI_API_KEY": settings.OPENAI_API_KEY,
    "GOOGLE_API_KEY": settings.GOOGLE_API_KEY,
}
_report_keys("\nRequired API Keys:", required_keys, "❌", "Missing")

optional_keys = {
    "GROQ_API_KEY": settings.GROQ_API_KEY,
    "FIRECRAWL_API_KEY": settings.FIRECRAWL_API_KEY,
    "LANGCHAIN_PROJECT": settings.LANGCHAIN_PROJECT,
}
_report_keys("\nOptional API Keys:", optional_keys, "⚠️", "Not set")

🔍 Environment Check
📅 Current Time: 2025-06-22 06:52:19.046431
📁 Data Directory: data
🤖 Default Model: gpt-4o
🔍 Embedding Provider: openai

Required API Keys:
   ✅ OPENAI_API_KEY: Set
   ✅ GOOGLE_API_KEY: Set

Optional API Keys:
   ✅ GROQ_API_KEY: Set
   ✅ FIRECRAWL_API_KEY: Set
   ⚠️ LANGCHAIN_PROJECT: Not set


In [2]:
# Cell 2: Create Organization
import sys
sys.path.append('app')

from app.services.document_service import DocumentService
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage
from app.storage.vector_storage import VectorStorage
from app.clients.storage_client import VectorStorageClient
from app.services.scraper_service import WebScraperService

# Configuration
ORGANIZATION_ID = "demo-org-2024"
ORGANIZATION_NAME = "Demo Organization"

# Initialize services: storage backends first, then the manager and scraper
# that DocumentService is composed from.
json_storage = JsonStorage()
vector_storage_client = VectorStorageClient()
vector_storage = VectorStorage(vector_storage_client)
document_manager = DocumentManager(json_storage)
web_scraper_service = WebScraperService()
document_service = DocumentService(
    document_manager,
    vector_storage,
    web_scraper_service,
)

# Fetch the organization, creating it on first use.
organization = document_service.get_or_create_organization(
    ORGANIZATION_ID,
    ORGANIZATION_NAME,
)

# Report the fields of the returned organization, one per line.
print("🏢 Organization Created/Retrieved")
for label, attribute in (
    ("Name", "name"),
    ("ID", "id"),
    ("Documents", "documents_count"),
    ("Created", "created_at"),
    ("Active", "is_active"),
):
    print(f"   {label}: {getattr(organization, attribute)}")

🏢 Organization Created/Retrieved
   Name: Demo Organization
   ID: demo-org-2024
   Documents: 0
   Created: 2025-06-22 01:22:29.034254+00:00
   Active: True


In [None]:
# Cell 3: Ingest Direct Content
import sys, os
sys.path.append('app')

from app.services.document_service import DocumentService
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage
from app.storage.vector_storage import VectorStorage
from app.clients.storage_client import VectorStorageClient
from app.services.scraper_service import WebScraperService

# Configuration
ORGANIZATION_ID = "demo-org-2024"

# Initialize services: storage backends first, then the manager and scraper
# that DocumentService is composed from.
json_storage = JsonStorage()
vector_storage_client = VectorStorageClient()
vector_storage = VectorStorage(vector_storage_client)
document_manager = DocumentManager(json_storage)
web_scraper_service = WebScraperService()
document_service = DocumentService(
    document_manager,
    vector_storage,
    web_scraper_service,
)

# Document content: a single inline document with title, body and metadata.
documents = [{
    "title": "Python Best Practices",
    "content": """
    Python Best Practices for Clean Code
    
    1. Follow PEP 8 Style Guide
    - Use 4 spaces for indentation
    - Keep lines under 79 characters
    - Use descriptive variable names
    
    2. Write Docstrings
    - Document all functions and classes
    - Use triple quotes for docstrings
    
    3. Use Type Hints
    - Add type hints to function parameters
    - Use typing module for complex types
    
    4. Error Handling
    - Use specific exception types
    - Handle exceptions gracefully
    
    5. Testing
    - Write unit tests for all functions
    - Use pytest for testing framework
    """,
    "metadata": {"category": "programming", "language": "python"}
}]

# Ingest documents; the service answers with (success, message, document_ids).
ingestion_result = document_service.ingest_documents(
    documents=documents,
    org_id=ORGANIZATION_ID,
)
success, message, document_ids = ingestion_result

print("📄 Direct Content Ingestion")
for label, shown in (
    ("Success", success),
    ("Message", message),
    ("Document IDs", document_ids),
):
    print(f"   {label}: {shown}")

# An empty or missing ID list is reported as zero ingested documents.
ingested_count = len(document_ids) if document_ids else 0
print(f"   Documents Ingested: {ingested_count}")

Calculating embeddings: 1it [00:01,  1.10s/it]


📄 Direct Content Ingestion
   Success: True
   Message: Successfully ingested 1 documents (1 chunks)
   Document IDs: ['18772b5c-f228-48cb-a517-b10aea55b163']
   Documents Ingested: 1


In [None]:
# Cell 4: Ingest from URL
import sys, os
import asyncio
sys.path.append('app')

from app.services.document_service import DocumentService
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage
from app.storage.vector_storage import VectorStorage
from app.clients.storage_client import VectorStorageClient
from app.services.scraper_service import WebScraperService

# Configuration
ORGANIZATION_ID = "demo-org-2024"
URL_TO_SCRAPE = "https://docs.python.org/3/tutorial/introduction.html"

# Initialize services
json_storage = JsonStorage()
vector_storage_client = VectorStorageClient()
vector_storage = VectorStorage(vector_storage_client)
document_manager = DocumentManager(json_storage)
web_scraper_service = WebScraperService()
document_service = DocumentService(document_manager, vector_storage, web_scraper_service)

# Ingest from URL
async def ingest_url():
    success, message, document_id = await document_service.ingest_document_from_url(
        url=URL_TO_SCRAPE,
        organization_id=ORGANIZATION_ID,
        title="Python Tutorial Introduction",
        scraping_method="auto"
    )
    
    print(f"🌐 URL Ingestion")
    print(f"   URL: {URL_TO_SCRAPE}")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    print(f"   Document ID: {document_id}")

# Run the async function
await ingest_url()

Calculating embeddings: 1it [00:01,  1.20s/it]


🌐 URL Ingestion
   URL: https://docs.python.org/3/tutorial/introduction.html
   Success: True
   Message: Successfully ingested document from URL: https://docs.python.org/3/tutorial/introduction.html
   Document ID: 5dea4fc9-8516-4cc9-916c-cc9bfbea2420


In [None]:
# Cell 5: Query Documents
import sys, os
sys.path.append('app')

from app.services.document_service import DocumentService
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage
from app.storage.vector_storage import VectorStorage
from app.clients.storage_client import VectorStorageClient
from app.services.scraper_service import WebScraperService
from app.models.document import DocumentQuery

# Configuration
ORGANIZATION_ID = "demo-org-2024"
SEARCH_QUERY = "python best practices"

# Initialize services: storage backends first, then the manager and scraper
# that DocumentService is composed from.
json_storage = JsonStorage()
vector_storage_client = VectorStorageClient()
vector_storage = VectorStorage(vector_storage_client)
document_manager = DocumentManager(json_storage)
web_scraper_service = WebScraperService()
document_service = DocumentService(
    document_manager,
    vector_storage,
    web_scraper_service,
)

# Create query: semantic search limited to the three best matches.
query = DocumentQuery(
    query=SEARCH_QUERY,
    organization_id=ORGANIZATION_ID,
    method="semantic",
    top_k=3,
)

# Execute query
response = document_service.query_documents(query)

# Summary of the query as echoed back by the service.
print("🔍 Document Query")
print(f"   Query: {response.query}")
print(f"   Method: {response.method}")
print(f"   Results: {response.total_results}")
print(f"   Processing Time: {response.processing_time_ms:.2f}ms")

# NOTE(review): in the recorded output the most relevant hit has the lowest
# score (0.817 vs 1.242 / 1.267), so `score` looks like a distance rather
# than a similarity — confirm before interpreting it.
print("\n📄 Found Documents:")
for rank, hit in enumerate(response.documents, start=1):
    print(f"   {rank}. {hit.title} (Score: {hit.score:.3f})")
    print(f"      Document ID: {hit.document_id}")
    # Only the first 100 characters of each hit are shown.
    preview = hit.content[:100]
    print(f"      Content: {preview}...")


🔍 Document Query
   Query: python best practices
   Method: semantic
   Results: 3
   Processing Time: 952.63ms

📄 Found Documents:
   1. Python Best Practices (Score: 0.817)
      Document ID: 18772b5c-f228-48cb-a517-b10aea55b163
      Content: Python Best Practices for Clean Code 1. Follow PEP 8 Style Guide - Use 4 spaces for indentation - Ke...
   2. Python Tutorial Introduction (Score: 1.242)
      Document ID: 5dea4fc9-8516-4cc9-916c-cc9bfbea2420
      Content: [printf-style String Formatting](https://docs.python.org/3/library/stdtypes.html#old-string-formatti...
   3. Python Tutorial Introduction (Score: 1.267)
      Document ID: 5dea4fc9-8516-4cc9-916c-cc9bfbea2420
      Content: with a text editor; all decent text editors have an auto-indent facility. When a compound statement ...


In [None]:
# Cell 6: Create Campaign
import sys, os
import asyncio
sys.path.append('app')

from app.services.campaign_service import CampaignService
from app.services.document_service import DocumentService
from app.services.reddit_service import RedditService
from app.services.llm_service import LLMService
from app.managers.campaign_manager import CampaignManager
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage
from app.storage.vector_storage import VectorStorage
from app.clients.storage_client import VectorStorageClient
from app.clients.llm_client import LLMClient
from app.clients.reddit_client import RedditClient
from app.services.scraper_service import WebScraperService
from app.models.campaign import CampaignCreateRequest, ResponseTone

# Configuration
ORGANIZATION_ID = "demo-org-2024"
REDDIT_CREDENTIALS = {
    "client_id": os.getenv("REDDIT_CLIENT_ID"),
    "client_secret": os.getenv("REDDIT_CLIENT_SECRET")
}

# Initialize services
json_storage = JsonStorage()
vector_storage_client = VectorStorageClient()
vector_storage = VectorStorage(vector_storage_client)
document_manager = DocumentManager(json_storage)
campaign_manager = CampaignManager(json_storage)
web_scraper_service = WebScraperService()
document_service = DocumentService(document_manager, vector_storage, web_scraper_service)
llm_client = LLMClient()
llm_service = LLMService(llm_client)
reddit_client = RedditClient(
    client_id=REDDIT_CREDENTIALS["client_id"],
    client_secret=REDDIT_CREDENTIALS["client_secret"]
)
reddit_service = RedditService(json_storage, reddit_client)
campaign_service = CampaignService(campaign_manager, document_service, reddit_service, llm_service)

# Create campaign
async def create_campaign():
    request = CampaignCreateRequest(
        name="Python Community Outreach",
        description="Engage with Python learning communities",
        response_tone=ResponseTone.HELPFUL,
        max_responses_per_day=5
    )
    
    success, message, campaign = await campaign_service.create_campaign(
        organization_id=ORGANIZATION_ID,
        request=request
    )
    
    print(f"🎯 Campaign Creation")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    if campaign:
        print(f"   Campaign ID: {campaign.id}")
        print(f"   Name: {campaign.name}")
        print(f"   Status: {campaign.status}")
        print(f"   Tone: {campaign.response_tone}")
    
    await campaign_service.cleanup()

await create_campaign()

🎯 Campaign Creation
   Success: True
   Message: Campaign 'Python Community Outreach' created successfully
   Campaign ID: 64a74b8f-951a-4ce0-8739-59450c238066
   Name: Python Community Outreach
   Status: CampaignStatus.CREATED
   Tone: ResponseTone.HELPFUL


In [None]:
# Cell 7: Discover Topics
import sys, os
sys.path.append('app')

from app.services.campaign_service import CampaignService
from app.services.document_service import DocumentService
from app.services.reddit_service import RedditService
from app.services.llm_service import LLMService
from app.managers.campaign_manager import CampaignManager
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage
from app.storage.vector_storage import VectorStorage
from app.clients.storage_client import VectorStorageClient
from app.clients.llm_client import LLMClient
from app.clients.reddit_client import RedditClient
from app.services.scraper_service import WebScraperService
from app.models.campaign import SubredditDiscoveryRequest

# Configuration - Replace with actual IDs from previous cells
CAMPAIGN_ID = "64a74b8f-951a-4ce0-8739-59450c238066"  # Replace with actual campaign ID
DOCUMENT_IDS = ["5dea4fc9-8516-4cc9-916c-cc9bfbea2420","18772b5c-f228-48cb-a517-b10aea55b163"]  # Replace with actual document IDs
REDDIT_CREDENTIALS = {
    "client_id": os.getenv("REDDIT_CLIENT_ID"),
    "client_secret": os.getenv("REDDIT_CLIENT_SECRET")
}

# Initialize services
json_storage = JsonStorage()
vector_storage_client = VectorStorageClient()
vector_storage = VectorStorage(vector_storage_client)
document_manager = DocumentManager(json_storage)
campaign_manager = CampaignManager(json_storage)
web_scraper_service = WebScraperService()
document_service = DocumentService(document_manager, vector_storage, web_scraper_service)
llm_client = LLMClient()
llm_service = LLMService(llm_client)
reddit_client = RedditClient(
    client_id=REDDIT_CREDENTIALS["client_id"],
    client_secret=REDDIT_CREDENTIALS["client_secret"]
)
reddit_service = RedditService(json_storage, reddit_client)
campaign_service = CampaignService(campaign_manager, document_service, reddit_service, llm_service)

# Discover topics
async def discover_topics():
    request = SubredditDiscoveryRequest(document_ids=DOCUMENT_IDS)
    
    success, message, data = await campaign_service.discover_topics(
        campaign_id=CAMPAIGN_ID,
        request=request
    )
    
    print(f"🔍 Topic Discovery")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    if data and "topics" in data:
        print(f"   Topics Found: {len(data['topics'])}")
        for i, topic in enumerate(data["topics"], 1):
            print(f"      {i}. {topic}")
    
    await campaign_service.cleanup()

await discover_topics()

🔍 Topic Discovery
   Success: True
   Message: Extracted 20 topics from 2 documents
   Topics Found: 20
      1. Python
      2. Python Tutorial
      3. Python Documentation
      4. Python Interpreter
      5. Python Programming
      6. Data Types
      7. Numbers
      8. Strings
      9. Lists
      10. Arithmetic Operators
      11. Variables
      12. Comments
      13. Control Flow
      14. Functions
      15. Modules
      16. Error Handling
      17. Testing
      18. PEP 8
      19. Type Hints
      20. Docstrings


In [5]:
# Cell 8: Discover Subreddits
import sys, os
import asyncio
sys.path.append('app')

from app.services.campaign_service import CampaignService
from app.services.document_service import DocumentService
from app.services.reddit_service import RedditService
from app.services.llm_service import LLMService
from app.managers.campaign_manager import CampaignManager
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage
from app.storage.vector_storage import VectorStorage
from app.clients.storage_client import VectorStorageClient
from app.clients.llm_client import LLMClient
from app.clients.reddit_client import RedditClient
from app.services.scraper_service import WebScraperService
from app.models.campaign import SubredditDiscoveryByTopicsRequest

# Configuration
CAMPAIGN_ID = "64a74b8f-951a-4ce0-8739-59450c238066"  # Replace with actual campaign ID
TOPICS = ["python", "programming", "coding", "software development"]  # Example topics
REDDIT_CREDENTIALS = {
    "client_id": os.getenv("REDDIT_CLIENT_ID"),
    "client_secret": os.getenv("REDDIT_CLIENT_SECRET")
}

# Initialize services
json_storage = JsonStorage()
vector_storage_client = VectorStorageClient()
vector_storage = VectorStorage(vector_storage_client)
document_manager = DocumentManager(json_storage)
campaign_manager = CampaignManager(json_storage)
web_scraper_service = WebScraperService()
document_service = DocumentService(document_manager, vector_storage, web_scraper_service)
llm_client = LLMClient()
llm_service = LLMService(llm_client)
reddit_client = RedditClient(
    client_id=REDDIT_CREDENTIALS["client_id"],
    client_secret=REDDIT_CREDENTIALS["client_secret"]
)
reddit_service = RedditService(json_storage, reddit_client)
campaign_service = CampaignService(campaign_manager, document_service, reddit_service, llm_service)

# Discover subreddits
async def discover_subreddits():
    request = SubredditDiscoveryByTopicsRequest(topics=TOPICS)
    
    success, message, data = await campaign_service.discover_subreddits(
        campaign_id=CAMPAIGN_ID,
        request=request
    )
    
    print(f"🎯 Subreddit Discovery")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    if data and "subreddits" in data:
        print(f"   Subreddits Found: {len(data['subreddits'])}")
        for i, subreddit in enumerate(data["subreddits"], 1):
            print(f"      {i}. r/{subreddit}")
    
    await campaign_service.cleanup()

await discover_subreddits()

🎯 Subreddit Discovery
   Success: True
   Message: Discovered 10 relevant subreddits
   Subreddits Found: 10
      1. r/Python
      2. r/learnpython
      3. r/PythonLearning
      4. r/PythonProjects2
      5. r/pythontips
      6. r/PythonJobs
      7. r/learnprogramming
      8. r/programming
      9. r/AskProgramming
      10. r/CodingHelp


In [6]:
# Cell 9: Search Subreddits
import sys, os
import asyncio
sys.path.append('app')

from app.services.reddit_service import RedditService
from app.storage.json_storage import JsonStorage
from app.clients.reddit_client import RedditClient

# Configuration
SEARCH_QUERY = "python programming"
REDDIT_CREDENTIALS = {
    "client_id": os.getenv("REDDIT_CLIENT_ID"),
    "client_secret": os.getenv("REDDIT_CLIENT_SECRET")
}

# Initialize services
json_storage = JsonStorage()
reddit_client = RedditClient(
    client_id=REDDIT_CREDENTIALS["client_id"],
    client_secret=REDDIT_CREDENTIALS["client_secret"]
)
reddit_service = RedditService(json_storage, reddit_client)

# Search subreddits
async def search_subreddits():
    success, message, results = await reddit_service.search_subreddits(SEARCH_QUERY, limit=5)
    
    print(f"🔍 Subreddit Search")
    print(f"   Query: {SEARCH_QUERY}")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    print(f"   Results: {len(results) if results else 0}")
    
    if results:
        print(f"\n🎯 Found Subreddits:")
        for i, subreddit in enumerate(results, 1):
            print(f"   {i}. r/{subreddit['name']} ({subreddit['subscribers']:,} subscribers)")
            print(f"      Description: {subreddit['description'][:80]}...")
    
    await reddit_service.cleanup()

await search_subreddits()

🔍 Subreddit Search
   Query: python programming
   Success: True
   Message: Found 5 subreddits for 'python programming'
   Results: 5

🎯 Found Subreddits:
   1. r/PythonProgramming (1,520 subscribers)
      Description: ...
   2. r/Python (1,366,988 subscribers)
      Description: The official Python community for Reddit! Stay up to date with the latest news, ...
   3. r/PythonLearning (36,420 subscribers)
      Description: Everything about learning the programming language Python....
   4. r/learnpython (935,779 subscribers)
      Description: Subreddit for posting questions and asking for general advice about all topics r...
   5. r/programming (6,778,149 subscribers)
      Description: Computer Programming...


In [7]:
# Cell 10: Generate LLM Response
import sys, os
import asyncio
sys.path.append('app')

from app.services.llm_service import LLMService
from app.clients.llm_client import LLMClient

# Configuration
PROMPT = "Explain the benefits of using Python for web development"

# Initialize services
llm_client = LLMClient()
llm_service = LLMService(llm_client)

# Generate response
async def generate_response():
    response = await llm_service.generate_completion(
        prompt=PROMPT,
        response_format="text",
        temperature=0.7
    )
    
    print(f"🤖 LLM Response Generation")
    print(f"   Prompt: {PROMPT}")
    print(f"   Response Length: {len(str(response))} characters")
    print(f"\n📝 Generated Response:")
    print(f"   {str(response)[:200]}...")

await generate_response()

🤖 LLM Response Generation
   Prompt: Explain the benefits of using Python for web development
   Response Length: 4610 characters

📝 Generated Response:
   Python has become a popular choice for web development due to its versatility, readability, and extensive ecosystem. Here's a breakdown of the benefits of using Python for web development:

**1. Reada...


In [None]:
# Cell 11: Extract Topics from Content
import sys, os
import asyncio
sys.path.append('app')

from app.services.llm_service import LLMService
from app.clients.llm_client import LLMClient

# Configuration
CONTENT = """
Python is a versatile programming language that's great for web development, 
data science, machine learning, and automation. It has frameworks like Django 
and Flask for web development, pandas and numpy for data analysis, and 
scikit-learn for machine learning. Python is also popular for DevOps and 
system administration tasks.
"""

# Initialize services
llm_client = LLMClient()
llm_service = LLMService(llm_client)

# Extract topics
async def extract_topics():
    success, message, topics = await llm_service.extract_topics_from_content(CONTENT)
    
    print(f"🔍 Topic Extraction")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    print(f"   Topics Found: {len(topics) if topics else 0}")
    
    if topics:
        print(f"\n📋 Extracted Topics:")
        for i, topic in enumerate(topics, 1):
            print(f"   {i}. {topic}")

await extract_topics()

In [8]:
# Cell 12: Get Analytics
import sys, os
sys.path.append('app')

from app.services.analytics_service import AnalyticsService
from app.managers.analytics_manager import AnalyticsManager
from app.managers.campaign_manager import CampaignManager
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage

# Configuration
ORGANIZATION_ID = "demo-org-2024"

# Initialize services
json_storage = JsonStorage()
document_manager = DocumentManager(json_storage)
campaign_manager = CampaignManager(json_storage)
analytics_manager = AnalyticsManager(campaign_manager, document_manager)
analytics_service = AnalyticsService(analytics_manager)

# Get analytics: per-organization stats and platform-wide metrics.
quick_stats = analytics_service.get_quick_stats(ORGANIZATION_ID)
platform_overview = analytics_service.get_overall_platform_metrics()

print(f"📊 Analytics Dashboard")
print(f"   Organization: {ORGANIZATION_ID}")

# Each payload signals failure with an "error" key. Report it explicitly so a
# failed lookup is not mistaken for an empty section.
if "error" not in quick_stats:
    print(f"\n📈 Quick Stats:")
    print(f"   Total Campaigns: {quick_stats.get('total_campaigns', 0)}")
    print(f"   Active Campaigns: {quick_stats.get('active_campaigns', 0)}")
    print(f"   Total Documents: {quick_stats.get('total_documents', 0)}")
    print(f"   Success Rate: {quick_stats.get('success_rate', 0):.1f}%")
else:
    print(f"\n❌ Quick Stats unavailable: {quick_stats['error']}")

if "error" not in platform_overview:
    # Campaign counters are nested under "campaign_stats"; a missing entry is
    # treated as an empty dict so every counter falls back to 0.
    campaign_stats = platform_overview.get("campaign_stats", {})
    print(f"\n🌐 Platform Overview:")
    print(f"   Total Campaigns: {campaign_stats.get('total_campaigns', 0)}")
    print(f"   Total Organizations: {campaign_stats.get('total_organizations', 0)}")
    print(f"   Active Campaigns: {campaign_stats.get('active_campaigns', 0)}")
    print(f"   Platform Insights: {len(platform_overview.get('platform_insights', []))}")
else:
    print(f"\n❌ Platform Overview unavailable: {platform_overview['error']}")

📊 Analytics Dashboard
   Organization: demo-org-2024

📈 Quick Stats:
   Total Campaigns: 1
   Active Campaigns: 1
   Total Documents: 2
   Success Rate: 0.0%

🌐 Platform Overview:
   Total Campaigns: 1
   Total Organizations: 1
   Active Campaigns: 1
   Platform Insights: 1


In [None]:
# Cell 13: List Organizations
import sys, os
sys.path.append('app')

from app.services.document_service import DocumentService
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage
from app.storage.vector_storage import VectorStorage
from app.clients.storage_client import VectorStorageClient
from app.services.scraper_service import WebScraperService

# Initialize services: storage backends first, then the manager and scraper
# that DocumentService is composed from.
json_storage = JsonStorage()
vector_storage_client = VectorStorageClient()
vector_storage = VectorStorage(vector_storage_client)
document_manager = DocumentManager(json_storage)
web_scraper_service = WebScraperService()
document_service = DocumentService(
    document_manager,
    vector_storage,
    web_scraper_service,
)

# List organizations
organizations = document_service.list_organizations()

print("🏢 Organizations List")
print(f"   Total Organizations: {len(organizations)}")

# Handle the empty case first, then print one numbered entry per organization.
if not organizations:
    print("   No organizations found")
else:
    print("\n📋 Organizations:")
    for position, entry in enumerate(organizations, start=1):
        print(f"   {position}. {entry.name} ({entry.id})")
        print(f"      Documents: {entry.documents_count}")
        print(f"      Created: {entry.created_at}")
        print(f"      Active: {entry.is_active}")

In [None]:
# Cell 14: List Campaigns
import sys, os
import asyncio
sys.path.append('app')

from app.services.campaign_service import CampaignService
from app.services.document_service import DocumentService
from app.services.reddit_service import RedditService
from app.services.llm_service import LLMService
from app.managers.campaign_manager import CampaignManager
from app.managers.document_manager import DocumentManager
from app.storage.json_storage import JsonStorage
from app.storage.vector_storage import VectorStorage
from app.clients.storage_client import VectorStorageClient
from app.clients.llm_client import LLMClient
from app.clients.reddit_client import RedditClient
from app.services.scraper_service import WebScraperService

# Configuration
ORGANIZATION_ID = "demo-org-2024"
REDDIT_CREDENTIALS = {
    "client_id": os.getenv("REDDIT_CLIENT_ID"),
    "client_secret": os.getenv("REDDIT_CLIENT_SECRET")
}

# Initialize services
json_storage = JsonStorage()
vector_storage_client = VectorStorageClient()
vector_storage = VectorStorage(vector_storage_client)
document_manager = DocumentManager(json_storage)
campaign_manager = CampaignManager(json_storage)
web_scraper_service = WebScraperService()
document_service = DocumentService(document_manager, vector_storage, web_scraper_service)
llm_client = LLMClient()
llm_service = LLMService(llm_client)
reddit_client = RedditClient(
    client_id=REDDIT_CREDENTIALS["client_id"],
    client_secret=REDDIT_CREDENTIALS["client_secret"]
)
reddit_service = RedditService(json_storage, reddit_client)
campaign_service = CampaignService(campaign_manager, document_service, reddit_service, llm_service)

# List campaigns
async def list_campaigns():
    success, message, campaigns = await campaign_service.list_campaigns(ORGANIZATION_ID)
    
    print(f"📋 Campaigns List")
    print(f"   Organization: {ORGANIZATION_ID}")
    print(f"   Success: {success}")
    print(f"   Message: {message}")
    print(f"   Total Campaigns: {len(campaigns) if campaigns else 0}")
    
    if campaigns:
        print(f"\n🎯 Campaigns:")
        for i, campaign in enumerate(campaigns, 1):
            print(f"   {i}. {campaign.name} ({campaign.status})")
            print(f"      ID: {campaign.id}")
            print(f"      Created: {campaign.created_at}")
            print(f"      Tone: {campaign.response_tone}")
    
    await campaign_service.cleanup()

await list_campaigns()

In [None]:
# Cell 15: Web Scraping Test
import sys, os
sys.path.append('app')

from app.services.scraper_service import WebScraperService

# Configuration
TEST_URL = "https://httpbin.org/html"  # Simple test URL
SCRAPING_METHOD = "requests"  # Use requests method for reliability

# Initialize service
web_scraper = WebScraperService()

# Test scraping
scraped_content = web_scraper.scrape_url(TEST_URL, method=SCRAPING_METHOD)

# One flag drives both the "Success" line and the branch below, so an empty
# result is no longer reported as a success and then as a failure.
scrape_succeeded = bool(scraped_content)

print(f"🌐 Web Scraping Test")
print(f"   URL: {TEST_URL}")
print(f"   Method: {SCRAPING_METHOD}")
print(f"   Success: {scrape_succeeded}")

if scrape_succeeded:
    print(f"   Content Length: {len(scraped_content)} characters")
    print(f"\n📄 Content Preview:")
    # Only the first 200 characters are shown.
    print(f"   {scraped_content[:200]}...")
else:
    print(f"   Failed to scrape content")