# Discover Posts and Generate Responses

This notebook uses discovered subreddits to find relevant posts, analyze them, generate responses, and post them to Reddit.

**Workflow:**
1. Load discovered subreddits and select target subreddits
2. Search for relevant posts in selected subreddits
3. Analyze all discovered posts and generate responses
4. Post approved responses to Reddit

⚠️ **Warning:** Cell 4 will attempt to post responses to Reddit using your configured credentials!

## Cell 1: Setup and Load Subreddits

Initialize services and load the previously discovered subreddits.

In [None]:
import asyncio
import json
import os
import sys

# Add the parent directory to the system path to allow importing from src
sys.path.insert(0, os.path.abspath('../'))

from src.config.settings import settings
from src.storage.json_storage import JsonStorage

# Initialize services
json_storage = JsonStorage()

# Hand-off file read by the post-discovery cell, and how many of the ranked
# subreddits are carried forward (kept small for manageable processing).
SELECTED_SUBREDDITS_PATH = 'selected_subreddits.json'
MAX_SELECTED_SUBREDDITS = 3


def _save_selection(selected, organization_id, all_discovered, success):
    """Write the subreddit-selection hand-off file.

    Args:
        selected: Subreddit names chosen for post discovery.
        organization_id: Organization identifier copied from the discovery output.
        all_discovered: Every ranked subreddit from the discovery output.
        success: Value stored under ``selection_success``; the post-discovery
            cell refuses to proceed when this is false.
    """
    payload = {
        "selected_subreddits": selected,
        "organization_id": organization_id,
        "all_discovered_subreddits": all_discovered,
        "selection_success": success
    }
    with open(SELECTED_SUBREDDITS_PATH, 'w') as f:
        json.dump(payload, f, indent=2)


# Load discovered subreddits from previous notebook
print("Loading discovered subreddits...")
try:
    with open('discovered_subreddits_output.json', 'r') as f:
        subreddits_data = json.load(f)

    if not subreddits_data['discovery_success']:
        print(f"‚ùå Cannot proceed: Subreddit discovery failed in previous notebook")
        print(f"Error: {subreddits_data.get('error_message', 'Unknown error')}")

        # A failed discovery output may lack these keys, so read them leniently.
        ranked_subreddits = subreddits_data.get('ranked_subreddits', [])
        organization_id = subreddits_data.get('organization_id')

        # Overwrite any selection left by an earlier successful run; otherwise
        # the post-discovery cell would silently proceed with stale subreddits.
        _save_selection([], organization_id, ranked_subreddits, False)
    else:
        ranked_subreddits = subreddits_data['ranked_subreddits']
        organization_id = subreddits_data['organization_id']

        print(f"üìã Loaded {len(ranked_subreddits)} discovered subreddits:")
        for i, subreddit in enumerate(ranked_subreddits, 1):
            print(f"  {i}. r/{subreddit}")

        # Select top subreddits to focus on
        selected_subreddits = ranked_subreddits[:MAX_SELECTED_SUBREDDITS]

        print(f"\nüéØ Selected {len(selected_subreddits)} subreddits for post discovery:")
        for i, subreddit in enumerate(selected_subreddits, 1):
            print(f"  {i}. r/{subreddit}")

        # Save selected subreddits
        _save_selection(selected_subreddits, organization_id, ranked_subreddits, True)

        print(f"\nüìÅ Saved selected subreddits to selected_subreddits.json")

except FileNotFoundError:
    print("‚ùå Error: discovered_subreddits_output.json not found.")
    print("Please run the ingest_extract_discover.ipynb notebook first.")
except Exception as e:
    print(f"‚ùå Error loading subreddits: {str(e)}")

## Cell 2: Discover Posts

Search for relevant posts within the selected subreddits.

In [None]:
import asyncio
import json
import os
import sys

# Add the parent directory to the system path to allow importing from src
sys.path.insert(0, os.path.abspath('../'))

from src.config.settings import settings
from src.clients.reddit_client import RedditClient

# Initialize Reddit client
reddit_client = RedditClient(
    client_id=settings.REDDIT_CLIENT_ID,
    client_secret=settings.REDDIT_CLIENT_SECRET,
    username=settings.REDDIT_USERNAME,
    password=settings.REDDIT_PASSWORD
)

# Load selected subreddits from previous cell
print("Loading selected subreddits...")
try:
    with open('selected_subreddits.json', 'r') as f:
        selected_data = json.load(f)
    
    selected_subreddits = selected_data['selected_subreddits']
    organization_id = selected_data['organization_id']
    
    if not selected_data['selection_success']:
        print(f"‚ùå Cannot proceed: Subreddit selection failed")
    else:
        print(f"üéØ Target subreddits: {', '.join([f'r/{s}' for s in selected_subreddits])}")
        
        # Define search queries related to our AI marketing platform
        search_queries = [
            "AI marketing",
            "marketing automation",
            "social media management",
            "content generation",
            "marketing tools"
        ]
        
        print(f"\nüîç Search queries: {', '.join(search_queries)}")
        
        # Discover posts from selected subreddits
        all_discovered_posts = []
        
        async with reddit_client:
            for subreddit in selected_subreddits:
                print(f"\nüì° Searching r/{subreddit}...")
                
                for query in search_queries:
                    try:
                        posts = await reddit_client.search_subreddit_posts(
                            subreddit=subreddit,
                            query=query,
                            sort="relevance",
                            time_filter="month",  # Posts from last month
                            limit=3  # Limit per query to manage volume
                        )
                        
                        for post in posts:
                            # Add search context to post data
                            post['search_query'] = query
                            post['source_subreddit'] = subreddit
                            all_discovered_posts.append(post)
                        
                        print(f"  Found {len(posts)} posts for '{query}'")
                        
                    except Exception as e:
                        print(f"  ‚ö†Ô∏è Error searching '{query}' in r/{subreddit}: {str(e)}")
        
        # Remove duplicates based on post ID
        unique_posts = {}
        for post in all_discovered_posts:
            post_id = post['id']
            if post_id not in unique_posts:
                unique_posts[post_id] = post
        
        discovered_posts = list(unique_posts.values())
        
        # Limit total posts to process (top 10 most relevant)
        discovered_posts = discovered_posts[:10]
        
        print(f"\n‚úÖ Discovery complete!")
        print(f"üìä Found {len(discovered_posts)} unique posts to analyze:")
        
        for i, post in enumerate(discovered_posts, 1):
            print(f"  {i}. [{post['source_subreddit']}] {post['title'][:60]}...")
            print(f"     Score: {post['score']}, Comments: {post['num_comments']}")
        
        # Save discovered posts
        discovered_posts_data = {
            "discovered_posts": discovered_posts,
            "organization_id": organization_id,
            "search_queries": search_queries,
            "target_subreddits": selected_subreddits,
            "total_posts_found": len(discovered_posts),
            "discovery_success": True
        }
        
        with open('discovered_posts_output.json', 'w') as f:
            json.dump(discovered_posts_data, f, indent=2)
        
        print(f"\nüìÅ Saved discovered posts to discovered_posts_output.json")

except FileNotFoundError:
    print("‚ùå Error: selected_subreddits.json not found. Please run Cell 1 first.")
except Exception as e:
    print(f"‚ùå Error discovering posts: {str(e)}")

## Cell 3: Analyze Posts and Generate Responses

Iterate through all discovered posts, analyze each, and generate responses.

In [None]:
import asyncio
import json
import os
import sys

# Add the parent directory to the system path to allow importing from src
sys.path.insert(0, os.path.abspath('../'))

from src.config.settings import settings
from src.clients.reddit_client import RedditClient
from src.clients.llm_client import LLMClient
from src.storage.vector_storage import VectorStorage
from src.storage.json_storage import JsonStorage
from src.services.posting_service import PostingService

# Initialize services
json_storage = JsonStorage()
vector_storage = VectorStorage()
llm_client = LLMClient()
reddit_client = RedditClient(
    client_id=settings.REDDIT_CLIENT_ID,
    client_secret=settings.REDDIT_CLIENT_SECRET,
    username=settings.REDDIT_USERNAME,
    password=settings.REDDIT_PASSWORD
)
posting_service = PostingService(reddit_client, llm_client, vector_storage, json_storage)

# Load discovered posts from previous cell
print("Loading discovered posts...")
try:
    with open('discovered_posts_output.json', 'r') as f:
        posts_data = json.load(f)
    
    discovered_posts = posts_data['discovered_posts']
    organization_id = posts_data['organization_id']
    
    if not posts_data['discovery_success']:
        print(f"‚ùå Cannot proceed: Post discovery failed in previous cell")
    else:
        print(f"üìã Loaded {len(discovered_posts)} posts to analyze")
        
        # Analyze each post and generate responses
        all_generated_responses = []
        successful_analyses = 0
        failed_analyses = 0
        
        print(f"\nü§ñ Starting analysis and response generation...")
        
        for i, post_data in enumerate(discovered_posts, 1):
            post_id = post_data['id']
            post_title = post_data['title']
            subreddit = post_data['source_subreddit']
            
            print(f"\nüìù [{i}/{len(discovered_posts)}] Analyzing post in r/{subreddit}:")
            print(f"    Title: {post_title[:80]}...")
            print(f"    Post ID: {post_id}")
            
            try:
                # Analyze post and generate response
                success, message, response_data = await posting_service.analyze_and_generate_response(
                    post_id=post_id,
                    organization_id=organization_id,
                    tone="helpful"
                )
                
                if success and response_data:
                    # Add metadata to response
                    response_data['source_post'] = {
                        'title': post_title,
                        'subreddit': subreddit,
                        'score': post_data.get('score', 0),
                        'num_comments': post_data.get('num_comments', 0)
                    }
                    
                    all_generated_responses.append(response_data)
                    successful_analyses += 1
                    
                    print(f"    ‚úÖ Response generated successfully")
                    print(f"    Target: {response_data['target']['response_type']}")
                    print(f"    Confidence: {response_data['response']['confidence']:.2f}")
                    print(f"    Preview: {response_data['response']['content'][:100]}...")
                else:
                    failed_analyses += 1
                    print(f"    ‚ùå Analysis failed: {message}")
                    
            except Exception as e:
                failed_analyses += 1
                print(f"    ‚ùå Error analyzing post: {str(e)}")
        
        print(f"\nüìä Analysis Summary:")
        print(f"  ‚úÖ Successful: {successful_analyses}")
        print(f"  ‚ùå Failed: {failed_analyses}")
        print(f"  üìù Total responses generated: {len(all_generated_responses)}")
        
        # Save generated responses
        generated_responses_data = {
            "generated_responses": all_generated_responses,
            "organization_id": organization_id,
            "total_posts_analyzed": len(discovered_posts),
            "successful_analyses": successful_analyses,
            "failed_analyses": failed_analyses,
            "responses_ready_to_post": len(all_generated_responses),
            "generation_success": True
        }
        
        with open('generated_responses_output.json', 'w') as f:
            json.dump(generated_responses_data, f, indent=2)
        
        print(f"\nüìÅ Saved generated responses to generated_responses_output.json")
        
        if len(all_generated_responses) > 0:
            print(f"\nüöÄ Ready to post {len(all_generated_responses)} responses in the next cell!")
        else:
            print(f"\n‚ö†Ô∏è No responses were generated. Check the analysis results above.")

except FileNotFoundError:
    print("‚ùå Error: discovered_posts_output.json not found. Please run Cell 2 first.")
except Exception as e:
    print(f"‚ùå Error during analysis: {str(e)}")

## Cell 4: Post Responses

⚠️ **WARNING: This cell will post responses to Reddit using your configured credentials!**

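Make sure you have reviewed the generated responses in `generated_responses_output.json` and are ready to post them. Running this cell posts every saved response immediately; it does not ask for confirmation.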

In [None]:
import asyncio
import json
import os
import sys

# Add the parent directory to the system path to allow importing from src
sys.path.insert(0, os.path.abspath('../'))

from src.config.settings import settings
from src.clients.reddit_client import RedditClient
from src.storage.json_storage import JsonStorage
from src.services.posting_service import PostingService
from src.clients.llm_client import LLMClient
from src.storage.vector_storage import VectorStorage

print("‚ö†Ô∏è  WARNING: This cell will post responses to Reddit!")
print("‚ö†Ô∏è  Make sure you have reviewed the generated responses.")
print("‚ö†Ô∏è  Proceeding will use your Reddit account credentials.\n")

# Initialize services
json_storage = JsonStorage()
vector_storage = VectorStorage()
llm_client = LLMClient()
reddit_client = RedditClient(
    client_id=settings.REDDIT_CLIENT_ID,
    client_secret=settings.REDDIT_CLIENT_SECRET,
    username=settings.REDDIT_USERNAME,
    password=settings.REDDIT_PASSWORD
)
posting_service = PostingService(reddit_client, llm_client, vector_storage, json_storage)

# Load generated responses from previous cell
print("Loading generated responses...")
try:
    with open('generated_responses_output.json', 'r') as f:
        responses_data = json.load(f)
    
    generated_responses = responses_data['generated_responses']
    organization_id = responses_data['organization_id']
    
    if not responses_data['generation_success']:
        print(f"‚ùå Cannot proceed: Response generation failed in previous cell")
    elif len(generated_responses) == 0:
        print(f"‚ùå No responses to post. Please check the previous cell.")
    else:
        print(f"üìã Loaded {len(generated_responses)} responses ready to post")
        
        # Show preview of responses to be posted
        print(f"\nüìù Preview of responses to be posted:")
        for i, response_data in enumerate(generated_responses, 1):
            target = response_data['target']
            response = response_data['response']
            source_post = response_data.get('source_post', {})
            
            print(f"\n  {i}. Post: {source_post.get('title', 'Unknown')[:50]}...")
            print(f"     Subreddit: r/{response_data.get('subreddit', 'unknown')}")
            print(f"     Action: {target['response_type']}")
            print(f"     Confidence: {response['confidence']:.2f}")
            print(f"     Response preview: {response['content'][:100]}...")
        
        # Post all responses
        print(f"\nüöÄ Starting to post {len(generated_responses)} responses...")
        
        all_posting_results = []
        successful_posts = 0
        failed_posts = 0
        
        for i, response_data in enumerate(generated_responses, 1):
            target = response_data['target']
            response = response_data['response']
            source_post = response_data.get('source_post', {})
            
            target_id = target['target_id']
            response_type = target['response_type']
            response_content = response['content']
            
            print(f"\nüì§ [{i}/{len(generated_responses)}] Posting to r/{response_data.get('subreddit', 'unknown')}...")
            print(f"    Post: {source_post.get('title', 'Unknown')[:50]}...")
            print(f"    Action: {response_type}")
            
            try:
                # Post the response
                success, message, result = await posting_service.post_approved_response(
                    response_type=response_type,
                    response_content=response_content,
                    target_id=target_id
                )
                
                posting_result = {
                    "response_index": i,
                    "target_id": target_id,
                    "response_type": response_type,
                    "success": success,
                    "message": message,
                    "result": result,
                    "source_post": source_post
                }
                
                all_posting_results.append(posting_result)
                
                if success:
                    successful_posts += 1
                    print(f"    ‚úÖ Posted successfully!")
                    if result and 'permalink' in result:
                        print(f"    üîó Link: https://reddit.com{result['permalink']}")
                else:
                    failed_posts += 1
                    print(f"    ‚ùå Posting failed: {message}")
                
                # Add delay between posts to respect rate limits
                if i < len(generated_responses):
                    print(f"    ‚è≥ Waiting 10 seconds before next post...")
                    await asyncio.sleep(10)
                    
            except Exception as e:
                failed_posts += 1
                posting_result = {
                    "response_index": i,
                    "target_id": target_id,
                    "response_type": response_type,
                    "success": False,
                    "error": str(e),
                    "source_post": source_post
                }
                all_posting_results.append(posting_result)
                print(f"    ‚ùå Error posting: {str(e)}")
        
        print(f"\nüìä Posting Summary:")
        print(f"  ‚úÖ Successful posts: {successful_posts}")
        print(f"  ‚ùå Failed posts: {failed_posts}")
        print(f"  üìù Total attempts: {len(generated_responses)}")
        
        # Save posting results
        posted_responses_data = {
            "all_posting_results": all_posting_results,
            "organization_id": organization_id,
            "total_attempts": len(generated_responses),
            "successful_posts": successful_posts,
            "failed_posts": failed_posts,
            "posting_complete": True
        }
        
        with open('posted_responses_results.json', 'w') as f:
            json.dump(posted_responses_data, f, indent=2)
        
        print(f"\nüìÅ Saved posting results to posted_responses_results.json")
        
        if successful_posts > 0:
            print(f"\nüéâ Successfully posted {successful_posts} responses to Reddit!")
            print(f"üìà You can now run the analytics notebook to track engagement.")

except FileNotFoundError:
    print("‚ùå Error: generated_responses_output.json not found. Please run Cell 3 first.")
except Exception as e:
    print(f"‚ùå Error during posting: {str(e)}")