In [1]:
import argparse
import os
import sys
import logging
from dotenv import load_dotenv
import traceback2 as traceback
import json
from datetime import datetime, timezone
import gc

In [2]:
# Force a full garbage-collection pass. The value echoed as the cell output is
# the return value of gc.collect(), i.e. the number of unreachable objects found.
gc.collect()

0

In [3]:
# Change the kernel's working directory to /home/s2mudemi.
# NOTE(review): hard-coded, machine-specific absolute path. Presumably needed so
# that the `research_case` package imported in the next cell can be resolved
# relative to the working directory — TODO confirm, and prefer a configurable path.
os.chdir("/home/s2mudemi")

In [4]:
from research_case.analyzers.persona_analysis import PersonaAnalyzer , ExtendedPersonaAnalyzer, PersonaFieldAnalyzer
from research_case.analyzers.llm_client import LLMClient

# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): which variables the research_case modules expect is not visible
# from this notebook — confirm against the package configuration.
load_dotenv()

# Set up logging: a module-level logger for this notebook, plus a root-logger
# configuration at INFO level so that INFO records from this notebook and from
# imported libraries appear in the cell output.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [5]:
def load_and_sample_users(input_path: str, max_personas: int, seed=None) -> dict:
    """
    Load user data from JSON and sample up to max_personas users.

    Args:
        input_path: Path to input JSON file. The top-level JSON value is
            expected to be an object keyed by user id.
        max_personas: Maximum number of personas to create. A falsy value
            (``None`` or ``0``) disables sampling, so every user is returned.
        seed: Optional seed for the sampling RNG. With a seed, the same file
            always yields the same sample. With ``None`` (the default) the
            shared module-level ``random`` generator is used, exactly as before.

    Returns:
        Dict containing sampled user data (all users when no sampling is needed).

    Raises:
        ValueError: If max_personas is negative.
        Exception: Any error raised while reading or parsing the file is
            logged and then re-raised unchanged.
    """
    # Kept local, as in the original, so the block does not depend on a
    # module-level import being present.
    import random

    try:
        # Fail early with a clear message instead of the opaque error that
        # random.sample would raise for a negative sample size.
        if max_personas is not None and max_personas < 0:
            raise ValueError(f"max_personas must be non-negative, got {max_personas}")

        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(input_path, 'r', encoding='utf-8') as f:
            all_users = json.load(f)

        # If max_personas is specified and less than total users, sample randomly
        if max_personas and len(all_users) > max_personas:
            # A dedicated Random instance keeps seeded runs reproducible
            # without touching the global generator's state.
            rng = random.Random(seed) if seed is not None else random
            sampled_user_ids = rng.sample(list(all_users.keys()), max_personas)
            sampled_users = {uid: all_users[uid] for uid in sampled_user_ids}
            logger.info(f"Sampled {max_personas} users from total {len(all_users)} users")
            return sampled_users

        return all_users

    except Exception as e:
        logger.error(f"Error loading user data: {e}")
        raise

In [6]:
def main():
    """
    Run the persona-analysis pipeline with the settings hard-coded below.

    Steps:
        1. Check that the input users file exists.
        2. Sample up to ``max_personas`` users and write them to a uniquely
           named temporary JSON file, which is passed to the analyzers as
           ``posts_path``.
        3. Detect the persona fields once with ``PersonaFieldAnalyzer``.
        4. Call ``ExtendedPersonaAnalyzer.analyze_persona_from_files``
           ``n_iterations`` times, each time with the same ``output_path``.
        5. Remove the temporary file, whether or not the run succeeded.

    Raises:
        SystemExit: With exit code 1 if the input file does not exist.
        Exception: Any failure during analysis is logged with its traceback
            and re-raised.
    """
    # Local import: only this function needs it.
    import tempfile

    args = {
        "input": "/home/s2mudemi/research_case/data/preprocessed/processed_users.json",
        "conversations": "/home/s2mudemi/research_case/data/preprocessed/processed_conversations.json",
        "output": "/home/s2mudemi/research_case/data",
        "n_posts": 5,
        "n_conversations": 5,
        "max_personas": 1,
        "experiment_name": "my_experiment",
        # NOTE(review): the next two settings are not read anywhere in this function.
        "use_random_fields": False,
        "num_fields": 5,
        # How many times the persona analysis is repeated on the same sample.
        "n_iterations": 3,
    }

    input_path = args["input"]
    conversations_path = args["conversations"]
    output_path = os.path.join(args["output"], args["experiment_name"], "personas.json")

    # Validate input file
    if not os.path.exists(input_path):
        logger.error(f"Input file not found: {input_path}")
        # sys.exit always raises SystemExit(1); the bare `exit` helper is
        # injected by site/IPython and does not behave that way in a kernel.
        sys.exit(1)

    # Make sure the experiment directory exists before anything is written.
    # NOTE(review): the analyzer may already do this itself; the call is harmless either way.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    llm_client = LLMClient(model_name="llama3:70b")
    analyzer = ExtendedPersonaAnalyzer(llm_client)
    field_analyzer = PersonaFieldAnalyzer(llm_client)

    # Stays None until the temp file exists, so cleanup knows whether to act.
    temp_input = None
    try:
        logger.info("Starting persona analysis...")

        # Load and sample users if max_personas is specified
        sampled_users = load_and_sample_users(input_path, args["max_personas"])

        # The analyzers take file paths, so the sample is written to disk.
        # A unique name avoids collisions between concurrent runs; the file
        # is created next to the input, as before.
        fd, temp_input = tempfile.mkstemp(
            prefix="temp_sampled_users_",
            suffix=".json",
            dir=os.path.dirname(input_path) or ".",
        )
        with os.fdopen(fd, 'w') as f:
            json.dump(sampled_users, f)

        # Despite the original variable name ("num_fields"), the logged output
        # shows this is a list of field names, not a count.
        detected_fields = field_analyzer.detect_the_fields(
            posts_path=temp_input,
            conversations_path=conversations_path,
            n_posts=args["n_posts"],
            n_conversations=args["n_conversations"])

        print("printing_number of fields: ", detected_fields)

        for i in range(args["n_iterations"]):
            print(f"Iteration {i}")
            analyzer.analyze_persona_from_files(
                posts_path=temp_input,
                conversations_path=conversations_path,
                output_path=output_path,
                founded_fields=detected_fields,
                n_posts=args["n_posts"],
                n_conversations=args["n_conversations"])

        logger.info(f"Persona analysis with fields: {detected_fields} completed. Results saved to {output_path}")

    except Exception:
        # logger.exception records the message together with the active traceback.
        logger.exception("Failed to analyze personas:")
        raise
    finally:
        # Single cleanup point for both the success and the failure path.
        if temp_input is not None and os.path.exists(temp_input):
            os.remove(temp_input)

In [7]:
# Run the pipeline when this code executes as the top-level module. That is the
# case in the notebook kernel (the log records below are tagged "__main__") and
# when run as a script; the call is skipped if the code is imported as a module.
if __name__ == "__main__":
    main()

INFO:research_case.analyzers.persona_analysis:Initialized PersonaPromptGenerator
INFO:research_case.analyzers.persona_analysis:Initialized PersonaPromptGenerator
INFO:__main__:Starting persona analysis...
INFO:__main__:Sampled 1 users from total 2 users
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:research_case.analyzers.persona_analysis:Persona field analysis complete. Found fields: ['language_formality', 'opinion_expression', 'narrative_voice', 'emotional_expression', 'vocabulary_range', 'controversy_handling', 'value_signals', 'brevity_style', 'punctuation_style']


printing_number of fields:  ['language_formality', 'opinion_expression', 'narrative_voice', 'emotional_expression', 'vocabulary_range', 'controversy_handling', 'value_signals', 'brevity_style', 'punctuation_style']
Iteration 0


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:research_case.analyzers.persona_analysis:Successfully saved JSON file (1750 bytes) to /home/s2mudemi/research_case/data/my_experiment/personas.json
INFO:research_case.analyzers.persona_analysis:Persona analysis complete. Results saved to /home/s2mudemi/research_case/data/my_experiment/personas.json


Iteration 1


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:research_case.analyzers.persona_analysis:Successfully saved JSON file (2261 bytes) to /home/s2mudemi/research_case/data/my_experiment/personas.json
INFO:research_case.analyzers.persona_analysis:Persona analysis complete. Results saved to /home/s2mudemi/research_case/data/my_experiment/personas.json


Iteration 2


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"
INFO:research_case.analyzers.persona_analysis:Successfully saved JSON file (4091 bytes) to /home/s2mudemi/research_case/data/my_experiment/personas.json
INFO:research_case.analyzers.persona_analysis:Persona analysis complete. Results saved to /home/s2mudemi/research_case/data/my_experiment/personas.json
INFO:__main__:Persona analysis with fields: ['language_formality', 'opinion_expression', 'narrative_voice', 'emotional_expression', 'vocabulary_range', 'controversy_handling', 'value_signals', 'brevity_style', 'punctuation_style'] completed. Results saved to /home/s2mudemi/research_case/data/my_experiment/personas.json


In [None]:
# IPython automagic for %pwd: shows the kernel's current working directory.
# NOTE(review): this cell has no execution count and looks like a leftover
# debugging check — consider removing it from the finished notebook.
pwd