In [22]:
# Import all required libraries
import os
from dotenv import load_dotenv
import praw
import google.generativeai as genai
from google.generativeai import GenerativeModel
import re

print("✅ All libraries imported successfully")

✅ All libraries imported successfully


In [23]:
# Load environment variables from env.txt; override=True lets values in the
# file win over variables already exported in the shell.
load_dotenv(dotenv_path="env.txt", override=True)

# Get API credentials
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Fail fast with an actionable message instead of handing None to the API
# clients and reporting success for credentials that were never loaded.
missing_credentials = [
    name
    for name, value in (
        ("REDDIT_CLIENT_ID", REDDIT_CLIENT_ID),
        ("REDDIT_CLIENT_SECRET", REDDIT_CLIENT_SECRET),
        ("GEMINI_API_KEY", GEMINI_API_KEY),
    )
    if not value
]
if missing_credentials:
    raise RuntimeError(
        "Missing credentials in env.txt / environment: " + ", ".join(missing_credentials)
    )

# Initialize Reddit API (no username/password is supplied, so only the
# application credentials are used)
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent="user persona project v1.0"
)

# Initialize Gemini AI
genai.configure(api_key=GEMINI_API_KEY)
model = GenerativeModel("gemini-2.0-flash-exp")

# NOTE: these banners only confirm the client objects were constructed; no
# request has been sent to either service in this cell.
print("✅ Reddit API initialized")
print("✅ Gemini AI initialized")

✅ Reddit API initialized
✅ Gemini AI initialized


In [24]:
def extract_username_from_url(url):
    """Return the Reddit username contained in a profile URL or handle.

    Accepted forms include full profile URLs (``https://www.reddit.com/user/name/``,
    ``https://reddit.com/u/name?utm=...``), short handles (``u/name``, ``/u/name``)
    and a bare username. Surrounding whitespace, trailing path segments, query
    strings and fragments are ignored.

    Args:
        url: Profile URL, ``u/`` handle, or plain username (str).

    Returns:
        The username as a string. Input with no ``user/`` or ``u/`` segment is
        returned with surrounding whitespace and slashes removed.
    """
    cleaned = url.strip()
    # The segment must sit at the start of the input or right after a slash so
    # that a name merely ending in "u" or "user" is never mistaken for the
    # prefix. The username itself stops at the next "/", "?", "#" or whitespace.
    match = re.search(r"(?:^|/)(?:user|u)/([^/?#\s]+)", cleaned)
    if match:
        return match.group(1)
    return cleaned.strip("/")  # Assume it's just the username

# Smoke test: a full profile URL with a trailing slash should reduce to the bare username
test_url = "https://www.reddit.com/user/testuser/"
print(f"Extracted username: {extract_username_from_url(test_url)}")

Extracted username: testuser


In [25]:
def fetch_user_data(username, limit=20):
    """Fetch a user's newest posts, newest comments and avatar URL from Reddit.

    Uses the module-level ``reddit`` client.

    Args:
        username: Reddit username (without the ``u/`` prefix).
        limit: Maximum number of posts and, separately, of comments to request.
            Defaults to 20, the value that was previously hard-coded.

    Returns:
        A ``(posts, comments, profile_img)`` tuple where
        ``posts`` is a list of dicts with keys ``title``, ``text``, ``url``,
        ``subreddit`` and ``score``; ``comments`` is a list of dicts with keys
        ``comment``, ``url``, ``subreddit`` and ``score``; and ``profile_img``
        is the user's ``icon_img`` or a default avatar URL when it is missing
        or empty.

        This is deliberately best-effort: if anything raises while fetching,
        the error is printed and ``([], [], <default avatar URL>)`` is
        returned, so partial results are discarded.
    """
    default_avatar = "https://www.redditstatic.com/avatars/avatar_default_02_A5A4A4.png"

    try:
        user = reddit.redditor(username)

        # Fetch recent posts
        print(f"📝 Fetching posts for {username}...")
        posts = [
            {
                "title": post.title,
                "text": post.selftext,
                "url": f"https://reddit.com{post.permalink}",
                "subreddit": post.subreddit.display_name,
                "score": post.score,
            }
            for post in user.submissions.new(limit=limit)
        ]

        # Fetch recent comments
        print(f"💬 Fetching comments for {username}...")
        comments = [
            {
                "comment": comment.body,
                "url": f"https://reddit.com{comment.permalink}",
                "subreddit": comment.subreddit.display_name,
                "score": comment.score,
            }
            for comment in user.comments.new(limit=limit)
        ]

        # Get profile image; a missing attribute, None and "" all fall back
        # to the default avatar.
        profile_img = getattr(user, "icon_img", None) or default_avatar

        print(f"✅ Found {len(posts)} posts and {len(comments)} comments")
        return posts, comments, profile_img

    except Exception as e:
        # Broad on purpose: the caller treats "no posts and no comments" as
        # "user not found", so every failure maps to that empty result.
        print(f"❌ Error fetching user data: {e}")
        return [], [], default_avatar

# Test with a username (you can change this)
# posts, comments, profile_img = fetch_user_data("spez")

In [26]:
def generate_persona_with_gemini(posts, comments, username):
    """Ask Gemini for a structured persona built from a user's Reddit activity.

    Args:
        posts: Iterable of dicts providing ``subreddit``, ``title`` and ``text``.
        comments: Iterable of dicts providing ``subreddit`` and ``comment``.
        username: Reddit username, quoted in the prompt.

    Returns:
        The text of the model's response, or ``None`` if the request (or
        reading the response text) raised; the error is printed in that case.
    """
    # Posts come first, then comments; entries are separated by a blank line.
    post_blocks = [
        f"POST in r/{post['subreddit']}: {post['title']}\n{post['text']}"
        for post in posts
    ]
    comment_blocks = [
        f"COMMENT in r/{reply['subreddit']}: {reply['comment']}"
        for reply in comments
    ]
    reddit_corpus = "\n\n".join(post_blocks + comment_blocks)

    # The layout requested here is what parse_persona_text() later reads back,
    # so field names and section headings must stay in sync with it.
    prompt = f"""
You are an expert user research analyst. Based on the Reddit posts and comments below from user '{username}', generate a structured user persona.

IMPORTANT: Use this EXACT format and structure:

Name: [Generate a realistic name]
Age: [Age range like "25-30"]
Occupation: [Infer from content]
Status: [Single/Married/In Relationship/etc]
Location: [City, Country - infer from content]
User Archetype: [The Creator/Explorer/Caregiver/etc]
Personality Keywords: [trait1, trait2, trait3, trait4, trait5]

---

Motivations
- [Motivation 1]
- [Motivation 2]
- [Motivation 3]

Behaviour & Habits
- [Behavior 1]
- [Behavior 2]
- [Behavior 3]

Frustrations
- [Frustration 1]
- [Frustration 2]
- [Frustration 3]

Goals & Needs
- [Goal 1]
- [Goal 2]
- [Goal 3]

Personality Type (MBTI spectrum)
- Introvert / Extrovert: [number from 0-100, where 0=very introverted, 100=very extroverted]
- Sensing / Intuition: [number from 0-100, where 0=very sensing, 100=very intuitive]
- Thinking / Feeling: [number from 0-100, where 0=very thinking, 100=very feeling]
- Judging / Perceiving: [number from 0-100, where 0=very judging, 100=very perceiving]

Reddit data to analyze:
{reddit_corpus}
"""

    try:
        print("🤖 Generating persona with Gemini AI...")
        return model.generate_content(prompt).text
    except Exception as error:
        print(f"❌ Error generating persona: {error}")
        return None

In [27]:
def parse_persona_text(persona_text):
    """Parse the generated persona text into structured data.

    The parser is line-oriented and tolerant of light Markdown decoration:
    ``**bold**`` markers and leading ``#`` heading markers are ignored, and
    list items may start with ``- ``, ``* `` or ``• ``.

    Args:
        persona_text: Persona text in the layout requested by the Gemini
            prompt. ``None`` or an empty string yields the default structure.

    Returns:
        dict with:
          * ``Name``, ``Age``, ``Occupation``, ``Status``, ``Location``,
            ``User Archetype``: strings (``""`` when absent);
          * ``Personality Keywords``: list of non-empty strings;
          * ``Motivations``, ``Behaviour & Habits``, ``Frustrations``,
            ``Goals & Needs``: lists of bullet texts;
          * ``Personality``: dict mapping ``Introvert/Extrovert``,
            ``Sensing/Intuition``, ``Thinking/Feeling`` and
            ``Judging/Perceiving`` to an int clamped to 0-100 (default 50).
    """
    scalar_fields = ("Name", "Age", "Occupation", "Status", "Location", "User Archetype")
    list_sections = ("Motivations", "Behaviour & Habits", "Frustrations", "Goals & Needs")
    trait_pairs = (
        ("Introvert", "Extrovert"),
        ("Sensing", "Intuition"),
        ("Thinking", "Feeling"),
        ("Judging", "Perceiving"),
    )
    bullet_prefixes = ("- ", "* ", "• ")

    # Heading prefix -> section key. Only the personality heading differs
    # from its key.
    header_to_section = {name: name for name in list_sections}
    header_to_section["Personality Type"] = "Personality"

    sections = {field: "" for field in scalar_fields}
    sections["Personality Keywords"] = []
    for name in list_sections:
        sections[name] = []
    sections["Personality"] = {f"{left}/{right}": 50 for left, right in trait_pairs}

    def _read_trait(text):
        """Store the first number that follows a known trait label, clamped to 0-100."""
        for left, right in trait_pairs:
            # Searching only after the label keeps a leading "1." list marker
            # from being mistaken for the score.
            match = re.search(rf"{left}\s*/\s*{right}\s*:\D*?(\d+)", text)
            if match:
                score = int(match.group(1))
                sections["Personality"][f"{left}/{right}"] = max(0, min(100, score))
                return

    current_section = None

    for raw_line in (persona_text or "").split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        # List items: collected into the active list section; under the
        # personality heading (or before any heading) they may be trait scores.
        if line.startswith(bullet_prefixes):
            item = line[2:].replace("**", "").strip()
            if current_section in list_sections:
                if item:
                    sections[current_section].append(item)
            else:
                _read_trait(item)
            continue

        text = line.replace("**", "").lstrip("#").strip()

        # Basic "Field: value" lines
        field = next((name for name in scalar_fields if text.startswith(name + ":")), None)
        if field is not None:
            sections[field] = text.split(":", 1)[1].strip()
            continue

        # Must be tested before the "Personality Type" heading prefix below.
        if text.startswith("Personality Keywords:"):
            raw_keywords = text.split(":", 1)[1]
            sections["Personality Keywords"] = [
                keyword.strip() for keyword in raw_keywords.split(",") if keyword.strip()
            ]
            continue

        # Section headings
        header = next((name for name in header_to_section if text.startswith(name)), None)
        if header is not None:
            current_section = header_to_section[header]
            continue

        # Trait scores written without a bullet
        _read_trait(text)

    return sections

In [28]:
def _clamp_percent(value, default=50):
    """Coerce a personality score to an int in 0-100; fall back to `default` if not numeric."""
    try:
        number = int(value)
    except (TypeError, ValueError):
        return default
    return max(0, min(100, number))


def generate_persona_html(username, data, profile_image_url):
    """Render a persona card as a standalone HTML file in the working directory.

    All dynamic text (username, image URL and every value taken from ``data``)
    is HTML-escaped before insertion, because it originates from user input
    and from model output based on arbitrary Reddit content.

    Args:
        username: Reddit username shown in the title and header; also used to
            build the output filename.
        data: Parsed persona dict. Reads the keys ``Age``, ``Occupation``,
            ``Status``, ``Location``, ``User Archetype``,
            ``Personality Keywords``, ``Motivations``, ``Behaviour & Habits``,
            ``Frustrations``, ``Goals & Needs`` and ``Personality``. Missing
            or empty scalar values render as ``N/A``; missing lists render
            empty; missing or non-numeric personality scores render as 50.
        profile_image_url: URL placed in the avatar ``<img>`` tag.

    Returns:
        The name of the file written, ``<username>_persona.html``, where any
        character of ``username`` other than letters, digits, ``-`` and ``_``
        is replaced by ``_`` so the name cannot contain path separators.
    """
    # Imported locally so this cell does not depend on an extra top-level import.
    from html import escape

    def _tags(items, template):
        """Join escaped items, each wrapped by `template` (one '{}' placeholder)."""
        return "".join(template.format(escape(str(item))) for item in items or [])

    keywords_html = _tags(data.get('Personality Keywords'), '<span class="trait-tag">{}</span>')
    motivations_html = _tags(data.get('Motivations'), '<li>{}</li>')
    behaviors_html = _tags(data.get('Behaviour & Habits'), '<li>{}</li>')
    frustrations_html = _tags(data.get('Frustrations'), '<li>{}</li>')
    goals_html = _tags(data.get('Goals & Needs'), '<li>{}</li>')

    # `or 'N/A'` rather than a .get() default: the parser stores "" for fields
    # it did not find, which a .get() default would not replace.
    info = {
        key: escape(str(data.get(key) or 'N/A'))
        for key in ('Age', 'Occupation', 'Status', 'Location', 'User Archetype')
    }

    # Scores become CSS widths, so keep them inside 0-100.
    personality = data.get('Personality') or {}
    introvert_extrovert = _clamp_percent(personality.get('Introvert/Extrovert', 50))
    sensing_intuition = _clamp_percent(personality.get('Sensing/Intuition', 50))
    thinking_feeling = _clamp_percent(personality.get('Thinking/Feeling', 50))
    judging_perceiving = _clamp_percent(personality.get('Judging/Perceiving', 50))

    safe_username = escape(str(username))
    safe_image_url = escape(str(profile_image_url))

    # Generate the complete HTML (literal CSS braces are doubled for the f-string)
    page = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{safe_username} - User Persona</title>
    <style>
        body {{
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background-color: #f5f5f5;
            margin: 0;
            padding: 20px;
        }}
        .container {{
            max-width: 1200px;
            margin: 0 auto;
            background-color: white;
            border-radius: 10px;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
            overflow: hidden;
        }}
        .header {{
            background: linear-gradient(135deg, #ff6b35, #f7931e);
            color: white;
            padding: 30px;
            text-align: center;
        }}
        .content {{
            display: flex;
            padding: 30px;
            gap: 30px;
        }}
        .left-panel {{
            flex: 1;
            max-width: 300px;
        }}
        .profile-img {{
            width: 200px;
            height: 200px;
            border-radius: 50%;
            object-fit: cover;
            border: 4px solid #ff6b35;
            margin: 0 auto 20px auto;
            display: block;
        }}
        .basic-info {{
            background-color: #f8f9fa;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
        }}
        .basic-info h3 {{
            margin: 0 0 15px 0;
            color: #333;
            font-size: 18px;
        }}
        .info-item {{
            display: flex;
            justify-content: space-between;
            margin-bottom: 10px;
            padding: 5px 0;
            border-bottom: 1px solid #eee;
        }}
        .info-label {{
            font-weight: bold;
            color: #666;
        }}
        .right-panel {{
            flex: 2;
        }}
        .traits {{
            margin-bottom: 30px;
        }}
        .trait-tag {{
            display: inline-block;
            background-color: #ff6b35;
            color: white;
            padding: 6px 12px;
            margin: 4px;
            border-radius: 20px;
            font-size: 12px;
            font-weight: bold;
        }}
        .section {{
            margin-bottom: 30px;
        }}
        .section h2 {{
            color: #ff6b35;
            border-bottom: 2px solid #ff6b35;
            padding-bottom: 8px;
            margin-bottom: 15px;
            font-size: 20px;
        }}
        .section ul {{
            list-style: none;
            padding: 0;
        }}
        .section li {{
            background-color: #f8f9fa;
            padding: 10px 15px;
            margin-bottom: 8px;
            border-left: 4px solid #ff6b35;
            border-radius: 4px;
        }}
        .personality-section {{
            background-color: #f8f9fa;
            padding: 20px;
            border-radius: 8px;
        }}
        .personality-trait {{
            margin-bottom: 15px;
        }}
        .trait-labels {{
            display: flex;
            justify-content: space-between;
            margin-bottom: 5px;
            font-size: 14px;
            color: #666;
        }}
        .progress-bar {{
            height: 20px;
            background-color: #e0e0e0;
            border-radius: 10px;
            overflow: hidden;
        }}
        .progress-fill {{
            height: 100%;
            background: linear-gradient(90deg, #ff6b35, #f7931e);
            transition: width 0.3s ease;
        }}
        h1 {{
            margin: 0;
            font-size: 32px;
        }}
        .subtitle {{
            font-size: 16px;
            opacity: 0.9;
            margin-top: 10px;
        }}
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>{safe_username}</h1>
            <div class="subtitle">User Persona Analysis</div>
        </div>
        
        <div class="content">
            <div class="left-panel">
                <img src="{safe_image_url}" alt="Profile" class="profile-img">
                
                <div class="basic-info">
                    <h3>Basic Information</h3>
                    <div class="info-item">
                        <span class="info-label">Age:</span>
                        <span>{info['Age']}</span>
                    </div>
                    <div class="info-item">
                        <span class="info-label">Occupation:</span>
                        <span>{info['Occupation']}</span>
                    </div>
                    <div class="info-item">
                        <span class="info-label">Status:</span>
                        <span>{info['Status']}</span>
                    </div>
                    <div class="info-item">
                        <span class="info-label">Location:</span>
                        <span>{info['Location']}</span>
                    </div>
                    <div class="info-item">
                        <span class="info-label">Archetype:</span>
                        <span>{info['User Archetype']}</span>
                    </div>
                </div>
                
                <div class="personality-section">
                    <h3>Personality (MBTI)</h3>
                    <div class="personality-trait">
                        <div class="trait-labels">
                            <span>Introvert</span>
                            <span>Extrovert</span>
                        </div>
                        <div class="progress-bar">
                            <div class="progress-fill" style="width: {introvert_extrovert}%"></div>
                        </div>
                    </div>
                    <div class="personality-trait">
                        <div class="trait-labels">
                            <span>Sensing</span>
                            <span>Intuition</span>
                        </div>
                        <div class="progress-bar">
                            <div class="progress-fill" style="width: {sensing_intuition}%"></div>
                        </div>
                    </div>
                    <div class="personality-trait">
                        <div class="trait-labels">
                            <span>Thinking</span>
                            <span>Feeling</span>
                        </div>
                        <div class="progress-bar">
                            <div class="progress-fill" style="width: {thinking_feeling}%"></div>
                        </div>
                    </div>
                    <div class="personality-trait">
                        <div class="trait-labels">
                            <span>Judging</span>
                            <span>Perceiving</span>
                        </div>
                        <div class="progress-bar">
                            <div class="progress-fill" style="width: {judging_perceiving}%"></div>
                        </div>
                    </div>
                </div>
            </div>
            
            <div class="right-panel">
                <div class="traits">
                    <h2>Personality Keywords</h2>
                    {keywords_html}
                </div>
                
                <div class="section">
                    <h2>Motivations</h2>
                    <ul>
                        {motivations_html}
                    </ul>
                </div>
                
                <div class="section">
                    <h2>Behaviour & Habits</h2>
                    <ul>
                        {behaviors_html}
                    </ul>
                </div>
                
                <div class="section">
                    <h2>Frustrations</h2>
                    <ul>
                        {frustrations_html}
                    </ul>
                </div>
                
                <div class="section">
                    <h2>Goals & Needs</h2>
                    <ul>
                        {goals_html}
                    </ul>
                </div>
            </div>
        </div>
    </div>
</body>
</html>"""

    # Valid Reddit usernames pass through unchanged; anything else (slashes,
    # dots, spaces) is neutralised so the file always lands in the current
    # directory.
    safe_stem = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in str(username))
    filename = f"{safe_stem}_persona.html"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(page)
    print(f"✅ HTML persona saved as: {filename}")
    return filename

In [29]:
# Main entry point - this cell only defines generate_persona(); call it from a later cell to build a persona
def generate_persona(reddit_url_or_username):
    """Run the whole pipeline for one Reddit user and write the HTML card.

    Steps: resolve the username, fetch posts/comments, ask Gemini for a
    persona, parse the reply, and render it to HTML. Progress is printed
    along the way.

    Args:
        reddit_url_or_username: Profile URL or plain username.

    Returns:
        The parsed persona dict, or ``None`` when no posts and no comments
        were fetched or when the model produced no text.
    """
    handle = extract_username_from_url(reddit_url_or_username)
    print(f"📊 Analyzing user: {handle}")

    submissions, replies, avatar_url = fetch_user_data(handle)
    if not (submissions or replies):
        # fetch_user_data also returns empty lists on errors, so this covers
        # unknown users as well as users with no activity.
        print("❌ No data found for this user. Please check the username.")
        return None

    raw_persona = generate_persona_with_gemini(submissions, replies, handle)
    if not raw_persona:
        print("❌ Failed to generate persona")
        return None

    print("📋 Parsing persona data...")
    persona = parse_persona_text(raw_persona)

    print("🎨 Creating HTML persona card...")
    generate_persona_html(handle, persona, avatar_url)

    print("✅ Persona generation complete!")
    print(f"📄 Open {handle}_persona.html in your browser to view the result")
    return persona

# Example usage - replace with actual username
# result = generate_persona("spez")  # Reddit CEO for testing
# or
# result = generate_persona("https://www.reddit.com/user/spez/")

In [30]:
# Ask for a profile interactively and run the full pipeline; generate_persona()
# returns None when nothing could be generated, in which case no summary is shown.
reddit_input = input("Enter Reddit profile URL or username: ").strip()
result = generate_persona(reddit_input)

if result:
    print("\n" + "="*50)
    print("📊 PERSONA SUMMARY:")
    print("="*50)
    # One "Label: value" line per scalar field; the label shown for the
    # archetype differs from its dictionary key.
    print("\n".join(
        f"{label}: {result.get(key, 'N/A')}"
        for label, key in (
            ("Name", "Name"),
            ("Age", "Age"),
            ("Occupation", "Occupation"),
            ("Location", "Location"),
            ("Archetype", "User Archetype"),
        )
    ))
    print(f"Keywords: {', '.join(result.get('Personality Keywords', []))}")

Enter Reddit profile URL or username:  https://www.reddit.com/user/kojied/


📊 Analyzing user: kojied
📝 Fetching posts for kojied...
💬 Fetching comments for kojied...
✅ Found 20 posts and 20 comments
🤖 Generating persona with Gemini AI...
📋 Parsing persona data...
🎨 Creating HTML persona card...
✅ HTML persona saved as: kojied_persona.html
✅ Persona generation complete!
📄 Open kojied_persona.html in your browser to view the result

📊 PERSONA SUMMARY:
Name: Kenji Tanaka
Age: 28-33
Occupation: iOS Developer
Location: New York City, USA
Archetype: The Explorer
Keywords: Analytical, Curious, Reflective, Tech-Savvy, Environmentally-Conscious
