In [15]:
import os
import json
from fpdf import FPDF
import openai
import praw
from dotenv import load_dotenv, find_dotenv

# Load environment variables (a .env file, if one is found, is merged into os.environ)
load_dotenv(find_dotenv())

# Reddit API credentials.
# These are read from the environment instead of being embedded in the source,
# so that secrets never end up in version control. Expected variables:
#   REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, and optionally REDDIT_USER_AGENT.
# NOTE(review): the client id/secret that used to be hard-coded here should be
# treated as leaked and revoked in the Reddit app settings.
reddit = praw.Reddit(
    client_id=os.getenv('REDDIT_CLIENT_ID'),
    client_secret=os.getenv('REDDIT_CLIENT_SECRET'),
    user_agent=os.getenv('REDDIT_USER_AGENT', 'Reddit Persona Script by Santosh')
)
# OpenAI API setup (key comes from the OPENAI_API_KEY environment variable)
client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# Extract username from URL
def extract_username(profile_url):
    """Return the Reddit username embedded in a profile URL, or None.

    Both the long (``/user/<name>``) and short (``/u/<name>``) profile forms
    are recognised. Surrounding whitespace, a trailing slash, a query string
    and a fragment are ignored.

    Args:
        profile_url: URL (or path) of a Reddit profile page.

    Returns:
        The username as a string, or None when the input is empty or holds
        no ``user``/``u`` segment followed by a name.
    """
    if not profile_url:
        return None

    # Strip the query string and fragment so they can't leak into the name.
    path = profile_url.strip().split('?', 1)[0].split('#', 1)[0]
    parts = path.rstrip('/').split('/')

    # Walk adjacent segment pairs; pairing guarantees the marker has a
    # successor, so a URL ending in ".../user" yields None instead of raising.
    for marker, candidate in zip(parts, parts[1:]):
        if marker.lower() in ('user', 'u') and candidate:
            return candidate
    return None

# Scrape posts & comments
def scrape_user_data(username, comment_limit=10, post_limit=5):
    """Collect a redditor's newest comments and submissions.

    Uses the module-level ``reddit`` client.

    Args:
        username: Reddit account name to look up.
        comment_limit: Value passed as ``limit`` when listing new comments.
        post_limit: Value passed as ``limit`` when listing new submissions.

    Returns:
        A dict with two keys: ``'comments'`` (list of dicts with ``text``,
        ``subreddit``, ``url``) and ``'posts'`` (list of dicts with ``title``,
        ``body``, ``subreddit``, ``url``).
    """
    redditor = reddit.redditor(username)

    # Comments are fetched first, then submissions.
    recent_comments = [
        {
            'text': item.body,
            'subreddit': str(item.subreddit),
            'url': f"https://www.reddit.com{item.permalink}",
        }
        for item in redditor.comments.new(limit=comment_limit)
    ]

    recent_posts = [
        {
            'title': item.title,
            'body': item.selftext,
            'subreddit': str(item.subreddit),
            'url': f"https://www.reddit.com{item.permalink}",
        }
        for item in redditor.submissions.new(limit=post_limit)
    ]

    return {'comments': recent_comments, 'posts': recent_posts}

# Combine text
def combine_text(data):
    """Flatten scraped posts and comments into one text blob.

    Args:
        data: Dict with a ``'posts'`` list (items holding ``title``, ``body``,
            ``subreddit``, ``url``) and a ``'comments'`` list (items holding
            ``text``, ``subreddit``, ``url``).

    Returns:
        A string with a ``=== POSTS ===`` section followed by a
        ``=== COMMENTS ===`` section, one blank-line-terminated entry per item.
    """
    post_chunks = [
        f"Title: {entry['title']}\nContent: {entry['body']}\nSubreddit: {entry['subreddit']}\nURL: {entry['url']}\n\n"
        for entry in data['posts']
    ]
    comment_chunks = [
        f"Comment: {entry['text']}\nSubreddit: {entry['subreddit']}\nURL: {entry['url']}\n\n"
        for entry in data['comments']
    ]

    # Assemble header + entries for each section in a single join.
    return "".join([
        "=== POSTS ===\n\n",
        *post_chunks,
        "=== COMMENTS ===\n\n",
        *comment_chunks,
    ])

# Generate persona with OpenAI
def generate_persona_with_citations(combined_text):
    """Ask the chat model to write a user persona from Reddit content.

    Uses the module-level OpenAI ``client``.

    Args:
        combined_text: Text containing the user's posts and comments; it is
            embedded verbatim in the prompt.

    Returns:
        The message content of the first choice in the completion response.
    """
    instructions = f"""
You are an AI that builds a User Persona based on Reddit posts and comments.

Instructions:
- Identify Personality Traits
- Identify Interests
- Describe Behavior & Habits
- List Motivations
- Identify Frustrations
- Describe Goals & Needs
- Provide example quotes from posts/comments for each insight
- Mention subreddit names wherever possible

Here is the Reddit user's content:

{combined_text}

Return the analysis as a well-structured User Persona.
"""

    # The whole prompt is sent as a single user message.
    chat_messages = [{"role": "user", "content": instructions}]
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=chat_messages,
        max_tokens=2000,
        temperature=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Save as TXT
def save_as_txt(username, persona_text):
    """Write the persona to ``<username>_persona.txt`` as UTF-8 text.

    Args:
        username: Prefix used to build the output filename.
        persona_text: Text written to the file unchanged.
    """
    filename = f"{username}_persona.txt"
    with open(filename, mode='w', encoding='utf-8') as handle:
        handle.write(persona_text)
    print(f"✅ Persona saved as TXT: {filename}")

from fpdf import FPDF

def save_as_pdf(username, persona_text,
                font_path='/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'):
    """Render the persona text into ``<username>_persona.pdf``.

    Args:
        username: Prefix used to build the output filename.
        persona_text: Text to render; it is split on newlines and each line
            is written as its own wrapped cell.
        font_path: Path to the TrueType font file registered as 'DejaVu'.
            Defaults to the usual Debian/Ubuntu location of DejaVu Sans;
            pass another path on systems where the font lives elsewhere.
    """
    pdf = FPDF()
    pdf.add_page()

    # Register a TrueType font so text outside the core-font character set
    # can be written.
    pdf.add_font('DejaVu', '', font_path, uni=True)
    pdf.set_font('DejaVu', '', 12)

    # Write text line by line
    for line in persona_text.split('\n'):
        # Start every cell at the left margin. fpdf2 by default leaves the
        # cursor to the right of a finished multi_cell, which leaves no room
        # for the next full-width (w=0) cell.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, line)

    filename = f"{username}_persona.pdf"
    pdf.output(filename)
    print(f"✅ Persona saved as PDF: {filename}")

# Save as JSON
def save_as_json(username, persona_text, scraped_data):
    """Write the persona and its source data to ``<username>_persona.json``.

    Args:
        username: Stored under ``'username'`` and used as the filename prefix.
        persona_text: Stored under ``'persona'``.
        scraped_data: Stored under ``'scraped_data'``; must be JSON-serialisable.
    """
    filename = f"{username}_persona.json"
    payload = dict(
        username=username,
        persona=persona_text,
        scraped_data=scraped_data,
    )
    with open(filename, mode='w', encoding='utf-8') as handle:
        json.dump(payload, handle, indent=4)
    print(f"✅ Persona saved as JSON: {filename}")

# ======= MAIN EXECUTION =======
if __name__ == "__main__":
    # Pipeline: prompt for a profile URL -> scrape -> build persona -> save.
    reddit_url = input("Enter Reddit Profile URL: ").strip()
    username = extract_username(reddit_url)

    # extract_username returns None when the URL has no user segment; stop
    # here rather than querying Reddit for None and writing "None_persona.*".
    if not username:
        raise SystemExit(f"❌ Could not extract a username from: {reddit_url!r}")
    print(f"📥 Extracted username: {username}")

    scraped_data = scrape_user_data(username, comment_limit=10, post_limit=5)
    combined_text = combine_text(scraped_data)
    persona_output = generate_persona_with_citations(combined_text)

    # Save outputs
    save_as_txt(username, persona_output)      # TXT output
    save_as_pdf(username, persona_output)      # PDF output
    save_as_json(username, persona_output, scraped_data)  # JSON output

    print("🎉 All outputs generated successfully!")


Enter Reddit Profile URL:  https://www.reddit.com/user/kojied/comments/


📥 Extracted username: kojied
✅ Persona saved as TXT: kojied_persona.txt
✅ Persona saved as PDF: kojied_persona.pdf
✅ Persona saved as JSON: kojied_persona.json
🎉 All outputs generated successfully!


In [19]:
# Check that git is available in the notebook's shell and show its version.
!git --version


git version 2.25.1


In [None]:
# Create a new Git repository in the notebook's current working directory.
!git init


In [None]:
# Register the GitHub repository as the "origin" remote.
!git remote add origin https://github.com/Santosh9519424222/reddit-persona-generator.git
# Stage everything in the working directory.
# NOTE(review): the script loads secrets with load_dotenv, so a .env file may
# sit in this directory; confirm it is excluded by .gitignore before staging.
!git add .
!git commit -m "Initial commit - Reddit User Persona Project"
# Rename the current branch to "main" and push it, setting origin/main as upstream.
!git branch -M main
!git push -u origin main
