In [None]:
import os
import json
from fpdf import FPDF
import openai
import praw
from dotenv import load_dotenv, find_dotenv

# Load environment variables (for example from a local .env file) before any
# credential lookup below.
load_dotenv(find_dotenv())

# Reddit API credentials are read from the environment so that secrets are
# never stored in the source file (which is committed and pushed).
# Expected variables: REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET and, optionally,
# REDDIT_USER_AGENT.
# NOTE: any credential that was ever committed in plain text should be
# rotated in the Reddit app settings.
_missing_reddit_vars = [
    name
    for name in ('REDDIT_CLIENT_ID', 'REDDIT_CLIENT_SECRET')
    if not os.getenv(name)
]
if _missing_reddit_vars:
    # Fail early with a clear message instead of an opaque API error later.
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_reddit_vars)
    )

reddit = praw.Reddit(
    client_id=os.getenv('REDDIT_CLIENT_ID'),
    client_secret=os.getenv('REDDIT_CLIENT_SECRET'),
    user_agent=os.getenv('REDDIT_USER_AGENT', 'Reddit Persona Script by Santosh'),
)

# OpenAI API setup: the key is read from OPENAI_API_KEY.
client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))


def extract_username(profile_url):
    """Extract the Reddit username from a profile URL.

    Both the long form (``/user/<name>``) and the short form (``/u/<name>``)
    are recognised. Surrounding whitespace, a query string, a fragment and
    any trailing path segments (such as ``/comments/``) are ignored.

    Args:
        profile_url: Profile URL as typed by the user.

    Returns:
        The username as a string, or ``None`` when the URL is empty or does
        not contain a ``user``/``u`` segment followed by a name.
    """
    if not profile_url:
        return None

    # Drop the query string and fragment so they cannot leak into the name.
    path = profile_url.strip().split('?', 1)[0].split('#', 1)[0]

    # Filtering empty segments makes doubled or trailing slashes harmless.
    parts = [part for part in path.split('/') if part]

    # Pair each segment with its successor; a marker that is the last segment
    # has no successor, so it yields None instead of raising IndexError.
    for marker, candidate in zip(parts, parts[1:]):
        if marker in ('user', 'u'):
            return candidate
    return None


def scrape_user_data(username, comment_limit=10, post_limit=5):
    """Collect a Reddit user's newest comments and submissions.

    Args:
        username: Name passed to ``reddit.redditor``.
        comment_limit: ``limit`` passed to ``comments.new``.
        post_limit: ``limit`` passed to ``submissions.new``.

    Returns:
        A dict with two lists:
        ``'comments'`` holds dicts with ``text``, ``subreddit`` and ``url``;
        ``'posts'`` holds dicts with ``title``, ``body``, ``subreddit`` and
        ``url``. URLs are the item's permalink prefixed with the Reddit host.
    """
    redditor = reddit.redditor(username)

    # Comments are fetched first, then submissions.
    comment_records = [
        {
            'text': item.body,
            'subreddit': str(item.subreddit),
            'url': f"https://www.reddit.com{item.permalink}",
        }
        for item in redditor.comments.new(limit=comment_limit)
    ]

    post_records = [
        {
            'title': item.title,
            'body': item.selftext,
            'subreddit': str(item.subreddit),
            'url': f"https://www.reddit.com{item.permalink}",
        }
        for item in redditor.submissions.new(limit=post_limit)
    ]

    return {'comments': comment_records, 'posts': post_records}


def combine_text(data):
    """Render scraped posts and comments as one plain-text document.

    Args:
        data: Dict with a ``'posts'`` list (items keyed by ``title``,
            ``body``, ``subreddit``, ``url``) and a ``'comments'`` list
            (items keyed by ``text``, ``subreddit``, ``url``).

    Returns:
        A string with a ``=== POSTS ===`` section followed by a
        ``=== COMMENTS ===`` section; each entry ends with a blank line.
    """
    post_sections = [
        f"Title: {entry['title']}\n"
        f"Content: {entry['body']}\n"
        f"Subreddit: {entry['subreddit']}\n"
        f"URL: {entry['url']}\n\n"
        for entry in data['posts']
    ]
    comment_sections = [
        f"Comment: {entry['text']}\n"
        f"Subreddit: {entry['subreddit']}\n"
        f"URL: {entry['url']}\n\n"
        for entry in data['comments']
    ]

    # Headers are emitted even when a section has no entries.
    pieces = ["=== POSTS ===\n\n"]
    pieces.extend(post_sections)
    pieces.append("=== COMMENTS ===\n\n")
    pieces.extend(comment_sections)
    return "".join(pieces)


def generate_persona_with_citations(combined_text):
    """Ask the OpenAI chat model to write a User Persona with example quotes.

    The scraped content is embedded in a fixed instruction prompt and sent as
    a single user message to the ``gpt-4`` model through the module-level
    ``client``.

    Args:
        combined_text: Text block of the user's posts and comments.

    Returns:
        The message content of the first choice in the completion response.
    """
    instructions = f"""
You are an AI that builds a User Persona based on Reddit posts and comments.

Instructions:
- Identify Personality Traits
- Identify Interests
- Describe Behavior & Habits
- List Motivations
- Identify Frustrations
- Describe Goals & Needs
- Provide example quotes from posts/comments for each insight
- Mention subreddit names wherever possible

Here is the Reddit user's content:

{combined_text}

Return the analysis as a well-structured User Persona.
"""

    chat_messages = [{"role": "user", "content": instructions}]
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=chat_messages,
        max_tokens=2000,
        temperature=0.7,
    )

    # Only the first returned choice is used.
    first_choice = completion.choices[0]
    return first_choice.message.content


def save_as_txt(username, persona_text):
    """Save the persona text as ``<username>_persona.txt``.

    The file is written as UTF-8 in the current working directory and
    overwrites any existing file of the same name. A confirmation line is
    printed afterwards.

    Args:
        username: Used as the file name prefix.
        persona_text: Text written to the file unchanged.
    """
    filename = f"{username}_persona.txt"
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(persona_text)
    # The status emoji was stored as mis-decoded UTF-8; use the real character.
    print(f"✅ Persona saved as TXT: {filename}")


def save_as_pdf(
    username,
    persona_text,
    font_path='/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
):
    """Save the persona text as ``<username>_persona.pdf``.

    Each line of the text is rendered with a Unicode TrueType font. The file
    is written to the current working directory and a confirmation line is
    printed afterwards.

    Args:
        username: Used as the file name prefix.
        persona_text: Text to render, split on newlines.
        font_path: Path to the TrueType font file to embed. Defaults to the
            DejaVu Sans location used on Debian/Ubuntu; pass another path on
            systems where the font lives elsewhere.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.add_font('DejaVu', '', font_path, uni=True)
    pdf.set_font('DejaVu', '', 12)

    for line in persona_text.split('\n'):
        # Start every line at the left margin. Newer fpdf2 releases leave the
        # cursor at the right edge after multi_cell, so a following
        # zero-width cell would have no horizontal space to render into.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, line)

    filename = f"{username}_persona.pdf"
    pdf.output(filename)
    # The status emoji was stored as mis-decoded UTF-8; use the real character.
    print(f"✅ Persona saved as PDF: {filename}")


def save_as_json(username, persona_text, scraped_data):
    """Save the persona and the scraped data as ``<username>_persona.json``.

    The JSON object has three keys: ``username``, ``persona`` and
    ``scraped_data``. The file is written as UTF-8 in the current working
    directory and a confirmation line is printed afterwards.

    Args:
        username: Stored under ``username`` and used as the file name prefix.
        persona_text: Stored under ``persona``.
        scraped_data: JSON-serialisable data stored under ``scraped_data``.
    """
    data = {
        'username': username,
        'persona': persona_text,
        'scraped_data': scraped_data
    }
    filename = f"{username}_persona.json"
    with open(filename, 'w', encoding='utf-8') as file:
        # The file is UTF-8, so keep non-ASCII text (emoji, accents) readable
        # instead of escaping it as \uXXXX sequences.
        json.dump(data, file, indent=4, ensure_ascii=False)
    # The status emoji was stored as mis-decoded UTF-8; use the real character.
    print(f"✅ Persona saved as JSON: {filename}")


if __name__ == "__main__":
    # Script entry point: prompt for a profile URL, scrape the user's recent
    # activity, generate a persona and write it out as TXT, PDF and JSON.
    reddit_url = input("Enter Reddit Profile URL: ").strip()
    username = extract_username(reddit_url)
    if not username:
        # Stop before any API call; a missing username would otherwise
        # surface as an unrelated error from the Reddit client.
        raise SystemExit(
            f"❌ Could not extract a Reddit username from: {reddit_url!r}"
        )
    print(f"📥 Extracted username: {username}")

    scraped_data = scrape_user_data(username, comment_limit=10, post_limit=5)
    combined_text = combine_text(scraped_data)
    persona_output = generate_persona_with_citations(combined_text)

    save_as_txt(username, persona_output)
    save_as_pdf(username, persona_output)
    save_as_json(username, persona_output, scraped_data)

    print("🎉 All outputs generated successfully!")


Enter Reddit Profile URL:  https://www.reddit.com/user/kojied/comments/


📥 Extracted username: kojied
✅ Persona saved as TXT: kojied_persona.txt
✅ Persona saved as PDF: kojied_persona.pdf
✅ Persona saved as JSON: kojied_persona.json
🎉 All outputs generated successfully!


In [19]:
# Print the installed git version (confirms git is available to the notebook).
!git --version


git version 2.25.1


In [None]:
# Create a new git repository in the notebook's current working directory.
!git init


In [None]:
# Publish the project: register the GitHub remote, stage and commit the
# working directory, rename the branch to main and push it upstream.
# NOTE(review): `git add .` stages every file in the directory, including
# generated persona outputs and any file that contains credentials — confirm
# a .gitignore is in place and no secrets are committed before pushing.
!git remote add origin https://github.com/Santosh9519424222/reddit-persona-generator.git
!git add .
!git commit -m "Initial commit - Reddit User Persona Project"
!git branch -M main
!git push -u origin main
