In [1]:
import os
import praw
import pandas as pd
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Fail fast with a readable message if a credential is missing. os.getenv()
# would silently return None, and the problem would only surface later as an
# unclear authentication error on the first request.
REQUIRED_ENV_VARS = ("REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET")
missing_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_vars:
    raise RuntimeError(
        "Missing Reddit API credentials: "
        + ", ".join(missing_vars)
        + ". Define them in the environment or in a .env file."
    )

# Initialize the Reddit instance with your credentials
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    # Identifies this script to Reddit; replace the username with your own
    # if you reuse this notebook.
    user_agent="Vizuara-GPT5-Analysis by u/LazyStranger5730",
)

# NOTE(review): per the PRAW docs the client is created lazily, so this line
# confirms the credentials were found, not that Reddit accepted them. The
# first real request happens when the submission is fetched.
print("Successfully authenticated with Reddit API.")

Successfully authenticated with Reddit API.


In [2]:
# Target thread: the GPT-5 AMA posted on r/ChatGPT
submission_url = "https://www.reddit.com/r/ChatGPT/comments/1mkae1l/gpt5_ama_with_openais_sam_altman_and_some_of_the/"
submission = reddit.submission(url=submission_url)

# Expand the comment tree first (limit=None) so that ALL comments,
# including nested replies, are available before flattening it.
print("Fetching all comments... This might take a moment.")
submission.comments.replace_more(limit=None)

# Flatten the tree and keep only each comment's body text, which is
# the only field needed for text clustering.
all_comments = [comment.body for comment in submission.comments.list()]

print(f"Successfully collected {len(all_comments)} comments.")

Fetching all comments... This might take a moment.
Successfully collected 5340 comments.


In [3]:
# Create a one-column pandas DataFrame from the collected comment bodies
df = pd.DataFrame(all_comments, columns=['comment_text'])

# Drop rows that carry no usable text: missing values, blank bodies, and the
# placeholder bodies Reddit shows for deleted/removed comments.
# dropna() alone does not catch empty or whitespace-only strings, and (with
# pandas' default read_csv settings) an empty field would come back as a
# missing value when the CSV is loaded again.
PLACEHOLDER_BODIES = ['[deleted]', '[removed]']
df = df.dropna(subset=['comment_text'])
is_blank = df['comment_text'].str.strip() == ''
is_placeholder = df['comment_text'].isin(PLACEHOLDER_BODIES)
df = df[~(is_blank | is_placeholder)]

# Save the DataFrame to a CSV file (index omitted; the file has one column)
output_file = 'reddit_gpt5_comments.csv'
df.to_csv(output_file, index=False)

print(f"Data saved to {output_file}")

Data saved to reddit_gpt5_comments.csv
