In [None]:
# Install required libraries
!pip install praw

In [None]:
import operator
import os

import nltk
import pandas as pd
import praw
from nltk.sentiment import SentimentIntensityAnalyzer

nltk.download('vader_lexicon')

In [None]:
# Initialize Reddit API.
# Credentials are looked up in the environment first so that real secrets do
# not have to be saved inside the notebook; the "INFO HERE" placeholders are
# used only when the corresponding variable is not set.
reddit = praw.Reddit(
    client_id=os.environ.get("REDDIT_CLIENT_ID", "INFO HERE"),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET", "INFO HERE"),
    user_agent=os.environ.get("REDDIT_USER_AGENT", "INFO HERE"),
)

# Function to create an expanded list of comments for a submission
def create_expand_comments_list(submission):
    """Expand a submission's full comment tree and convert it to an edge list.

    Args:
        submission: Object exposing ``comments`` (with ``replace_more`` and
            ``list``) and ``author``; presumably a PRAW submission.

    Returns:
        Whatever ``comment_list_to_edge_list`` returns for the flattened
        comments and the submission author.
        NOTE(review): ``comment_list_to_edge_list`` is not defined in the
        visible part of this notebook -- confirm it exists before running.
    """
    comment_forest = submission.comments
    # limit=None: keep resolving "more comments" placeholders until none remain.
    comment_forest.replace_more(limit=None)
    flat_comments = comment_forest.list()
    return comment_list_to_edge_list(flat_comments, submission.author)

# Function to create an edge list for a subreddit
def create_edge_list_for_subreddit(subreddit, list_type, limit):
    """Build one combined edge list from a listing of a subreddit's submissions.

    Args:
        subreddit: Subreddit name passed to ``reddit.subreddit``.
        list_type: Name of the listing method to call on the subreddit object
            (for example ``"hot"``, ``"new"`` or ``"top"``).
        limit: Forwarded to the listing method as ``limit``.

    Returns:
        The ``pd.concat`` of ``create_expand_comments_list`` over every
        submission produced by the listing.

    Raises:
        ValueError: If ``list_type`` does not name a callable attribute of the
            subreddit object. pandas also raises ``ValueError`` from
            ``pd.concat`` when the listing yields no submissions.
    """
    subr = reddit.subreddit(subreddit)
    listing = getattr(subr, list_type, None)
    # Reject missing names and plain data attributes (which would otherwise
    # fail later with an unrelated "object is not callable" TypeError).
    if not callable(listing):
        raise ValueError(
            f"{list_type!r} is not a listing method of subreddit {subreddit!r}"
        )
    frames = [create_expand_comments_list(s) for s in listing(limit=limit)]
    return pd.concat(frames)

# Function to get comments for a submission
def get_submission_comments(submission_id):
    """Fetch all comments of one submission, each joined to its parent comment.

    Args:
        submission_id: Identifier passed to ``reddit.submission``.

    Returns:
        DataFrame indexed by comment ``id``. Columns are the instance
        attributes of each comment object (``vars(comment)``) plus
        ``post_title``, ``post_content``, ``post_author_id``,
        ``post_author_name`` and ``subreddit``. The same columns of the parent
        comment are attached with a ``_parent`` suffix; they are missing for
        comments whose ``parent_id`` matches no comment id in the frame.
    """
    submission = reddit.submission(submission_id)
    # limit=None: resolve every "more comments" placeholder before flattening.
    submission.comments.replace_more(limit=None)
    all_comments = submission.comments.list()

    rows = [vars(comment) for comment in all_comments]
    # With no comments there are no columns at all; supply the two that the
    # steps below rely on so an empty submission yields an empty frame
    # instead of a KeyError.
    df = pd.DataFrame(rows) if rows else pd.DataFrame(columns=["id", "parent_id"])

    df['post_title'] = submission.title
    df['post_content'] = submission.selftext
    # The author is None when the posting account no longer exists.
    post_author = submission.author
    df['post_author_id'] = post_author.id if post_author is not None else None
    df['post_author_name'] = post_author.name if post_author is not None else None
    df['subreddit'] = submission.subreddit.display_name

    # Drop the first three characters (the fullname type prefix such as "t1_")
    # so parent ids are comparable with the bare comment ids in the index.
    df["parent_id"] = df["parent_id"].str[3:]
    df = df.set_index("id")
    # Self-join: look each comment's parent up in the same frame.
    df_joined = df.join(df, on="parent_id", rsuffix="_parent")
    return df_joined


def to_edge_list(df, use_post_parent, extra_fields = []):
    """Build an author-to-author edge list (``Source`` -> ``Target``).

    Args:
        df: Frame with ``author`` and ``author_parent`` columns, and
            ``post_author_name`` when ``use_post_parent`` is true.
        use_post_parent: If true, rows without a parent author point at the
            post author instead. If false, those rows are dropped.
        extra_fields: Additional column names to carry into the result.

    Returns:
        New DataFrame with columns ``Source`` (from ``author``), ``Target``
        (the resolved parent author) and ``extra_fields``. ``df`` is not
        modified.
    """
    if use_post_parent:
        rows = df
        targets = df["author_parent"].fillna(df["post_author_name"])
    else:
        # Assigning a dropna()-ed Series back to the frame re-aligns it on the
        # index and leaves the NaN rows in place, so filter the rows instead.
        rows = df[df["author_parent"].notna()]
        targets = rows["author_parent"]
    edge_list = rows.assign(author_parent_fixed=targets)
    edge_list = edge_list[["author", "author_parent_fixed"] + list(extra_fields)]
    return edge_list.rename(columns={'author': 'Source', 'author_parent_fixed': 'Target'})

def get_submissions_comments(ids):
    """Collect the comment frames of several submissions into one DataFrame.

    Args:
        ids: Iterable of submission ids, each passed to
            ``get_submission_comments``.

    Returns:
        The per-submission frames stacked with ``pd.concat``.
    """
    frames = []
    for submission_id in ids:
        frames.append(get_submission_comments(submission_id))
    return pd.concat(frames)


# Function that builds the edge list structure for the comment chain.
def to_comment_edge_list(df, use_post_parent, extra_fields = []):
    """Build a comment-to-parent edge list (``Source`` -> ``Target``).

    Args:
        df: Frame holding comment ids either in an ``id`` column or in an
            index named ``id``, plus a ``parent_id`` column.
        use_post_parent: Not used by this function.
        extra_fields: Additional column names to carry into the result.

    Returns:
        New DataFrame with columns ``Source`` (from ``id``), ``Target`` (from
        ``parent_id``) and ``extra_fields``. ``df`` is not modified.
    """
    frame = df
    # Frames indexed by comment id have no "id" column to select; promote the
    # index to a column first.
    if "id" not in frame.columns and frame.index.name == "id":
        frame = frame.reset_index()
    edge_list = frame[["id", "parent_id"] + list(extra_fields)]
    return edge_list.rename(columns={'id': 'Source', 'parent_id': 'Target'})
# Function that builds the node list structure for the comment chain.
def to_comment_node_list(df, extra_fields = []):
    """Build a comment node list (``Id``, ``Label``).

    Args:
        df: Frame holding comment ids either in an ``id`` column or in an
            index named ``id``, plus an ``author`` column.
        extra_fields: Additional column names to carry into the result.

    Returns:
        New DataFrame with columns ``Id`` (from ``id``), ``Label`` (from
        ``author``, with missing values replaced by ``'DELETED'``) and
        ``extra_fields``. ``df`` is not modified.
    """
    frame = df
    # Frames indexed by comment id have no "id" column to select; promote the
    # index to a column first.
    if "id" not in frame.columns and frame.index.name == "id":
        frame = frame.reset_index()
    # Explicit copy so the fill below writes to an independent frame rather
    # than to a slice of the input.
    node_list = frame[["id", "author"] + list(extra_fields)].copy()
    node_list['author'] = node_list['author'].fillna('DELETED')
    return node_list.rename(columns={'id': 'Id', 'author': 'Label'})

In [None]:
data = get_submissions_comments(["reddit post id"])
# Comment ids are the index of `data`; reset_index() turns them into the "id"
# column that the edge/node builders below select.
df = data.reset_index()[["id", "author", "parent_id", "author_parent", "post_author_id", "post_author_name", "score", "body", "post_title", "post_content", "subreddit"]]


# Create the data output.
output_gephy_edge = to_comment_edge_list(df, True)
output_gephy_edge.dropna().to_csv('gephy_comment_edge_all_mention.csv', index=False)
output_gephy_node = to_comment_node_list(df)
output_gephy_node.dropna().to_csv('gephy_comment_node_all_mention.csv', index=False)
