In [138]:
import praw
import pprint
import textwrap
import json
from hashlib import sha1
from graphviz import Digraph
from credentials import ID, SECRET
from praw.models import MoreComments as more

In [1]:
# Which submission to analyse, and how many leading title characters to drop.
# NOTE(review): the 5-character prefix is presumably a subreddit tag such as
# "CMV: " — confirm against the actual post title before changing it.
SUBMISSION_ID = "17llow9"
TITLE_PREFIX_LEN = 5

# Reddit client; ID and SECRET are supplied by the local `credentials` module
# so that no secret is stored in the notebook itself.
reddit = praw.Reddit(
    client_id=ID,
    client_secret=SECRET,
    user_agent="The Rhetor Project",
)

post = reddit.submission(id=SUBMISSION_ID)
topic = post.title[TITLE_PREFIX_LEN:]  # title with the leading prefix removed
desc = post.selftext                   # self-text (body) of the submission
comments = post.comments               # comments attached to the submission

In [147]:
# Per-reason prune tallies, incremented in place by process():
#   [0] low engagement, [1] body too short, [2] shallow thread.
pruned = [0] * 3

# Directed graph that collects one box node per kept comment; rendered as PNG.
vis = Digraph(format="png")

def process(parent, comment):
    """Build the tree entry for ``comment`` and draw it, or prune it.

    Args:
        parent: Graph node id of the parent ("root" or the parent comment's
            id); used as the tail of the edge drawn to this comment.
        comment: A comment object exposing ``body``, ``id``, ``author``,
            ``depth``, ``author_flair_text``, ``score``, ``score_hidden``,
            ``controversiality`` and ``replies``. ``comment.author`` must not
            be None; callers filter such comments out before calling.

    Returns:
        A dict describing the comment, with its kept replies nested under
        ``"replies"``, or None when the comment is pruned.

    Side effects:
        Increments one slot of the module-level ``pruned`` list for each
        pruned comment, and adds one node plus one incoming edge to the
        module-level ``vis`` graph for each kept comment.
    """
    cur = {
        "state": comment.body,
        "id": comment.id,
        # Author names are stored only as a SHA-1 digest, never in clear text.
        "name": sha1(f"{comment.author.name}".encode()).hexdigest(),
        "depth": comment.depth+1,
        # Flair text minus its last character; '0' when the author has no flair.
        # NOTE(review): presumably the dropped character is a trailing symbol
        # after a numeric rank — confirm against real flair values.
        "rank": comment.author_flair_text[:-1] if comment.author_flair_text else '0',
        "score": comment.score if not comment.score_hidden else 0,
        "spicy": bool(comment.controversiality),
        "agree": None,  # stance towards the parent; not determined here
        "replies": []
    }

    # Prune: low engagement (non-controversial and score below 3).
    if not cur["spicy"] and cur["score"] < 3:
        pruned[0] += 1
        return None
    # Prune: body shorter than 50 characters.
    if len(comment.body) < 50:
        pruned[1] += 1
        return None

    for r in comment.replies:
        # Skip "load more" placeholders and replies without an author.
        if isinstance(r, more) or r.author is None:
            continue
        replyThread = process(cur["id"], r)  # Recurse
        if replyThread:
            cur["replies"].append(replyThread)

    # Depth prune: a comment with no surviving replies is dropped when it sits
    # at depth 1 or 2, i.e. its thread never got at least three levels deep.
    if not cur["replies"] and cur["depth"] < 3:
        pruned[2] += 1
        return None

    # Add the node exactly once. The label is the body wrapped at 25 columns
    # and cut to 100 characters; the ellipsis is driven by the wrapped text so
    # it appears only when something was actually cut off.
    wrapped = textwrap.fill(cur["state"], 25)
    label = f"{wrapped[:100]}{'...' if len(wrapped) > 100 else ''}"
    vis.node(cur["id"], label=label, shape='box')

    edge_color = 'black' if cur["agree"] is None else ('green' if cur["agree"] else 'red')
    edge_label = '?' if cur["agree"] is None else ('Agree' if cur["agree"] else 'Disagree')

    # Create edge
    vis.edge(parent, cur["id"], label=edge_label, color=edge_color, fontcolor=edge_color)

    return cur


def traverse(comments):
    """Process every top-level comment and return the kept threads.

    Adds the "root" node (labelled with the module-level ``topic``) to the
    module-level ``vis`` graph, then runs ``process`` on each top-level
    comment that is not a "load more" placeholder, not stickied, and has an
    author.

    Args:
        comments: Iterable of the submission's top-level comments.

    Returns:
        List of the dicts returned by ``process`` for comments that were kept;
        pruned comments are omitted.
    """
    # Label the root from `topic` directly: `tree` is only bound after this
    # function has returned, so reading tree["root"] here raises NameError on
    # a fresh kernel.
    vis.node("root", label=topic, shape='box')  # Add root

    replies = []
    for c in comments:
        if isinstance(c, more) or c.stickied:
            continue
        if c.author is None:
            continue
        thread = process("root", c)
        # process() returns None for pruned comments; keep those out of the
        # tree, as process() itself does for nested replies.
        if thread:
            replies.append(thread)
    return replies

# Final structure: the post topic is the root statement and the processed
# top-level comment threads hang beneath it.
tree = dict(
    root=dict(
        state=topic,
        replies=traverse(comments),
    )
)

In [149]:
pp = pprint.PrettyPrinter(indent=4, sort_dicts=False)
# pp.pprint(tree)

# Persist the tree as JSON. The context manager closes the file handle once
# the dump finishes, so the data is flushed to disk deterministically.
with open("tree.json", "w") as tree_file:
    json.dump(tree, tree_file, indent=4, sort_keys=False)

# Summary of how many comments each pruning rule removed.
print(
	"Pruned comments:\n"
	f"\t{pruned[0]} with low engagement\n"
	f"\t{pruned[1]} were too short\n"
	f"\t{pruned[2]} from shallow threads\n"
)

# Render the graph to 'tree.png' and open it in the default viewer.
vis.render('tree', view=True)

Pruned comments:
	46 with low engagement
	11 were too short
	54 from shallow threads



'tree.png'