This script investigates "brigading," a phenomenon on Reddit in which members of one community (subreddit) vote and comment on posts of another. In particular, it investigates subreddits that quote the original score of a post while linking to it.

In [None]:
import praw
import re
import datetime
import numpy as np
import pandas as pd

To use this, you have to enter your Reddit username and password. You also have to register the script by creating an application at https://www.reddit.com/prefs/apps, and then enter the client id and client secret that it provides.

In [None]:
# Passed to PRAW as the user agent for this script.
name = "Link Comparison (Quote)"

# Fill these in before running: the Reddit account credentials and the
# client id / client secret of the registered application.
username = ""
password = ""
cid = ""
csec = ""

# Authenticated Reddit client used by the rest of the script.
r = praw.Reddit(
    client_id=cid,
    client_secret=csec,
    username=username,
    password=password,
    user_agent=name,
)

Enter the name of the subreddit and how many posts you want to analyze.

In [None]:
# Name of the subreddit to analyse; fill in before running.
subreddit_name = ""
# How many posts of the subreddit's hot listing to scan. This was left blank
# in the template (a syntax error); 100 is only a starting value, adjust freely.
n_posts = 100

subreddit = r.subreddit(subreddit_name)

Checking the effect of brigading is simple: extract the original score of the linked comment from the text and compare it to its current score. However, additional coding is required to correct for different quoting formats and linking strategies.

In [None]:
# One row per analysed comment; filled by the collection loop that follows.
data = []


def comment_data(comment):
    """Summarise a comment as ``[creation time, current score, deleted flag]``.

    The creation time is a naive datetime built from ``comment.created_utc``.
    The deleted flag is 1 when ``comment.author`` is falsy and 0 otherwise.
    """
    current_score = comment.score
    is_deleted = 0 if comment.author else 1
    created_at = datetime.datetime.utcfromtimestamp(comment.created_utc)
    return [created_at, current_score, is_deleted]

# Walk the subreddit's hot listing. For every link post whose title quotes at
# least one score, record the quoted score next to the linked comment's
# current data (one row per quoted score).
for submission in subreddit.hot(limit=n_posts):
    # Posts with self text are skipped; only link posts are analysed.
    if submission.selftext:
        continue

    url = submission.url
    # Use created_utc so the conversion matches the one in comment_data().
    time = datetime.datetime.utcfromtimestamp(submission.created_utc)
    link_score = submission.score

    # Quoted scores are recognised as "+12", "12+" or "[12]". Spaces are
    # removed first so that forms such as "+ 12" are matched as well.
    # NOTE(review): only the digits are kept, so a quoted negative score
    # loses its sign - confirm whether that is intended.
    title = submission.title.replace(' ', '')
    init_score = re.findall(r'\+\d+|\d+\+|\[\d+\]', title)
    init_score = [int(re.sub('[^0-9]', '', i)) for i in init_score]

    if not init_score:
        continue

    try:
        comment = r.comment(url=url)
    except praw.exceptions.ClientException:
        # The link could not be interpreted as a comment; skip the post.
        continue

    # The first quoted score belongs to the linked comment itself.
    data.append([link_score, init_score[0], time] + comment_data(comment))

    if len(init_score) > 1:
        # Replies are only available after a refresh of the comment.
        try:
            comment.refresh()
        except praw.exceptions.ClientException:
            # The comment could not be reloaded; keep the row already
            # recorded and move on without its replies.
            continue

        # "Load more comments" placeholders carry no score/author data.
        replies = [
            reply for reply in comment.replies
            if not isinstance(reply, praw.models.MoreComments)
        ]

        # Remaining quoted scores are paired with the replies in order. zip()
        # stops at the shorter sequence, so a title quoting more scores than
        # there are replies no longer raises IndexError.
        for quoted_score, reply in zip(init_score[1:], replies):
            data.append([link_score, quoted_score, time] + comment_data(reply))
                
# Build the result table. The column names are passed to the constructor so
# that an empty ``data`` list still yields a frame with the expected columns
# (assigning six names afterwards fails on a frame that has no columns).
df = pd.DataFrame(
    data,
    columns=['link_score', 'orig_score', 'link_time', 'comment_time', 'final_score', 'deleted'],
)

df

Now we can analyze and graph the data.

In [None]:
import matplotlib.pyplot as plt

# Newer matplotlib releases ship the seaborn style under the name
# "seaborn-v0_8"; use that when present and the old name otherwise.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)

# Difference between the comment's current score and the score quoted in the
# linking title; used by the histogram and the last scatter plot.
score_change = df.final_score - df.orig_score

plt.scatter(df.orig_score, df.final_score)
plt.title("Original vs. Final Score")
plt.xlabel("Original Score")
plt.ylabel("Final Score")
plt.show()

# The mean of the 0/1 flag is the deleted share; format it as a percentage to
# match the label (an empty frame prints "nan%" rather than dividing by zero).
print(f"Percentage Deleted: {df.deleted.mean():.1%}")

plt.hist(score_change, bins=30)
plt.title("Histogram of Score Changes")
plt.xlabel("Score Change")
plt.show()

plt.scatter(df.link_score, score_change)
plt.title("Link Score vs. Score Change")
plt.xlabel("Link Score")
plt.ylabel("Score Change")
plt.show()