In [1]:
import json
import os
import time
from collections import defaultdict
from datetime import datetime, timedelta, timezone

import requests

In [2]:
# Subreddits whose commenter sets are compared against each other.
subreddits = ['The_Donald', 'BannedFromThe_Donald']
# Authors left out of the per-user tallies. Authors are compared against this
# list as returned by the API, so the bot account must be spelled with its
# real capitalisation ("AutoModerator"); the all-lowercase spelling never matched.
ignored_users = ['[deleted]', 'AutoModerator']
# Lookback window, in days before the moment the notebook runs.
# lookback_days1 is applied to r/The_Donald, lookback_days2 to every other subreddit.
lookback_days1 = 1290
# lookback_days1 = 1254  # alternative value kept from the original notebook
lookback_days2 = 1103
# A user only counts towards a subreddit with at least this many comments in it.
min_comments_per_sub = 1
# Report file written by the last cell.
file_name = "users.txt"

In [3]:
# Pushshift comment search, newest first, 1000 comments per page. The subreddit
# name is substituted for "{}" and the "before" epoch is appended by the caller.
url = "https://api.pushshift.io/reddit/comment/search?&limit=1000&sort=desc&subreddit={}&before="

# Use a timezone-aware "now". datetime.utcnow() returns a *naive* datetime, and
# calling .timestamp() on a naive datetime interprets it as local time, which
# shifts every epoch below by the machine's UTC offset.
startTime = datetime.now(timezone.utc)
startEpoch = int(startTime.timestamp())
# End of each lookback window (the oldest point in time that is still scanned).
endTime1 = startTime - timedelta(days=lookback_days1)
endTime2 = startTime - timedelta(days=lookback_days2)
# endEpoch / totalSeconds are module-level placeholders; countCommenters works
# with its own local values chosen from the "1"/"2" variants below.
endEpoch = 0
endEpoch1 = int(endTime1.timestamp())
endEpoch2 = int(endTime2.timestamp())
totalSeconds = 0
# Window lengths in seconds, used for the progress percentage.
totalSeconds1 = startEpoch - endEpoch1
totalSeconds2 = startEpoch - endEpoch2

In [4]:
# Directory holding the per-subreddit checkpoint files. exist_ok avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs("overlap_subreddits", exist_ok=True)

In [5]:
def loadSubredditCommenters(subreddit):
	"""Load the cached commenter counts for ``subreddit`` from "overlap_subreddits".

	Cache files are named "<subreddit>_<YYYY-MM-DD>.txt" and hold one
	"<author><TAB><count>" pair per line. The first matching file returned by
	os.listdir is used.

	Args:
		subreddit: Subreddit name; must match the file's name part exactly.

	Returns:
		A tuple ``(commenters, epoch, count_comments)`` where ``commenters`` is a
		defaultdict(int) of author -> comment count, ``epoch`` is the timestamp of
		local midnight on the date in the file name, and ``count_comments`` is the
		sum of all loaded counts. ``(None, None, 0)`` when no cache file exists.
	"""
	for filename in os.listdir("overlap_subreddits"):
		if not filename.endswith(".txt"):
			continue
		# Split on the LAST underscore: subreddit names may contain underscores
		# themselves, and a plain prefix test would let "The_Donald" pick up the
		# cache of e.g. "The_DonaldFans".
		cachedSubreddit, separator, dateString = filename[:-4].rpartition("_")
		if not separator or cachedSubreddit != subreddit:
			continue
		try:
			dateThrough = datetime.strptime(dateString, '%Y-%m-%d')
		except ValueError:
			# Not a checkpoint file (suffix is not a date); ignore it.
			continue

		commenters = defaultdict(int)
		count_comments = 0
		with open(os.path.join("overlap_subreddits", filename), 'r') as inputFile:
			for line in inputFile:
				items = line.split("\t")
				if len(items) != 2:
					print(f"Error loading line for {subreddit}: {line}")
					continue
				try:
					# int() tolerates the trailing newline left on items[1].
					user_comments = int(items[1])
				except ValueError:
					# A corrupt count should not abort the whole load.
					print(f"Error loading line for {subreddit}: {line}")
					continue
				commenters[items[0]] = user_comments
				count_comments += user_comments

		print(f"Loaded {len(commenters)} commenters for subreddit r/{subreddit} through {dateString}")
		return commenters, int(dateThrough.timestamp()), count_comments

	return None, None, 0


def saveSubredditCommenters(subreddit, commenters, dateThrough):
	"""Write the commenter counts for ``subreddit`` to its checkpoint file.

	The file is "overlap_subreddits/<subreddit>_<YYYY-MM-DD>.txt" with one
	"<author><TAB><count>" line per commenter. Older checkpoint files of the
	same subreddit are removed afterwards.

	Args:
		subreddit: Subreddit name used as the file-name prefix.
		commenters: Mapping of author -> comment count.
		dateThrough: A date or datetime (anything with ``strftime``) naming the
			checkpoint, or None, in which case nothing is written.
	"""
	if dateThrough is None:
		return

	directory = "overlap_subreddits"
	targetName = f"{subreddit}_{dateThrough.strftime('%Y-%m-%d')}.txt"
	targetPath = os.path.join(directory, targetName)

	# Write to a temporary name and swap it in, so an interruption mid-write
	# never leaves the subreddit without a usable checkpoint. The ".tmp"
	# suffix keeps the partial file invisible to the ".txt" cache lookup.
	tempPath = targetPath + ".tmp"
	with open(tempPath, 'w') as outputFile:
		for commenter, countComments in commenters.items():
			outputFile.write(f"{commenter}\t{countComments}\n")
	os.replace(tempPath, targetPath)

	# Only now drop superseded checkpoints. The subreddit part of the name must
	# match exactly: a prefix test would also delete the cache of any other
	# subreddit whose name merely starts with this one.
	for filename in os.listdir(directory):
		if filename == targetName or not filename.endswith(".txt"):
			continue
		cachedSubreddit, _, dateString = filename[:-4].rpartition("_")
		if cachedSubreddit != subreddit:
			continue
		try:
			datetime.strptime(dateString, '%Y-%m-%d')
		except ValueError:
			# Not one of our checkpoint files; leave it alone.
			continue
		os.remove(os.path.join(directory, filename))


def countCommenters(subreddit, request_timeout=30):
	"""Count comments per author in ``subreddit`` by paging backwards through Pushshift.

	Starts from the checkpoint returned by ``loadSubredditCommenters`` when one
	exists, otherwise from ``startEpoch``. Pages are requested newest-first until
	a comment older than the lookback cutoff is seen or Pushshift returns an
	empty page. Failed requests are retried every 5 seconds without limit.

	Checkpoints are written each time the scan crosses into an earlier (local)
	calendar day, *before* the first comment of that earlier day is counted.
	The saved file is therefore complete for the day in its name, which is what
	resuming from midnight of that day requires. The final save after the loop
	may cover only part of its day.

	Note: the running total restored from a checkpoint is the sum of the saved
	per-author counts, so it excludes ignored authors, while comments counted
	during this run include them. The printed totals are approximate.

	Args:
		subreddit: Subreddit name. 'The_Donald' uses the first lookback window
			(endEpoch1 / totalSeconds1), anything else the second.
		request_timeout: Seconds to wait for Pushshift before retrying.

	Returns:
		defaultdict(int) mapping author name -> number of comments counted.
	"""
	if subreddit == 'The_Donald':
		endEpoch = endEpoch1
		totalSeconds = totalSeconds1
	else:
		endEpoch = endEpoch2
		totalSeconds = totalSeconds2

	commenters, previousEpoch, count = loadSubredditCommenters(subreddit)
	if commenters is None:
		commenters = defaultdict(int)
		previousEpoch = startEpoch
	print(f"Counting commenters in: {subreddit}")

	# Reddit account names are case-insensitive, so compare them that way.
	ignoredAuthors = {user.lower() for user in ignored_users}

	reachedCutoff = False
	currentDate = None  # local calendar day of the most recently counted comment
	while not reachedCutoff:
		newUrl = url.format(subreddit) + str(previousEpoch)
		try:
			# Without a timeout a stalled connection would block forever and the
			# timeout handler below could never run.
			response = requests.get(
				newUrl,
				headers={'User-Agent': "Overlap counter by /u/Watchful1"},
				timeout=request_timeout)
		except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
			print(f"Pushshift timeout, this usually means pushshift is down. Waiting 5 seconds and trying again: {newUrl}")
			time.sleep(5)
			continue
		try:
			comments = response.json()['data']
		except (ValueError, KeyError):
			# ValueError covers every JSON decode error flavour (they all derive
			# from it); KeyError covers a JSON error body without "data".
			print(f"Decoding error, this usually means pushshift is down. Waiting 5 seconds and trying again: {newUrl}")
			time.sleep(5)
			continue

		time.sleep(1)  # pushshift is ratelimited. If we go too fast we'll get errors

		if len(comments) == 0:
			break
		for comment in comments:
			createdEpoch = comment['created_utc']
			if createdEpoch < endEpoch:
				# Older than the lookback window: stop without counting it.
				reachedCutoff = True
				break

			commentDate = datetime.fromtimestamp(createdEpoch).date()
			if currentDate is not None and commentDate != currentDate:
				# Day boundary: everything on currentDate has been counted and
				# nothing from the earlier day yet, so this state can be resumed
				# from midnight of currentDate without skipping comments.
				saveSubredditCommenters(subreddit, commenters, currentDate)
			currentDate = commentDate

			previousEpoch = createdEpoch - 1
			if comment['author'].lower() not in ignoredAuthors:
				commenters[comment['author']] += 1
			count += 1
			if count % 1000 == 0:
				print("r/{0} comments: {1}, {2}, {3:.2f}%".format(
					subreddit,
					count,
					datetime.fromtimestamp(previousEpoch).strftime("%Y-%m-%d"),
					((startEpoch - previousEpoch) / totalSeconds) * 100))

	# currentDate is None only when this run counted nothing new; the save is
	# then a no-op and any existing checkpoint is left untouched.
	saveSubredditCommenters(subreddit, commenters, currentDate)
	print(f"Comments: {count}, commenters: {len(commenters)}")
	return commenters

In [6]:
# For every user, tally in how many of the configured subreddits they reach
# the minimum comment count.
commenterSubreddits = defaultdict(int)
for subreddit in subreddits:
	commenters = countCommenters(subreddit)
	for commenter, commentCount in commenters.items():
		if commentCount < min_comments_per_sub:
			continue
		commenterSubreddits[commenter] += 1

# Bucket users by that tally, keeping only those present in all subreddits,
# all but one, or all but two.
subredditTotal = len(subreddits)
sharedCommenters = defaultdict(list)
for commenter, countSubreddits in commenterSubreddits.items():
	if countSubreddits < subredditTotal - 2:
		continue
	sharedCommenters[countSubreddits].append(commenter)

# Bucket sizes, looked up in the order: all, all-but-one, all-but-two.
commentersAll, commentersMinusOne, commentersMinusTwo = (
	len(sharedCommenters[subredditTotal - missing]) for missing in range(3)
)

print(f"{commentersAll} commenters in all subreddits, {commentersMinusOne} in all but one, {commentersMinusTwo} in all but 2")

Loaded 261562 commenters for subreddit r/The_Donald through 2018-07-11
1531292400
Counting commenters in: The_Donald
r/The_Donald comments: 15318000, 2018-07-10, 97.30%
r/The_Donald comments: 15319000, 2018-07-10, 97.30%
r/The_Donald comments: 15320000, 2018-07-10, 97.31%
r/The_Donald comments: 15321000, 2018-07-10, 97.31%
r/The_Donald comments: 15322000, 2018-07-10, 97.31%
r/The_Donald comments: 15323000, 2018-07-10, 97.31%
r/The_Donald comments: 15324000, 2018-07-10, 97.32%
r/The_Donald comments: 15325000, 2018-07-10, 97.32%
r/The_Donald comments: 15326000, 2018-07-10, 97.32%
r/The_Donald comments: 15327000, 2018-07-10, 97.32%
r/The_Donald comments: 15328000, 2018-07-10, 97.32%
r/The_Donald comments: 15329000, 2018-07-10, 97.33%
r/The_Donald comments: 15330000, 2018-07-10, 97.33%
r/The_Donald comments: 15331000, 2018-07-10, 97.33%
r/The_Donald comments: 15332000, 2018-07-10, 97.33%
r/The_Donald comments: 15333000, 2018-07-10, 97.33%
r/The_Donald comments: 15334000, 2018-07-10, 97.34%

r/The_Donald comments: 15474000, 2018-07-05, 97.71%
r/The_Donald comments: 15475000, 2018-07-05, 97.71%
r/The_Donald comments: 15476000, 2018-07-05, 97.71%
r/The_Donald comments: 15477000, 2018-07-05, 97.71%
r/The_Donald comments: 15478000, 2018-07-05, 97.71%
r/The_Donald comments: 15479000, 2018-07-05, 97.71%
r/The_Donald comments: 15480000, 2018-07-05, 97.72%
r/The_Donald comments: 15481000, 2018-07-05, 97.72%
r/The_Donald comments: 15482000, 2018-07-05, 97.72%
r/The_Donald comments: 15483000, 2018-07-05, 97.72%
r/The_Donald comments: 15484000, 2018-07-05, 97.72%
r/The_Donald comments: 15485000, 2018-07-05, 97.73%
r/The_Donald comments: 15486000, 2018-07-05, 97.73%
r/The_Donald comments: 15487000, 2018-07-05, 97.73%
r/The_Donald comments: 15488000, 2018-07-05, 97.73%
r/The_Donald comments: 15489000, 2018-07-05, 97.73%
r/The_Donald comments: 15490000, 2018-07-05, 97.74%
r/The_Donald comments: 15491000, 2018-07-05, 97.74%
r/The_Donald comments: 15492000, 2018-07-05, 97.74%
r/The_Donald

r/The_Donald comments: 15632000, 2018-06-30, 98.12%
r/The_Donald comments: 15633000, 2018-06-30, 98.12%
r/The_Donald comments: 15634000, 2018-06-30, 98.12%
r/The_Donald comments: 15635000, 2018-06-30, 98.12%
r/The_Donald comments: 15636000, 2018-06-30, 98.12%
r/The_Donald comments: 15637000, 2018-06-30, 98.13%
r/The_Donald comments: 15638000, 2018-06-30, 98.13%
r/The_Donald comments: 15639000, 2018-06-30, 98.13%
r/The_Donald comments: 15640000, 2018-06-30, 98.13%
r/The_Donald comments: 15641000, 2018-06-30, 98.14%
r/The_Donald comments: 15642000, 2018-06-30, 98.15%
r/The_Donald comments: 15643000, 2018-06-29, 98.15%
r/The_Donald comments: 15644000, 2018-06-29, 98.16%
r/The_Donald comments: 15645000, 2018-06-29, 98.16%
r/The_Donald comments: 15646000, 2018-06-29, 98.16%
r/The_Donald comments: 15647000, 2018-06-29, 98.16%
r/The_Donald comments: 15648000, 2018-06-29, 98.16%
r/The_Donald comments: 15649000, 2018-06-29, 98.17%
r/The_Donald comments: 15650000, 2018-06-29, 98.17%
r/The_Donald

r/The_Donald comments: 15790000, 2018-06-26, 98.42%
r/The_Donald comments: 15791000, 2018-06-26, 98.42%
r/The_Donald comments: 15792000, 2018-06-26, 98.42%
r/The_Donald comments: 15793000, 2018-06-26, 98.42%
r/The_Donald comments: 15794000, 2018-06-26, 98.43%
r/The_Donald comments: 15795000, 2018-06-26, 98.43%
r/The_Donald comments: 15796000, 2018-06-26, 98.43%
r/The_Donald comments: 15797000, 2018-06-26, 98.43%
r/The_Donald comments: 15798000, 2018-06-26, 98.43%
r/The_Donald comments: 15799000, 2018-06-26, 98.43%
r/The_Donald comments: 15800000, 2018-06-26, 98.43%
r/The_Donald comments: 15801000, 2018-06-26, 98.44%
r/The_Donald comments: 15802000, 2018-06-26, 98.44%
r/The_Donald comments: 15803000, 2018-06-26, 98.44%
r/The_Donald comments: 15804000, 2018-06-26, 98.44%
r/The_Donald comments: 15805000, 2018-06-26, 98.45%
r/The_Donald comments: 15806000, 2018-06-26, 98.45%
r/The_Donald comments: 15807000, 2018-06-26, 98.46%
r/The_Donald comments: 15808000, 2018-06-25, 98.46%
r/The_Donald

r/The_Donald comments: 15948000, 2018-06-21, 98.82%
r/The_Donald comments: 15949000, 2018-06-21, 98.82%
r/The_Donald comments: 15950000, 2018-06-21, 98.82%
r/The_Donald comments: 15951000, 2018-06-21, 98.83%
r/The_Donald comments: 15952000, 2018-06-21, 98.83%
r/The_Donald comments: 15953000, 2018-06-21, 98.83%
r/The_Donald comments: 15954000, 2018-06-21, 98.84%
r/The_Donald comments: 15955000, 2018-06-21, 98.84%
r/The_Donald comments: 15956000, 2018-06-20, 98.85%
r/The_Donald comments: 15957000, 2018-06-20, 98.85%
r/The_Donald comments: 15958000, 2018-06-20, 98.86%
r/The_Donald comments: 15959000, 2018-06-20, 98.86%
r/The_Donald comments: 15960000, 2018-06-20, 98.86%
r/The_Donald comments: 15961000, 2018-06-20, 98.86%
r/The_Donald comments: 15962000, 2018-06-20, 98.86%
r/The_Donald comments: 15963000, 2018-06-20, 98.87%
r/The_Donald comments: 15964000, 2018-06-20, 98.87%
r/The_Donald comments: 15965000, 2018-06-20, 98.87%
r/The_Donald comments: 15966000, 2018-06-20, 98.87%
r/The_Donald

r/The_Donald comments: 16106000, 2018-06-15, 99.26%
r/The_Donald comments: 16107000, 2018-06-15, 99.27%
r/The_Donald comments: 16108000, 2018-06-15, 99.27%
r/The_Donald comments: 16109000, 2018-06-15, 99.27%
r/The_Donald comments: 16110000, 2018-06-15, 99.27%
r/The_Donald comments: 16111000, 2018-06-15, 99.28%
r/The_Donald comments: 16112000, 2018-06-15, 99.28%
r/The_Donald comments: 16113000, 2018-06-15, 99.28%
r/The_Donald comments: 16114000, 2018-06-15, 99.28%
r/The_Donald comments: 16115000, 2018-06-15, 99.28%
r/The_Donald comments: 16116000, 2018-06-15, 99.29%
r/The_Donald comments: 16117000, 2018-06-15, 99.29%
r/The_Donald comments: 16118000, 2018-06-15, 99.29%
r/The_Donald comments: 16119000, 2018-06-15, 99.29%
r/The_Donald comments: 16120000, 2018-06-15, 99.30%
r/The_Donald comments: 16121000, 2018-06-15, 99.30%
r/The_Donald comments: 16122000, 2018-06-15, 99.31%
r/The_Donald comments: 16123000, 2018-06-14, 99.32%
r/The_Donald comments: 16124000, 2018-06-14, 99.32%
r/The_Donald

r/The_Donald comments: 16264000, 2018-06-10, 99.63%
r/The_Donald comments: 16265000, 2018-06-10, 99.63%
r/The_Donald comments: 16266000, 2018-06-10, 99.64%
r/The_Donald comments: 16267000, 2018-06-10, 99.64%
r/The_Donald comments: 16268000, 2018-06-10, 99.64%
r/The_Donald comments: 16269000, 2018-06-10, 99.64%
r/The_Donald comments: 16270000, 2018-06-10, 99.65%
r/The_Donald comments: 16271000, 2018-06-10, 99.65%
r/The_Donald comments: 16272000, 2018-06-10, 99.65%
r/The_Donald comments: 16273000, 2018-06-10, 99.65%
r/The_Donald comments: 16274000, 2018-06-10, 99.66%
r/The_Donald comments: 16275000, 2018-06-10, 99.66%
r/The_Donald comments: 16276000, 2018-06-10, 99.66%
r/The_Donald comments: 16277000, 2018-06-10, 99.66%
r/The_Donald comments: 16278000, 2018-06-10, 99.66%
r/The_Donald comments: 16279000, 2018-06-10, 99.67%
r/The_Donald comments: 16280000, 2018-06-10, 99.67%
r/The_Donald comments: 16281000, 2018-06-10, 99.67%
r/The_Donald comments: 16282000, 2018-06-10, 99.67%
r/The_Donald

In [7]:
with open(file_name, 'w') as txt:
	# One row per report section, strictest first:
	# (number of users, key into sharedCommenters, headline when empty, headline otherwise).
	sections = (
		(commentersAll, len(subreddits),
			"No commenters in all subreddits\n",
			f"{commentersAll} commenters in all subreddits\n"),
		(commentersMinusOne, len(subreddits) - 1,
			"No commenters in all but one subreddits\n",
			f"{commentersMinusOne} commenters in all but one subreddits\n"),
		(commentersMinusTwo, len(subreddits) - 2,
			"No commenters in all but two subreddits\n",
			f"{commentersMinusTwo} commenters in all but two subreddits\n"),
	)
	for userCount, sharedKey, emptyHeadline, headline in sections:
		if userCount == 0:
			txt.write(emptyHeadline)
		else:
			txt.write(headline)
			# User names, sorted case-insensitively, one per line.
			txt.writelines(f"{user}\n" for user in sorted(sharedCommenters[sharedKey], key=str.lower))
		txt.write("\n")

		# A looser section is only reported while the stricter one before it
		# lists fewer than 10 users.
		if userCount >= 10:
			break