In [101]:
import praw
from psaw import PushshiftAPI
import datetime as dt
import pandas as pd

# Pushshift client; a later cell calls api.search_submissions() on it.
api = PushshiftAPI()

In [102]:

# Query window as epoch seconds. The datetimes are made explicitly UTC-aware:
# .timestamp() on a *naive* datetime is interpreted in the machine's local
# timezone, which would silently shift the window from one host to another.
start_epoch = int(dt.datetime(2020, 12, 30, tzinfo=dt.timezone.utc).timestamp())
end_epoch = int(dt.datetime(2021, 1, 3, tzinfo=dt.timezone.utc).timestamp())

# Subreddit to query; also used to build the output directory name when saving.
subreddit = 'trans'

# Submission fields requested through the `filter` argument.
# NOTE(review): a later cell reads a 'created_utc' column that is not listed
# here; presumably the client adds it on its own -- confirm.
filterList = [
    'id',
    'title',
    'selftext',
    'subreddit',
    'num_comments',
    'score'
]

# Materialise every submission in the window as a list, ascending sort order.
results = list(api.search_submissions(after=start_epoch,
                            before=end_epoch,
                            subreddit=subreddit,
                            filter=filterList,
                            sort='asc'))

In [104]:

# Tabulate the fetched submissions and remember the column labels.
results_df = pd.DataFrame(results)
columnNames = results_df.columns

# Epoch seconds -> pandas timestamps, so rows can be grouped by calendar period.
results_df['created_utc'] = pd.to_datetime(
    results_df['created_utc'],
    unit='s',
    origin='unix',
)

# One group per month of the creation timestamp.
result_by_month_df = results_df.groupby(
    pd.Grouper(freq='M', key='created_utc')
)



In [105]:
# save final output to file

def printBuckets(bucketList, fileNamePostfix):
    """Write every bucket in ``bucketList`` to its own CSV file.

    Files are named ``data/<subreddit>/<fileNamePostfix><n>.csv``, where ``n``
    counts the buckets from 1 in iteration order and ``subreddit`` is the
    module-level variable of that name. The target directory is created when
    it does not exist yet, so the function also works on a fresh checkout.

    Parameters
    ----------
    bucketList : iterable of (key, bucket) pairs
        ``bucket`` must provide ``to_csv(path, index=False)``; the key is ignored.
    fileNamePostfix : str
        Text placed directly before the bucket number in the file name.
    """
    import os  # local import so the function does not depend on another cell

    out_dir = "data/" + subreddit + "/"
    # to_csv does not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)

    for bucket_num, (_key, bucket) in enumerate(bucketList, start=1):
        fileName = out_dir + fileNamePostfix + str(bucket_num) + ".csv"
        bucket.to_csv(fileName, index=False)

# Write each monthly group to its own CSV; "month" goes right before the bucket number in the file name.
printBuckets(result_by_month_df, "month")

In [117]:
# now we do some sentiment analysis on each bucket
import text2emotion as te
import operator

def combine_dicts(dict1, dict2):
    """Return a new dict holding the key-wise sum of ``dict1`` and ``dict2``.

    A key missing from one input contributes 0 for that input. Neither input
    is modified.

    The result's key order is deterministic: keys of ``dict1`` in their
    existing order, followed by keys that appear only in ``dict2``. Iterating
    a set union instead would make the order depend on string hashing, which
    can differ from one interpreter run to the next.
    """
    # dict.fromkeys de-duplicates while preserving first-seen order.
    ordered_keys = dict.fromkeys((*dict1, *dict2))
    return {key: dict1.get(key, 0) + dict2.get(key, 0) for key in ordered_keys}


# Build one score-weighted emotion total per monthly group.
bucket_sums = []
for created_utc, bucket in result_by_month_df:
    N = len(bucket)
    bucket_sum = {'Angry': 0.0, 'Fear': 0.0, 'Happy': 0.0, 'Sad': 0.0, 'Surprise': 0.0}

    for row_label, post in bucket.iterrows():
        # Emotion scores for the post's title and body text together.
        post_text = " ".join((str(post.title), str(post.selftext)))
        emotions = te.get_emotion(post_text)

        # Posts with no positive emotion total contribute nothing.
        if not sum(emotions.values()) > 0:
            continue

        # Weight every emotion score by the post's score before accumulating.
        weighted = {name: post.score * value for name, value in emotions.items()}
        bucket_sum = combine_dicts(bucket_sum, weighted)

    bucket_sums.append(bucket_sum)

print(bucket_sums)

[{'Surprise': 82.47000000000001, 'Happy': 71.39999999999999, 'Fear': 65.80999999999997, 'Sad': 57.6, 'Angry': 10.560000000000002}, {'Surprise': 132.86000000000004, 'Happy': 133.73000000000008, 'Fear': 118.29, 'Sad': 96.47999999999999, 'Angry': 40.26999999999999}]


In [118]:
# One row per bucket, one column per emotion.
emotion_df = pd.DataFrame(bucket_sums)

# Divide every row by its own total, then export the result as CSV.
normalized_emotion_df = emotion_df.div(
    emotion_df.sum(axis="columns"),
    axis="index",
)
normalized_emotion_df.to_csv("emotionAvgsOut.csv")