# Project for Reflections In Data Science
## Reddit post tracker

Select new posts without any form of engagement and divide them into two separate groups.
The two groups are a control group and an experimental group whose posts are given a like/upvote.
These posts are then tracked to see their development. 

This process is repeated every day for 7 days.


## Technical description
A group of subreddits is chosen. The script traverses each subreddit's "new posts" page and selects posts without any form of engagement.

In [None]:
import random
import datetime
import praw
import json
import pandas as pd

class ReflectionExperimentReddit:
    """Pick untouched Reddit posts, upvote a random half and log their progress.

    Every observation of a post is appended as one JSON object per line to
    ``savedPostsFilename``. Each record holds the keys ``id``, ``nVotes``,
    ``nComments``, ``isExperimental`` and ``datetime``.
    """

    # Score an untouched submission is expected to report. Reddit casts the
    # author's own upvote automatically, so a brand-new post normally shows 1.
    # NOTE(review): confirm on the target subreddits; override per instance
    # (``experiment.untouchedScore = ...``) if the baseline differs.
    untouchedScore = 1

    # Keys of every saved record, in the column order used when loading.
    _RECORD_COLUMNS = ("id", "nVotes", "nComments", "isExperimental", "datetime")

    def __init__(self, savedPostsFilename, clientId, clientSecret, userAgent, username, password):
        """Store the log file path and create the PRAW client.

        Args:
            savedPostsFilename: Path of the newline-separated JSON log file.
            clientId: Reddit API client id.
            clientSecret: Reddit API client secret.
            userAgent: User agent string sent to Reddit.
            username: Reddit account used for upvoting.
            password: Password of that account.
        """
        # Expected file format is newline-separated JSON (one record per line)
        self.savedPostsFilename = savedPostsFilename

        self.pages = []

        self.reddit = praw.Reddit(
            client_id=clientId,
            client_secret=clientSecret,
            user_agent=userAgent,
            username=username,
            password=password,
        )

    def addPage(self, page):
        """Register one subreddit name.

        Raises:
            TypeError: If ``page`` is not a string.
        """
        if not isinstance(page, str):
            raise TypeError("page name is not string")

        self.pages.append(page)

    def addPages(self, pages):
        """Register several subreddit names, all or nothing.

        Raises:
            TypeError: If any entry is not a string (or ``pages`` cannot be
                iterated). ``self.pages`` is left exactly as it was.
        """
        # Remember how many pages existed so a failure can be rolled back.
        # (Keeping a second reference to the same list would not work: it
        # would see every append made below.)
        originalLength = len(self.pages)

        try:
            for page in pages:
                self.addPage(page)
        except TypeError as err:
            # Drop whatever was appended during this call, in place.
            del self.pages[originalLength:]
            raise TypeError("page name is not string, reverting to past state") from err

    def isEligiblePost(self, post):
        """Return True when ``post`` shows no engagement yet.

        A post qualifies when its score still equals ``untouchedScore`` and
        it has no comments. PRAW submissions expose the vote tally as
        ``score``; they have no ``num_votes`` attribute.
        """
        return post.score == self.untouchedScore and post.num_comments == 0

    def likePost(self, post):
        """Upvote ``post`` with the authenticated account."""
        post.upvote()

    @staticmethod
    def _timestamp():
        """Return the current UTC time as an ISO-8601 string.

        A string is used because ``json.dumps`` cannot serialise
        ``datetime`` objects. Second resolution keeps the format uniform.
        """
        return datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds")

    def createPostData(self, id, isExperimental, nVotes=0, nComments=0):
        """Build one JSON-serialisable observation record.

        Args:
            id: Submission id.
            isExperimental: True if the post is in the upvoted group.
            nVotes: Observed score; defaults to 0.
            nComments: Observed comment count; defaults to 0.

        Returns:
            dict with the keys listed in ``_RECORD_COLUMNS``.
        """
        # Coerce to plain Python types so values coming from pandas/numpy
        # (e.g. numpy.bool_) do not break json.dumps.
        return {
            "id": str(id),
            "nVotes": int(nVotes),
            "nComments": int(nComments),
            "isExperimental": bool(isExperimental),
            "datetime": self._timestamp(),
        }

    def savePost(self, postData):
        """Append ``postData`` as one JSON line to the saved-posts file."""
        with open(self.savedPostsFilename, "a", encoding="utf-8") as file:
            file.write(json.dumps(postData) + "\n")

    def findNewPosts(self):
        """Scan each page's new posts and enrol the eligible ones.

        Each eligible post that is not tracked yet is randomly assigned to
        the experimental group (and upvoted) or to the control group, then
        saved. Posts already present in the log are skipped so that a post
        which is still untouched on a later run is not enrolled twice or
        moved to the other group.
        """
        knownIds = set(self.loadSavedPosts()["id"])

        for page in self.pages:
            for post in self.reddit.subreddit(page).new():
                if post.id in knownIds or not self.isEligiblePost(post):
                    continue

                isExperimental = bool(random.randint(0, 1))
                if isExperimental:
                    self.likePost(post)

                # The recorded score is the one read from the listing, i.e.
                # the value before our own upvote was cast.
                self.savePost(
                    self.createPostData(
                        post.id,
                        isExperimental,
                        nVotes=post.score,
                        nComments=post.num_comments,
                    )
                )
                knownIds.add(post.id)

    def loadSavedPosts(self):
        """Load every saved record into a DataFrame.

        Returns:
            pandas.DataFrame with the columns in ``_RECORD_COLUMNS``; the
            ``datetime`` column is parsed to timezone-aware UTC timestamps.
            The frame is empty when the file does not exist or has no records.
        """
        records = []
        try:
            with open(self.savedPostsFilename, encoding="utf-8") as file:
                for line in file:
                    line = line.strip()
                    if line:  # tolerate blank lines
                        records.append(json.loads(line))
        except FileNotFoundError:
            # Nothing has been saved yet; fall through to an empty frame.
            pass

        frame = pd.DataFrame(records, columns=list(self._RECORD_COLUMNS))
        frame["datetime"] = pd.to_datetime(frame["datetime"], utc=True)
        return frame

    def checkPost(self, oldPostData):
        """Fetch the current state of a tracked post.

        Args:
            oldPostData: Mapping with at least ``id`` and ``isExperimental``.

        Returns:
            A fresh record carrying the current score and comment count.
        """
        post = self.reddit.submission(id=oldPostData["id"])

        return self.createPostData(
            oldPostData["id"],
            oldPostData["isExperimental"],
            nVotes=post.score,
            nComments=post.num_comments,
        )

    def checkSavedPosts(self):
        """Append a fresh observation for every tracked post."""
        # One row per tracked post; the log holds many observations of each.
        tracked = self.loadSavedPosts()[["id", "isExperimental"]].drop_duplicates()

        for postId, isExperimental in tracked.itertuples(index=False, name=None):
            self.savePost(self.checkPost({"id": postId, "isExperimental": isExperimental}))
