# Project for Reflections In Data Science
## Reddit post tracker

Select new posts without any form of engagement and divide them into two separate groups.
One group is the control; the posts in the other group are given a like/upvote.
These posts are then tracked to see their development. 

This process is repeated every day for 7 days.


## Technical description
A group of subreddits is chosen. The script traverses each subreddit's "new posts" page and selects posts without any votes or comments.

In [None]:
import datetime
import json
import random

import praw
import requests
from bs4 import BeautifulSoup as soup

class ReflectionExperimentReddit:
    """Select untouched new Reddit posts and track them for an upvote experiment.

    Eligible posts (no votes, no comments) found on the "new" page of each
    registered subreddit are randomly assigned to the experimental group or
    the control group and appended, one JSON object per line, to
    ``savedPostsFilename``.
    """

    # Seconds to wait for an HTTP response before giving up; without a
    # timeout a stalled connection would block the experiment forever.
    requestTimeout = 30

    def __init__(self, savedPostsFilename, clientId, clientSecret, userAgent, username, password,
                 baseUrl="https://www.reddit.com/r/"):
        """Store the configuration and create the authenticated praw client.

        Args:
            savedPostsFilename: Path of the JSON-lines file posts are saved to.
            clientId: Reddit API client id.
            clientSecret: Reddit API client secret.
            userAgent: User agent string passed to praw.
            username: Reddit account name.
            password: Reddit account password.
            baseUrl: URL prefix a subreddit name is appended to.
        """
        self.savedPostsFilename = savedPostsFilename
        self.baseUrl = baseUrl
        self.pages = []

        self.reddit = praw.Reddit(
            client_id=clientId,
            client_secret=clientSecret,
            user_agent=userAgent,
            username=username,
            password=password
        )

    def addPage(self, page):
        """Register one subreddit name.

        Raises:
            TypeError: If ``page`` is not a string.
        """
        if not isinstance(page, str):
            raise TypeError("page name is not string")

        self.pages.append(page)

    def addPages(self, pages):
        """Register several subreddit names, all or nothing.

        Raises:
            TypeError: If any name is not a string; ``self.pages`` is then
                restored to the state it had before the call.
        """
        # Copy the list: a plain reference would be mutated by addPage and
        # could not be used to restore the previous state.
        snapshot = list(self.pages)

        try:
            for page in pages:
                self.addPage(page)
        except TypeError as err:
            self.pages = snapshot
            raise TypeError("page name is not string, reverting to past state") from err

    def isEligiblePost(self, post):
        """Return True when the post container shows no votes and no comments.

        Containers that lack the upvote button, its score element or the
        comments link are treated as not eligible instead of raising.
        """
        upvoteButton = post.select_one("button[aria-label='upvote']")
        commentsLink = post.select_one("a[data-click-id='comments']")
        if upvoteButton is None or commentsLink is None:
            return False

        # The div following the upvote button holds the number of votes;
        # when there are none it holds the placeholder "Vote".
        scoreElement = upvoteButton.find_next("div")
        if scoreElement is None:
            return False
        noVotes = scoreElement.text == "Vote"

        # The link text starts with the number of comments.
        noComments = commentsLink.text.split(" ")[0] == "0"

        return noVotes and noComments

    def likePost(self, post):
        """Upvote the post. Not implemented yet: currently does nothing."""
        # TODO: upvote through self.reddit once posts can be mapped to
        # praw submissions.
        return None

    def createPostData(self, post, category):
        """Build the initial record (zero votes and comments) for a new post.

        Args:
            post: Parsed post container holding an ``a[data-click-id='body']`` link.
            category: Group label stored with the post.

        Returns:
            dict with the keys url, nVotes, nComments, category and datetime.
        """
        postData = {
            "url": post.select_one("a[data-click-id='body']")["href"],
            "nVotes": 0,
            "nComments": 0,
            "category": category,
            "datetime": datetime.datetime.now()
        }
        return postData

    @staticmethod
    def _jsonDefault(value):
        """Serialise the date/time values stored in post records as ISO 8601."""
        if isinstance(value, (datetime.datetime, datetime.date)):
            return value.isoformat()
        raise TypeError("Object of type " + type(value).__name__ + " is not JSON serializable")

    def savePost(self, postData):
        """Append ``postData`` as one JSON line to the saved posts file."""
        # datetime objects are not JSON serialisable by default, so they are
        # written as ISO 8601 strings.
        line = json.dumps(postData, default=self._jsonDefault)
        with open(self.savedPostsFilename, "a", encoding="utf-8") as file:
            file.write(line + "\n")

    def checkPost(self, oldPostData):
        """Fetch a saved post again and return a fresh record for it.

        A non-200 response is reported on stdout. The vote and comment counts
        are not extracted from the page yet, so the returned record always
        reports zero for both.
        """
        r = requests.get(oldPostData["url"], timeout=self.requestTimeout)

        if r.status_code != 200:
            print("Error: ", r.status_code)

        # TODO: read nVotes / nComments from the parsed page.
        s = soup(r.text, "html.parser")

        nComments = 0
        nVotes = 0

        postData = {
            "url": oldPostData["url"],
            "nVotes": nVotes,
            "nComments": nComments,
            "category": oldPostData["category"],
            "datetime": datetime.datetime.now()
        }
        return postData

    def checkAllSavedPosts(self):
        """Re-check every saved post and return the list of fresh records.

        Returns an empty list when nothing has been saved yet.
        """
        # Open for reading: mode "w" would truncate the saved data and make
        # the file unreadable.
        try:
            with open(self.savedPostsFilename, "r", encoding="utf-8") as file:
                lines = [line for line in file if line.strip()]
        except FileNotFoundError:
            return []

        return [self.checkPost(json.loads(line)) for line in lines]

    def findNewPosts(self):
        """Scan each registered subreddit's "new" page and save eligible posts.

        Every eligible post is randomly assigned to the experimental group
        (category True, passed to likePost) or the control group (category
        False), then saved and reported on stdout.
        """
        for page in self.pages:
            r = requests.get(self.baseUrl + page + "/new", timeout=self.requestTimeout)

            if r.status_code != 200:
                print("Error: ", r.status_code)
                # An error response carries no post listing; skip this page.
                continue

            s = soup(r.text, "html.parser")

            # Find all of the post containers
            posts = s.select("div[data-testid='post-container']")

            for post in posts:
                if not self.isEligiblePost(post):
                    continue

                isExperimental = bool(random.randint(0, 1))
                if isExperimental:
                    self.likePost(post)

                postData = self.createPostData(post, isExperimental)
                self.savePost(postData)
                print("Eligible post, url: " + postData["url"])

In [36]:
import requests
from bs4 import BeautifulSoup as soup

# Prototype of the post selection: list the posts on each subreddit's "new"
# page that have neither votes nor comments.
baseUrl = "https://www.reddit.com/r/"

pages = ["EscapeFromTarkov"]

for page in pages:

    # Navigate to the new posts part of the page; the timeout keeps a
    # stalled connection from blocking the cell forever.
    r = requests.get(baseUrl + page + "/new", timeout=30)

    if r.status_code != 200:
        print("Error: ", r.status_code)
        # An error response carries no post listing; skip this page.
        continue

    s = soup(r.text, "html.parser")

    # Find all of the post containers
    posts = s.select("div[data-testid='post-container']")

    for post in posts:

        upvoteButton = post.select_one("button[aria-label='upvote']")
        commentsLink = post.select_one("a[data-click-id='comments']")

        # Containers without these elements cannot be judged; skip them
        # instead of failing on a missing element.
        if upvoteButton is None or commentsLink is None:
            continue

        # The div following the upvote button holds the number of votes.
        # If there are no votes there is a placeholder "Vote".
        scoreElement = upvoteButton.find_next("div")
        noVotes = scoreElement is not None and scoreElement.text == "Vote"

        # The link text starts with the number of comments.
        noComments = commentsLink.text.split(" ")[0] == "0"

        # If both conditions are met, the post is eligible for a group
        if noVotes and noComments:
            print("Eligible post, url: " + post.select_one("a[data-click-id='body']")["href"])