# Import Statements

## Normal Import Statements

In [1]:
import json
from bs4 import BeautifulSoup as bs
import requests

## AWS Import Statements

In [4]:
import boto3

# Exception Classes

In [10]:
class InvalidIndex(Exception):
    """Raised by the scrapers when no question page exists for an index."""

# WebScrapers

### Webpage Fetcher
Given a url, return the text of the webpage behind that url.

In [51]:
def get_webpage(webpage, timeout=10):
    """Fetch a URL and return the response body as text.

    Args:
        webpage: URL to request.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``; without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        The response body as text (``Response.text``).

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # Browser-like user agent sent with every request.
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
    }
    # NOTE: the HTTP status code is not checked here; the scrapers in this
    # file detect a missing question from the returned page content instead.
    response = requests.get(webpage, headers=headers, timeout=timeout)
    return response.text

### Stats Page Scraper
Given the id of a question, fetch the stats of that question.

In [52]:
def get_stat_webpage_data(question_id):
    """Scrape the stats page of a question.

    Args:
        question_id: Id of the question; interpolated into the stats URL.

    Returns:
        A dict with the page ``link``, the question ``index``, its ``pro``
        and ``con`` texts, and the ``did_press_count`` /
        ``didnt_press_count`` vote totals as ints.

    Raises:
        InvalidIndex: If the fetched page has no element with id
            ``maincontainer``.
    """
    webpage = 'https://willyoupressthebutton.com/{0}/stats'.format(question_id)
    page = bs(get_webpage(webpage), 'html.parser')

    container = page.find(id='maincontainer')
    if container is None:
        raise InvalidIndex({
            "message":"No question found with that index", 
            "index": question_id
        })

    # The vote figures are the children of the second child of #statsBar.
    stats_row = list(container.find(id='statsBar').children)[1]
    stat_nodes = list(stats_row.children)

    # The first whitespace-separated token of each stat text is the count.
    pressed_text = stat_nodes[1].getText()
    pressed_total = int(pressed_text.split()[0])

    not_pressed_text = stat_nodes[3].getText()
    not_pressed_total = int(not_pressed_text.split()[0])

    # Within #dilemma, child 1 holds the pro text and child 5 the con text.
    dilemma_nodes = list(container.find(id='dilemma').children)
    pro_text = dilemma_nodes[1].getText().strip()
    con_text = dilemma_nodes[5].getText().strip()

    result = {
        'link': webpage,
        'index': question_id,
        'pro': pro_text,
        'con': con_text,
        'did_press_count': pressed_total,
        'didnt_press_count': not_pressed_total
    }
    return result

### Random Question Scraper
Goes to the main page of willyoupressthebutton.com and scrapes the currently displayed question: its id, its pro text, and its con text.

In [53]:
def get_main_webpage_data():
    """Scrape the question shown on the site's main page.

    Returns:
        A dict with the question ``id`` (int, taken from the yes-button
        link) and the stripped ``pro_text`` and ``con_text`` strings.
    """
    webpage = 'https://willyoupressthebutton.com'
    page = bs(get_webpage(webpage), 'html.parser')
    container = page.find(id='maincontainer')

    # The question id is the second '/'-separated piece of the yes-button href.
    yes_link = container.find(id='yesbtn')['href']
    question_id = int(yes_link.split('/')[1])

    # '#cond' holds the pro text and '#res' the con text.
    pro_text = container.find(id='cond').getText().strip()
    con_text = container.find(id='res').getText().strip()

    result = {
        'id': question_id,
        'pro_text': pro_text,
        'con_text': con_text
    }
    return result

### Question Response Scraper
Given a question ID and the user's response, notify the website of that response and get the latest stats for the question.

In [92]:
def get_response_stats_data(question_id, user_response):
    """Fetch the yes/no response page of a question and return its stats.

    Args:
        question_id: Id of the question; interpolated into the URL.
        user_response: Truthy requests the ``yes`` page, falsy the ``no``
            page.

    Returns:
        A dict with the question ``id``, the ``pro_count`` / ``con_count``
        vote totals, and ``pro_percent`` / ``con_percent``. The con
        percentage is computed as ``100 - pro_percent``.

    Raises:
        InvalidIndex: If the fetched page has no element with id
            ``maincontainer``.
    """
    webpage = 'https://willyoupressthebutton.com/{0}/'.format(question_id)
    webpage += 'yes' if user_response else 'no'

    page = bs(get_webpage(webpage), 'html.parser')

    container = page.find(id='maincontainer')
    if container is None:
        raise InvalidIndex({
            "message":"No question found with that index", 
            "index": question_id
        })

    # The vote figures are the children of the second child of #statsBar.
    stats_row = list(container.find(id='statsBar').children)[1]
    stat_nodes = list(stats_row.children)

    pressed_text = stat_nodes[1].getText()
    pressed_total = int(pressed_text.split()[0])
    # The percentage sits between the parentheses; the slice also drops the
    # single character just before ')'.
    open_at = pressed_text.index('(')
    close_at = pressed_text.index(')')
    pressed_percent = int(pressed_text[open_at + 1:close_at - 1])

    not_pressed_text = stat_nodes[3].getText()
    not_pressed_total = int(not_pressed_text.split()[0])
    not_pressed_percent = 100 - pressed_percent

    result = {
        'id': question_id,
        'pro_count': pressed_total,
        'con_count': not_pressed_total,
        'pro_percent': pressed_percent,
        'con_percent': not_pressed_percent
    }
    return result

In [96]:
# Scrape the question currently shown on the main page and keep it in `data`.
data = get_main_webpage_data()

In [97]:
# Display the scraped question dict (id, pro_text, con_text).
data

{'con_text': 'You will melt when you touch water',
 'id': 2183,
 'pro_text': 'Be able to harness the powers of fire and become immune to heat'}

In [95]:
# Request the 'yes' response page for the question held in `data` and show its stats.
get_response_stats_data(data['id'], True)

{'con_count': 35479,
 'con_percent': 59,
 'id': 47248,
 'pro_count': 24881,
 'pro_percent': 41}