# Web 3: More Flask and A/B testing

In [1]:
import email.utils
import math
import time
import urllib.robotparser

import pandas as pd
import requests
# new import statement: requires pip3 install scipy
from scipy import stats

- `flask.request.args`: enables us to get the arguments passed as part of the URL
    - How do we pass arguments?
        - at the end of the URL, add a "?"
        - then write each argument-value pair as "argument=value"
        - use "&" as delimiter between two argument-value pairs
    - examples: 
        - http://35.226.223.87:5000/add?x=10&y=20
        - http://35.226.223.87:5000/survey?major=CS
        - http://35.226.223.87:5000/survey?major=Mechanical_Engineering

In [2]:
# Root URL of the server used in the examples. It ends with "/", so build
# page URLs by appending a path without a leading slash (e.g. base_url + "slow").
base_url = "http://35.226.223.87:5000/"

### `urllib.robotparser`

- Documentation: https://docs.python.org/3/library/urllib.robotparser.html
- A few websites with robots.txt
    - https://en.wikipedia.org/robots.txt
    - https://www.reddit.com/robots.txt
    - https://cs320.cs.wisc.edu/su24/robots.txt

In [3]:
# Load the site's robots.txt once so later cells can ask what a crawler may fetch.
# base_url already ends with "/", so paths are appended without a leading slash;
# a "//slow"-style path would not prefix-match a "/slow" rule in robots.txt.
rp = urllib.robotparser.RobotFileParser()
rp.set_url(base_url + "robots.txt")
rp.read()  # downloads and parses the robots.txt file
rp.can_fetch("cs320bot", base_url + "slow")

True

In [4]:
# Ask the parsed robots.txt whether the "cs320bot" user agent may fetch the "never" page
rp.can_fetch(url=base_url + "never", useragent="cs320bot")

False

In [5]:
def _retry_after_seconds(value, default=1):
    """Convert a Retry-After header value into a whole number of seconds to wait.

    The header may carry either a delay in seconds ("120") or an HTTP-date
    ("Wed, 21 Oct 2015 07:28:00 GMT"). A missing or unparseable value falls
    back to `default`; the result is never negative, so it is always safe
    to pass to time.sleep.
    """
    if value is None:
        return default
    try:
        return max(0, int(value))
    except (TypeError, ValueError):
        pass  # not the delay-seconds form; try the HTTP-date form below
    try:
        parsed = email.utils.parsedate_tz(value)
        if parsed is None:
            return default
        delay = email.utils.mktime_tz(parsed) - time.time()
    except (TypeError, ValueError, OverflowError):
        return default
    # Round up so we never wake before the moment the server asked for.
    return max(0, math.ceil(delay))


def friendly_get(url, max_retries=None):
    """GET `url` politely: obey robots.txt and back off on HTTP 429.

    Parameters:
        url: the page to fetch.
        max_retries: how many times to sleep and retry after a 429 before
            giving up. None (the default) retries indefinitely.

    Returns:
        The response object of the first non-429 reply.

    Raises:
        Exception: if the robots.txt rules loaded into `rp` disallow `url`
            for the "cs320bot" user agent.
        requests.HTTPError: for a 4xx/5xx reply (via raise_for_status),
            including a 429 once `max_retries` is exhausted.
    """
    if not rp.can_fetch("cs320bot", url):
        raise Exception("you're not supposed to visit that page")
    retries = 0
    while True:
        resp = requests.get(url)
        if resp.status_code != 429:
            resp.raise_for_status()  # raises for 4xx/5xx responses
            return resp
        if max_retries is not None and retries >= max_retries:
            resp.raise_for_status()  # 429 is a 4xx status, so this raises
        # Retry-After can be delay-seconds or an HTTP-date; handle both.
        seconds = _retry_after_seconds(resp.headers.get("Retry-After"))
        print(f"sleep {seconds}")
        time.sleep(seconds)
        retries += 1
    
# Fetch the rate-limited "slow" page through the polite helper and show its body
friendly_get(url=base_url + "slow").text

'welcome!'

# A/B testing

In [6]:
# Click counts for the two page variants: one row per variant,
# one column per outcome.
df = pd.DataFrame(
    [[50, 50],
     [55, 45]],
    index=["A", "B"],
    columns=["click", "no-click"],
)
df
# Question to consider: does A or B have the higher click-through rate?

Unnamed: 0,click,no-click
A,50,50
B,55,45


In [7]:
# Fisher's exact test on the 2x2 click table; only the p-value is of interest here
_, pvalue = stats.fisher_exact(table=df, alternative="two-sided")
pvalue
# pvalue is not below the 5% threshold, so there is no evidence that A and B differ

np.float64(0.5712421394829712)

### Two situations when pvalue will be lower than significance threshold

1. Sample size is the same, but skew is very heavy --- unlikely to have that by chance
2. Sample size is large, but skew is small 

In [8]:
# Scenario 1:
# Same sample size as before, but the click split is heavily skewed,
# which is unlikely to happen by chance.

df = pd.DataFrame(
    [[50, 50],
     [75, 25]],
    index=["A", "B"],
    columns=["click", "no-click"],
)
_, pvalue = stats.fisher_exact(table=df, alternative="two-sided")
pvalue

np.float64(0.00042033045869994034)

In [9]:
# Scenario 2:
# The click split is only slightly skewed, but the sample size is large.

df = pd.DataFrame(
    [[500, 500],
     [550, 450]],
    index=["A", "B"],
    columns=["click", "no-click"],
)
_, pvalue = stats.fisher_exact(table=df, alternative="two-sided")
pvalue

np.float64(0.02820356890423392)