## Imports

In [1]:
# for loading the data
import cookielib
import urllib
import urllib2
import getpass
import sys
import json
# standard imports
from collections import defaultdict
import math
# third party packages
import numpy as np

## Loading The Data

In [2]:
HOST = "https://working-dog-data-dash.appspot.com"

def create_opener():
    """creates a urllib2.OpenerDirector with a CookieJar"""
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    return opener

def login(opener):
    """prompts the cli user for login credentials to P.A.W.S.
     then sends a login request to the real server using opener.

    Arguments:
        opener - a urllib2.OpenerDirector, this should have a cookier jar.
    """
    login_url = HOST+"/login"
    print("Please enter credentials for P.A.W.S.")
    user = getpass.getpass("Userame: ")
    password = getpass.getpass("Password: ")
    form_data = {"username": user, "password": password}
    params = urllib.urlencode(form_data)
    response = opener.open(login_url, data=params)
    code = response.getcode()
    if code != 200:
        raise Exception("Failed to login!")

# login to backend service
opener = create_opener()
login(opener)

# fetch data from server
url = HOST+"/api/cached/data/filtered/blob"
response = opener.open(url)

# read and check result
raw = response.read()
if raw[0] != '{':
    raise Exception("Failed to load!")

# parse the JSON
# NOTE: we have no floats
blob = json.loads(raw, parse_int=int, parse_float=int)

# Debug loaded data
num_dogs = len(blob["dogs"])
print ""
print "Loaded data with num_dogs = %d" % (num_dogs)

Please enter credentials for P.A.W.S.
Userame: ········
Password: ········

Loaded data with num_dogs = 124


## Cleaning The Data

In [3]:
# define small common utilities
def print_bar():
    print "-"*70

In [4]:
# make dict of data by dog id
dogs = {}
for dog in blob["dogs"]:
    id = dog["id"]
    dogs[id] = dog
    dogs[id]["days"] = []

# copy day info to dogs
for day in blob["days"]:
    date = day["date"]
    for day_data in day["dogs"]:
        id = day_data["id"]
        dogs[id]["days"].append(day_data)
        dogs[id]["days"][-1]["date"] = date

print "Number of days (filtered to 70% of a day) least to most:"
print_bar()
print sorted([len(dogs[id]["days"]) for id in dogs])

Number of days (filtered to 70% of a day) least to most:
----------------------------------------------------------------------
[27, 39, 39, 44, 45, 48, 50, 57, 62, 69, 71, 77, 83, 84, 85, 86, 87, 91, 100, 127, 140, 143, 145, 161, 175, 206, 221, 227, 230, 234, 252, 256, 260, 265, 283, 300, 301, 302, 319, 328, 337, 338, 342, 346, 361, 362, 364, 366, 366, 373, 374, 377, 380, 384, 399, 401, 404, 415, 422, 447, 448, 451, 456, 465, 470, 478, 479, 480, 484, 484, 485, 488, 493, 495, 498, 500, 502, 503, 503, 504, 509, 510, 513, 517, 520, 528, 529, 533, 533, 534, 534, 536, 539, 539, 541, 549, 558, 561, 564, 567, 576, 596, 600, 603, 606, 608, 617, 619, 619, 619, 629, 634, 636, 643, 644, 647, 649, 649, 652, 671, 673, 674, 678, 681]


In [5]:
DAY_MINS = 1440

# get and sort all totals for analysis
totals = sorted([dog["total"] for dog in blob["dogs"]])
print "Minute Totals:"
print_bar()
total_mean = np.mean(totals)
total_std = np.std(totals)
total_ten_pct = totals[int(len(totals)*.1)]
# print helper
def print_value_days_weeks(name, value):
    print "%10s: %10f (mins), %10f (days), %10f (weeks)" %\
        (name, value, value/DAY_MINS, value/DAY_MINS/7)
print_value_days_weeks("Mean", total_mean)
print_value_days_weeks("Std-Dev", total_std)
print_value_days_weeks("Bottom 10%", total_ten_pct)
print_value_days_weeks("Mean - Std", total_mean - total_std)
print_bar()

Minute Totals:
----------------------------------------------------------------------
      Mean: 560912.879032 (mins), 389.522833 (days),  55.646119 (weeks)
   Std-Dev: 274515.776789 (mins), 190.635956 (days),  27.233708 (weeks)
Bottom 10%: 116940.000000 (mins),  81.000000 (days),  11.000000 (weeks)
Mean - Std: 286397.102243 (mins), 198.886877 (days),  28.412411 (weeks)
----------------------------------------------------------------------


In [6]:
# filter to have at least 1/2 year of valid days
total_threshold = 26 * 7
filtered_dogs = {k: v for k,v in dogs.items()\
                 if len(v["days"]) >= total_threshold}
num_below_threshold = len(dogs) - len(filtered_dogs)
print "Number of dogs below threshold: %d" % (num_below_threshold)
print "      Number of remaining dogs: %d" % (len(filtered_dogs))

Number of dogs below threshold: 25
      Number of remaining dogs: 99


## Analyze Data

In [7]:
# first put the dog data into buckets by outcome
dogs_by_outcome = defaultdict(list)
for dog in filtered_dogs.itervalues():
    status = dog["dog_status"]
    if status == "":
        status = "Unknown Status"
    dogs_by_outcome[status].append(dog)

# create alphabetically sorted set of possible outcomes
outcomes = sorted(dogs_by_outcome.keys())

print "Outcomes:"
print_bar()

# convert to percentages instead of totals (except for the total itself)
results_by_outcome = {}
for outcome in outcomes:
    dogs = dogs_by_outcome[outcome]
    results = {
        "outcome": outcome,
        "dogs": [],
    }
    for dog in dogs:
        result = dog
        total = dog["total"]
        result["active_total"] = dog["active"]
        result["active"] = float(dog["active_total"]) / total * 100
        result["rest_total"] = dog["rest"]
        result["rest"] = float(dog["rest"]) / total * 100
        result["awake_total"] = dog["awake"]
        result["awake"] = float(dog["awake"]) / total * 100
        results["dogs"].append(result)
    results["active_mean"] = np.mean([v["active"] for v in results["dogs"]])
    results["awake_mean"] = np.mean([v["awake"] for v in results["dogs"]])
    results["rest_mean"] = np.mean([v["rest"] for v in results["dogs"]])
    for k,v in results.iteritems():
        if k != "dogs":
            print (k, v)
    print_bar()
    results_by_outcome[outcome] = results


Outcomes:
----------------------------------------------------------------------
('outcome', u'Active Breeder')
('rest_mean', 61.446258926289055)
('awake_mean', 32.026896861056166)
('active_mean', 6.5268442126547788)
----------------------------------------------------------------------
('outcome', u'Active Grad Dog')
('rest_mean', 61.324272158986375)
('awake_mean', 31.895557852134861)
('active_mean', 6.7801699888787654)
----------------------------------------------------------------------
('outcome', u'Advanced Training')
('rest_mean', 61.941540724328355)
('awake_mean', 31.230064236800292)
('active_mean', 6.8283950388713546)
----------------------------------------------------------------------
('outcome', u'Released Dog')
('rest_mean', 61.142695163058391)
('awake_mean', 32.229646414410198)
('active_mean', 6.6276584225314039)
----------------------------------------------------------------------
('outcome', 'Unknown Status')
('rest_mean', 59.977061007156522)
('awake_mean', 31.7975939