In [None]:
# Machine-specific workaround: append one developer's conda site-packages
# directory to the import search path so that aiohttp can be imported.
# NOTE(review): this is a hard-coded absolute path from a single machine;
# confirm whether it is still needed before relying on it elsewhere.
import sys # for gioia to load aiohttp
sys.path.append('/Users/maggiori/anaconda/envs/py35/lib/python3.5/site-packages')

In [None]:
# Put the parent folder at the front of the import search path so that the
# project's modules can be imported without installing the package.
# Recipe: http://stackoverflow.com/questions/714063/importing-modules-from-parent-folder
import os, sys, inspect

# absolute location of the code that is currently executing
this_file = os.path.abspath(inspect.getfile(inspect.currentframe()))
currentdir = os.path.dirname(this_file)

# one level up from that location
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

In [None]:
import subprocess
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('white')
sns.set_context('notebook')

# Time Series Database

This notebook demonstrates the persistent behavior of the database.

## Initialization

* Clear the file system for demonstration purposes.

In [None]:
# settings for the demo database
ts_length = 100           # passed to the server as --ts_length
data_dir = '../db_files'  # passed to the server as --data_dir
db_name = 'default'       # passed to the server as --db_name

# '<data_dir>/<db_name>/' -- note the trailing slash
dir_path = '{}/{}/'.format(data_dir, db_name)

In [None]:
# start from an empty directory: create it if it is missing, then delete
# every entry it currently contains
if not os.path.exists(dir_path):
    os.makedirs(dir_path)

# dir_path already ends with '/', so plain concatenation yields full paths
filelist = []
for entry_name in os.listdir(dir_path):
    filelist.append(dir_path + entry_name)

for entry_path in filelist:
    os.remove(entry_path)

* Load the database server.

In [None]:
# From a terminal the server would be started with a command such as:
#   python go_server_persistent.py --ts_length 100 --db_name 'demo'
#
# For this demonstration it is launched as a child process instead, using
# the database parameters defined earlier in the notebook.
server_args = [
    'python', '../go_server_persistent.py',
    '--ts_length', str(ts_length),
    '--data_dir', data_dir,
    '--db_name', db_name,
]
server = subprocess.Popen(server_args)

* Load the database webserver.

In [None]:
# From a terminal the webserver would be started with:
#   python go_webserver.py
#
# For this demonstration it is launched as a child process instead.
webserver_args = ['python', '../go_webserver.py']
webserver = subprocess.Popen(webserver_args)

* Import the web interface and initialize it.

In [None]:
from webserver import *

In [None]:
# client object through which every database operation below is issued
# NOTE(review): WebInterface comes from the star import of `webserver`;
# presumably it talks to the webserver subprocess started above -- confirm
web_interface = WebInterface()

## Generate Data

Let's create some dummy data to aid in our demonstration. You will need to import the `timeseries` package to work with the TimeSeries format.

**Note:** the database is persistent, so it can store data between sessions, but we will start with an empty database here for demonstration purposes.

In [None]:
from timeseries import *

In [None]:
def tsmaker(m, s, j):
    '''
    Helper function: randomly generates a time series for testing.

    The values are a normal pdf (mean `m`, standard deviation `s`) evaluated
    on an evenly spaced time grid over [0, 1), plus Gaussian noise scaled by
    `j`. The grid has `ts_length` points (the notebook-level setting), so the
    time and value arrays always have the same length.

    Parameters
    ----------
    m : float
        Mean value for generating time series data
    s : float
        Standard deviation value for generating time series data
    j : float
        Quantifies the "jitter" to add to the time series data

    Returns
    -------
    meta : dict
        Randomly generated metadata with keys 'order' (int from -5 to 5),
        'blarg' (1 or 2) and 'vp' (always False here).
    ts : TimeSeries
        The generated time series.

    Note that the metadata comes FIRST in the returned tuple.
    '''

    # generate metadata
    meta = {}
    meta['order'] = int(np.random.choice(
        [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]))
    meta['blarg'] = int(np.random.choice([1, 2]))
    meta['vp'] = False  # initialize vantage point indicator as negative

    # generate time series data
    # The grid is derived from ts_length instead of a hard-coded 0.01 step,
    # so it can never disagree in length with the noise vector below; with
    # ts_length == 100 this is the same 0.00, 0.01, ..., 0.99 grid as before.
    t = np.linspace(0.0, 1.0, ts_length, endpoint=False)
    v = norm.pdf(t, m, s) + j * np.random.randn(ts_length)

    # return time series and metadata
    return meta, TimeSeries(t, v)

In [None]:
# draw one (mean, standard deviation, jitter) triple per sample time series
num_ts = 50
mus = np.random.uniform(low=0.0, high=1.0, size=num_ts)
sigs = np.random.uniform(low=0.05, high=0.4, size=num_ts)
jits = np.random.uniform(low=0.05, high=0.2, size=num_ts)

# containers for the generated entries, keyed by primary key
primary_keys = []
tsdict = {}
metadict = {}

# build one randomly generated database entry per parameter triple
for i, (mu, sig, jit) in enumerate(zip(mus, sigs, jits)):
    pk = 'ts-' + str(i)  # primary key
    meta, tsrs = tsmaker(mu, sig, jit)  # metadata and time series
    primary_keys.append(pk)
    tsdict[pk] = tsrs
    metadict[pk] = meta

# sorted primary keys, used by the later checks
ts_keys = sorted(tsdict)

# pick distinct time series at random to act as vantage points
num_vps = 5
random_vps = np.random.choice(range(num_ts), size=num_vps, replace=False)
vpkeys = ['ts-' + str(idx) for idx in random_vps]

## Insert Data

Let's start by loading the data into the database, using the REST API web interface.

In [None]:
# add stats trigger: registers 'stats' against 'insert_ts' with the fields
# 'mean' and 'std'; the final argument is left as None
# NOTE(review): presumably this makes the database compute and store the
# mean and standard deviation of each series when it is inserted -- confirm
# against WebInterface.add_trigger
web_interface.add_trigger('stats', 'insert_ts', ['mean', 'std'], None)

In [None]:
# send every generated time series to the database under its primary key
for key, series in tsdict.items():
    web_interface.insert_ts(key, series)

In [None]:
# attach the generated metadata to each entry, one primary key at a time
for key in tsdict:
    meta_fields = metadict[key]
    web_interface.upsert_meta(key, meta_fields)

In [None]:
# mark each of the randomly chosen time series as a vantage point
for vp_key in vpkeys[:num_vps]:
    web_interface.insert_vp(vp_key)

## Inspect Data

Let's inspect the data, to make sure that all the previous operations were successful.

In [None]:
# select all database entries; all metadata fields
# (no `md` filter is passed, so the query is not restricted by metadata)
# NOTE(review): an empty `fields` list presumably means "return every
# metadata field" -- confirm against WebInterface.select
results = web_interface.select(fields=[])

In [None]:
# we have the right number of database entries:
# one result per generated time series
assert len(results) == num_ts

# we have all the right primary keys:
# the sorted result keys equal the sorted keys of the generated data
assert sorted(results.keys()) == ts_keys

In [None]:
# fetch each time series back by primary key and check that it equals the
# one we generated (only the 'ts' field is compared; metadata is not)
for key, expected_ts in tsdict.items():
    results = web_interface.select(fields=['ts'], md={'pk': key})
    assert results[key]['ts'] == expected_ts

## Terminate and Reload Database

In [None]:
# Ask both child processes to stop (database server first, then webserver).
for proc in (server, webserver):
    proc.terminate()

# terminate() only sends the signal; wait for each process to actually exit
# so that it is reaped and has fully shut down before anything is restarted.
# If a process has not exited after 10 seconds, kill it so the notebook
# cannot hang here.
for proc in (server, webserver):
    try:
        proc.wait(timeout=10)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()

# drop the client object now that the servers behind it are gone
web_interface = None