In [32]:
# Make aiohttp importable by appending one developer's site-packages directory
# to the module search path.
# NOTE(review): this absolute path is machine-specific; on any other machine
# aiohttp has to be available in the active environment instead.
import sys # for gioia to load aiohttp
sys.path.append('/Users/maggiori/anaconda/envs/py35/lib/python3.5/site-packages')

In [33]:
# Put the parent of the current file's directory at the front of the module
# search path, so project modules import without installing the package.
# http://stackoverflow.com/questions/714063/importing-modules-from-parent-folder
import os, sys, inspect
currentdir = os.path.split(
    os.path.abspath(inspect.getfile(inspect.currentframe())))[0]
parentdir = os.path.split(currentdir)[0]
sys.path.insert(0, parentdir)

In [34]:
import subprocess
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('white')
sns.set_context('notebook')

# Time Series Database

This notebook demonstrates the persistent behavior of the database.

## Initialization

* Clear the file system for demonstration purposes.

In [35]:
# database configuration shared by the cells below
ts_length = 100            # number of points in every generated time series
data_dir = '../db_files'   # folder passed to the server as --data_dir
db_name = 'default'        # database name passed to the server as --db_name
# folder that holds this database's files (cleared before the demonstration)
dir_path = '{}/{}/'.format(data_dir, db_name)

In [36]:
# start from an empty database folder: create it when it is missing,
# then delete every entry it currently contains
if not os.path.exists(dir_path):
    os.makedirs(dir_path)
filelist = [dir_path + name for name in os.listdir(dir_path)]
for stale_file in filelist:
    os.remove(stale_file)

* Load the database server.

In [37]:
# from a terminal the server would be started with, e.g.:
#   python go_server_persistent.py --ts_length 100 --db_name 'demo'
#
# for this demonstration it runs as a background subprocess instead,
# configured with the database parameters defined above
server = subprocess.Popen([
    'python', '../go_server_persistent.py',
    '--ts_length', str(ts_length),
    '--data_dir', data_dir,
    '--db_name', db_name,
])

* Load the database webserver.

In [38]:
# when running from the terminal
# python go_webserver.py

# here we load the webserver as a background subprocess for demonstration
# purposes; the Popen handle is kept so the process can be terminated later
webserver = subprocess.Popen(['python', '../go_webserver.py'])

* Import the web interface and initialize it.

In [39]:
from webserver import *

In [40]:
# client object through which every database request below is issued
# (WebInterface presumably comes from the `webserver` star import -- confirm)
web_interface = WebInterface()

## Generate Data

Let's create some dummy data to aid in our demonstration. You will need to import the `timeseries` package to work with the TimeSeries format.

**Note:** the database is persistent, so it can store data between sessions, but we will start with an empty database here for demonstration purposes.

In [41]:
from timeseries import *

In [42]:
def tsmaker(m, s, j):
    '''
    Helper function: randomly generates a time series for testing.

    The values are a normal pdf with mean `m` and standard deviation `s`,
    evaluated on `ts_length` evenly spaced times in [0, 1), plus Gaussian
    noise scaled by `j`.

    Parameters
    ----------
    m : float
        Mean value for generating time series data
    s : float
        Standard deviation value for generating time series data
    j : float
        Quantifies the "jitter" to add to the time series data

    Returns
    -------
    meta : dict
        Randomly generated metadata: 'order' (int between -5 and 5) and
        'blarg' (int, 1 or 2).
    ts : TimeSeries
        The generated time series. Note the order: metadata first.
    '''

    # generate metadata (cast to int so the values are plain Python integers)
    meta = {
        'order': int(np.random.choice(
            [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])),
        'blarg': int(np.random.choice([1, 2])),
    }

    # generate time series data; the time grid is derived from ts_length so
    # that times and noise always have the same number of points (for
    # ts_length == 100 this is the grid 0.00, 0.01, ..., 0.99)
    t = np.linspace(0.0, 1.0, ts_length, endpoint=False)
    v = norm.pdf(t, m, s) + j * np.random.randn(ts_length)

    # return metadata and time series
    return meta, TimeSeries(t, v)

In [43]:
# seed NumPy's global generator so that re-running the notebook from a fresh
# kernel regenerates the same sample data (tsmaker draws from it as well)
random_seed = 42
np.random.seed(random_seed)

# draw the parameters (mean, standard deviation, jitter) of each sample series
num_ts = 50
mus = np.random.uniform(low=0.0, high=1.0, size=num_ts)
sigs = np.random.uniform(low=0.05, high=0.4, size=num_ts)
jits = np.random.uniform(low=0.05, high=0.2, size=num_ts)

# initialize containers for primary keys, time series and their metadata
primary_keys = []
tsdict = {}
metadict = {}

# fill dictionaries with randomly generated entries for database
for i, (m, s, j) in enumerate(zip(mus, sigs, jits)):
    pk = "ts-{}".format(i)  # generate primary key
    primary_keys.append(pk)  # keep track of all primary keys
    # tsmaker returns the metadata first, then the time series
    metadict[pk], tsdict[pk] = tsmaker(m, s, j)

# to assist with later testing
ts_keys = sorted(tsdict)

# randomly choose distinct time series as vantage points
num_vps = 5
random_vps = np.random.choice(range(num_ts), size=num_vps, replace=False)
vpkeys = sorted('ts-{}'.format(i) for i in random_vps)

## Insert Data

Let's start by loading the data into the database, using the REST API web interface.

In [44]:
# check that the database is empty: the data directory was cleared above, so
# a select without arguments should display an empty result
web_interface.select()

OrderedDict()

In [45]:
# add the 'stats' trigger on 'insert_ts'; the 'mean' and 'std' fields it
# targets are checked against the time series further below
# NOTE(review): the meaning of the final None argument is not visible here -- confirm
web_interface.add_trigger('stats', 'insert_ts', ['mean', 'std'], None)

'OK'

In [46]:
# store every generated time series under its primary key
for pk, series in tsdict.items():
    web_interface.insert_ts(pk, series)

In [47]:
# attach the generated metadata to each stored time series
for pk in tsdict:
    web_interface.upsert_meta(pk, metadict[pk])

In [48]:
# mark each randomly chosen time series as a vantage point
for vp_key in vpkeys:
    web_interface.insert_vp(vp_key)

## Inspect Data

Let's inspect the data, to make sure that all the previous operations were successful.

In [49]:
# select all database entries (no filter is passed); the empty `fields` list
# requests all metadata fields
results = web_interface.select(fields=[])

In [50]:
# the number of stored entries equals the number of generated time series
assert num_ts == len(results)

# and the stored primary keys are exactly the generated ones
assert ts_keys == sorted(results)

In [51]:
# check that every stored time series and metadata value matches the original
for k, expected_ts in tsdict.items():
    # the time series itself
    results = web_interface.select(fields=['ts'], md={'pk': k})
    assert results[k]['ts'] == expected_ts
    # every metadata field that was upserted for this key
    results = web_interface.select(fields=[], md={'pk': k})
    for field, expected_value in metadict[k].items():
        assert expected_value == results[k][field]

In [59]:
# show the chosen vantage points next to the entries flagged as vantage
# points in the database (sorted by primary key) for a visual comparison
print('Vantage points selected:', vpkeys)
print(
    'Vantage points in database:',
    web_interface.select(
        fields=None,
        md={'vp': True},
        additional={'sort_by': '+pk'},
    ).keys(),
)

Vantage points selected: ['ts-15', 'ts-20', 'ts-31', 'ts-46', 'ts-9']
Vantage points in database: odict_keys(['ts-15', 'ts-20', 'ts-31', 'ts-46', 'ts-9'])


In [62]:
# check that the trigger has executed as expected: the stored 'mean' and 'std'
# must agree with the values computed from the original time series.
# An absolute tolerance is used instead of rounding both sides and comparing
# with ==, because two nearly identical values on either side of a rounding
# boundary would round differently and fail the check spuriously.
for k in tsdict:
    results = web_interface.select(fields=['mean', 'std'], md={'pk': k})
    assert np.isclose(results[k]['mean'], tsdict[k].mean(), rtol=0, atol=1e-4)
    assert np.isclose(results[k]['std'], tsdict[k].std(), rtol=0, atol=1e-4)

Let's generate an additional time series for similarity searches. We'll store the time series and the results of the similarity searches, so that we can compare against them after reloading the database.

In [64]:
# draw fresh parameters (mean, standard deviation, jitter) from the same
# ranges as the sample data; only the time series is kept, its metadata
# is discarded
_, query = tsmaker(np.random.uniform(0.0, 1.0),
                   np.random.uniform(0.05, 0.4),
                   np.random.uniform(0.05, 0.2))

In [66]:
# vantage point similarity search for the query; the result is stored so it
# can be compared against after the database is reloaded
# NOTE(review): the second argument (1) is presumably the number of matches
# to return -- confirm
results_vp = web_interface.vp_similarity_search(query, 1)
results_vp

OrderedDict([('ts-4', 0.14793853804867974)])

In [68]:
# iSAX similarity search for the same query; the result is stored so it can
# be compared against after the database is reloaded
results_isax = web_interface.isax_similarity_search(query)
results_isax

OrderedDict([('ts-4', 1.8006607394375933)])

Finally, let's store our iSAX tree representation.

In [71]:
# keep the iSAX tree representation for comparison after the reload, and
# print it for inspection
results_tree = web_interface.isax_tree()
print(results_tree)

root
--->['11', '10', '00', '00']: 5 ['ts-18', 'ts-19', 'ts-26', 'ts-31', 'ts-39']
--->['11', '10', '00', '01']: 1 ['ts-34']
--->['11', '01', '00', '11']: 1 ['ts-46']
--->['00', '01', '11', '11']: 2 ['ts-14', 'ts-45']
--->['10', '10', '10', '00']: 4 ['ts-0', 'ts-16', 'ts-3', 'ts-9']
--->['01', '00', '10', '11']: 0 []
------>['011', '000', '100', '111']: 1 ['ts-2']
------>['010', '001', '101', '111']: 1 ['ts-5']
------>['010', '001', '100', '111']: 1 ['ts-41']
------>['010', '001', '101', '110']: 1 ['ts-17']
------>['011', '001', '100', '111']: 1 ['ts-29']
------>['011', '000', '101', '110']: 1 ['ts-33']
--->['00', '00', '10', '11']: 4 ['ts-1', 'ts-22', 'ts-42', 'ts-48']
--->['10', '00', '01', '11']: 2 ['ts-27', 'ts-35']
--->['00', '01', '10', '11']: 1 ['ts-11']
--->['11', '10', '01', '00']: 5 ['ts-20', 'ts-23', 'ts-36', 'ts-37', 'ts-44']
--->['11', '00', '00', '11']: 1 ['ts-8']
--->['00', '00', '11', '11']: 1 ['ts-30']
--->['11', '11', '01', '00']: 5 ['ts-28', 'ts-32', 'ts-38', 'ts-4',

## Terminate and Reload Database

Now that we know that everything is loaded, let's close the database and re-open it.

In [72]:
# ask both subprocesses to stop ...
server.terminate()
webserver.terminate()
# ... and block until they have actually exited: terminate() only sends the
# signal, and the servers are restarted immediately afterwards, so without
# waiting the restart could race with processes that are still shutting down
server.wait()
webserver.wait()
# drop the client that belonged to the terminated servers
web_interface = None

In [73]:
# start the database server again with the same parameters as before, so it
# reopens the database files written during the first session
server = subprocess.Popen([
    'python', '../go_server_persistent.py',
    '--ts_length', str(ts_length),
    '--data_dir', data_dir,
    '--db_name', db_name,
])
# restart the webserver and create a fresh client for it
webserver = subprocess.Popen(['python', '../go_webserver.py'])
web_interface = WebInterface()

## Inspect Data

Let's repeat the previous tests to check whether our persistence architecture worked.

In [74]:
# select all database entries (no filter is passed) from the reloaded
# database; the empty `fields` list requests all metadata fields
results = web_interface.select(fields=[])

In [78]:
# NOTE(review): ad-hoc inspection cell. Its execution count (78) is higher
# than those of the cells that follow it, so it was run out of order. The
# recorded output lists a single entry whose 'order' value lies outside the
# -5..5 range that the sample data was generated with.
web_interface.select(fields=[])

OrderedDict([('ts-33',
              OrderedDict([('blarg', 1),
                           ('pk', 'ts-33'),
                           ('useless', 1062695157),
                           ('vp', False),
                           ('std', 0.4770243763923645),
                           ('order', 1061211351),
                           ('mean', 0.7254546284675598)]))])

In [75]:
# the reloaded database holds as many entries as were generated
assert num_ts == len(results)

# and its primary keys are exactly the generated ones
assert ts_keys == sorted(results)

In [77]:
# compare every reloaded time series and metadata value with the originals
for k, expected_ts in tsdict.items():
    # the time series must survive the reload unchanged
    results = web_interface.select(fields=['ts'], md={'pk': k})
    assert results[k]['ts'] == expected_ts
    # metadata mismatches are reported rather than asserted (see FIXME)
    results = web_interface.select(fields=[], md={'pk': k})
    for field, expected in metadict[k].items():
        actual = results[k][field]
        if expected != actual:
            print(k, field, expected, actual)
# FIXME: the metadata assertion (assert expected == actual) is disabled here;
# the recorded output shows 'order' values that differ from the originals
# after the reload, so the check only prints the mismatches for now

ts-31 order -1 0
ts-34 order -4 1058371128
ts-46 order 0 1063892437
ts-14 order -3 1062630045
ts-0 order -5 1065379942
ts-3 order 1 1065718121
ts-2 order -3 1060174732
ts-42 order 3 1062983357
ts-35 order 3 1058798047
ts-9 order 0 1065310348
ts-11 order -5 1063268559
ts-48 order 1 1061994563
ts-36 order -2 1051432939
ts-18 order -3 1049491364
ts-8 order -1 1057427250
ts-23 order 2 1050019161
ts-30 order -3 1062320276
ts-38 order -1 1062721490
ts-45 order -3 1063763770
ts-20 order -3 1050489331
ts-43 order -5 1061680176
ts-27 order -4 1058630216
ts-22 order -2 1062161939
ts-5 order -5 1060950230
ts-6 order -4 1065247692
ts-24 order 5 1064063519
ts-32 order -3 1056015493
ts-39 order 0 1053291596
ts-49 order 1 1064029434
ts-15 order 3 1059647462
ts-12 order -5 1063911407
ts-1 order -2 1063791921
ts-37 order 0 1049154434
ts-21 order 3 1065060483
ts-40 order -4 1065278102
ts-26 order -1 1047792833
ts-13 order -3 1066187057
ts-7 order -4 1066374293
ts-16 order 2 1065702951
ts-44 order -5 105