# Demonstrations of Sifter Precalc/SQL Functionality

 - This notebook contains a mixture of demos of finished code, as well as WIP experimentation with ways to speed up the code

In [1]:
import time
import numpy as np
import scipy.stats as stats
import math
import random
from collections import defaultdict
import os
import sys
from collections import Counter
import glob 
import warnings
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import importlib
from astropy.time import Time

# -------------------------------------------------------------------------------------
# Local imports
# -------------------------------------------------------------------------------------
# Extend the module search path, relative to the current working directory, with:
#  - <parent of cwd>/tests  (presumably where test_tracklets lives -- TODO confirm)
sys.path.append( os.path.join(os.path.dirname(os.getcwd() ), 'tests')  )
#  - <grandparent of cwd>/orbit_cheby/orbit_cheby  (presumably where orbit_cheby lives -- TODO confirm)
sys.path.append( os.path.join(os.path.dirname(os.path.dirname(os.getcwd()) ), 'orbit_cheby', 'orbit_cheby')  )
from sifter import precalc, sql
import orbit_cheby
import test_tracklets 

# Reload the local modules so that re-running this cell re-executes their current source
importlib.reload(orbit_cheby)
importlib.reload(precalc)
importlib.reload(test_tracklets)
# Last expression of the cell: its return value (the reloaded module) is what the cell displays
importlib.reload(sql)


<module 'sifter.sql' from '/home/malexand/.anaconda3/envs/astroconda36/lib/python3.6/site-packages/sifter/sql.py'>

# Explicit Test Code Exists in sifter/tests

 - test_sqlite.py
 - test_tracklets.py
 
 Run these using pytest

# Top Level: "Tracklets" - Storage of obs80-strings for multiple tracklets
 - This takes input sets of obs80-strings (assumed to have already been partitioned into subsets, one subset per tracklet), performs some useful precalculations, and then saves them into an sql database
 - This calls a number of underlying functionalities

In [2]:
####--- Build a fresh database and load a set of observations from file ---###

# Start from a brand-new db (convenience function borrowed from the test-suite)
test_tracklets.convenience_func_create_db_and_tables()

# Load the ITF (or whatever small part of it the file holds), one string per line
ITF_file = '../dev_data/test.itf'
with open(ITF_file) as f:
    observation_list = list(f)

In [3]:
%%time 
# Instantiate Tracklets with the full list of observation lines read from the ITF file
# (the following cell checks that this caused tracklet data to be written to the db)
T = precalc.Tracklets(observation_list)



CPU times: user 1.32 s, sys: 4 ms, total: 1.33 s
Wall time: 1.35 s


In [4]:
%%time
# test that the above caused the tracklet to be uploaded to db
cur = T.conn.cursor()
cur.execute('SELECT * from tracklets')
f = cur.fetchone()
assert( len(f)>3 ), 'data not uploaded'
if len(f) > 3 : print('Data uploaded as required')

# Completely delete db to facilitate future testing
os.remove(sql.fetch_db_filepath())


Data uploaded as required
CPU times: user 17 ms, sys: 2 ms, total: 19 ms
Wall time: 24.1 ms


# Lower-Level Functionalities
 - The above "Tracklets" instantiation uses some underlying functionality to perform the parsing & storage of the obs80 strings
 - Here we demonstrate some of the underlying functionality

##### sql.create_connection
 - Create a connection to an extant database 

In [5]:
%%time 
# Where do we want the db to live
assert 'sifter' in sql.fetch_db_filepath()

if os.path.isfile( sql.fetch_db_filepath() ):
    os.remove(sql.fetch_db_filepath())

# Does a db get created & connected-to ?
conn = sql.create_connection( sql.fetch_db_filepath() )
assert os.path.isfile( os.path.join( sql.fetch_db_filepath() ) )


CPU times: user 46 ms, sys: 0 ns, total: 46 ms
Wall time: 47.6 ms


##### precalc.Tracklets.parse_all_observations
 - Parses a single long list of all observations, splits them into tracklets, performs calculations of (e.g.) HP, RoM, etc., then creates a summary dictionary for each tracklet and collects all the dictionaries in a list.

In [6]:
%%time 
# define observations
# Read in ITF (or some small part of it)
ITF_file = '../dev_data/test.itf'
with open(ITF_file) as f:
  observation_list = f.readlines()    

# call parse_all_observations
tracklet_dictionary_list = T.parse_all_observations(observation_list)


# check that the returned results are as expected
assert isinstance(tracklet_dictionary_list, list)
for tracklet_dictionary in tracklet_dictionary_list:
    assert 'JD' in tracklet_dictionary
    assert 'HP' in tracklet_dictionary
    assert 'tracklet_name' in tracklet_dictionary


CPU times: user 757 ms, sys: 8 ms, total: 765 ms
Wall time: 754 ms


##### precalc.Tracklets.parse_tracklet_observations
 - Parses the list of observations for a single tracklet, performs calculations of (e.g.) HP, RoM, etc., and then creates a summary dictionary

In [7]:
%%time 
# define observations
tracklet_observations= [ '     K11Q88F*~C2011 08 29.52378 01 57 34.729+14 35 44.64         22.8 rc~0qBd568',
                         '     K11Q88F ~C2011 08 29.55470 01 57 34.343+14 35 42.61         22.9 rc~0qBd568',
                         '     K11Q88F ~C2011 08 29.61470 01 57 34.343+14 35 42.59         22.9 rc~0qBd568']
    
# call parse_observations
tracklet_dictionary = T.parse_tracklet_observations(tracklet_observations)


# check that the returned results are as expected
assert isinstance(tracklet_dictionary, dict)
assert 'JD' in tracklet_dictionary
assert 'HP' in tracklet_dictionary
assert 'tracklet_name' in tracklet_dictionary
assert 'observations' in tracklet_dictionary
assert tracklet_observations == tracklet_dictionary['observations']


CPU times: user 13 ms, sys: 0 ns, total: 13 ms
Wall time: 13.9 ms


##### precalc.identify_tracklets
 - This function takes a long list of observations and identifies tracklets, sticking them together into a list containing a list of observations for each tracklet.

In [8]:
%%time 
# Define some observations
# Here we have three dummy triplets, with the last two intentionally using the same trksub
# in order to test that identify_tracklets correctly identifies them as separate tracklets. 
observation_list= ['     K11Q88F*~C2011 08 29.52378 01 57 34.729+14 35 44.64         22.8 rc~0qBd568',
                   '     K11Q88F ~C2011 08 29.55470 01 57 34.343+14 35 42.61         22.9 rc~0qBd568',
                   '     K11Q88F ~C2011 08 29.61470 01 57 34.343+14 35 42.59         22.9 rc~0qBd568',
                   '     K11Q99F*~C2012 08 29.52378 01 57 34.729+14 35 44.64         22.8 rc~0qBd568',
                   '     K11Q99F ~C2012 08 29.58470 01 57 34.343+14 35 42.61         22.9 rc~0qBd568',
                   '     K11Q99F ~C2012 08 29.61470 01 57 34.343+14 35 42.59         22.9 rc~0qBd568',
                   '     K11Q99F ~C2018 09 28.52378 01 57 34.729+14 35 44.64         22.8 rc~0qBd568',
                   '     K11Q99F ~C2018 09 28.58470 01 57 34.343+14 35 42.61         22.9 rc~0qBd568',
                   '     K11Q99F ~C2018 09 28.61470 01 57 34.343+14 35 42.59         22.9 rc~0qBd568',
                   ]
# Split into tracklet lists using identify_tracklets:
list_of_lists = precalc.identify_tracklets(observation_list)
# Check that there are three tracklets as expected:
assert len(list_of_lists) == 3
assert len(list_of_lists[0]) == 3
assert len(list_of_lists[1]) == 3
assert len(list_of_lists[2]) == 3

CPU times: user 0 ns, sys: 1 ms, total: 1 ms
Wall time: 245 µs


##### precalc.save_tracklets & sql.upsert_tracklets
 - Saves a dictionary into a SQLITE db, using JD & HP as important columns for selection/query later-on

In [9]:
%%time 
# Create db from scratch
test_tracklets.convenience_func_create_db_and_tables()


# Set up a Tracklet and use the parse_observations routine to get JD, HP, ...
T = precalc.Tracklets()
observation_pairs= [[ '     K11Q88F*~C2011 08 29.52378 01 57 34.729+14 35 44.64         22.8 rc~0qBd568',
                     '     K11Q88F ~C2011 08 29.55470 01 57 34.343+14 35 42.61         22.9 rc~0qBd568',
                     '     K11Q88F ~C2011 08 29.61470 01 57 34.343+14 35 42.59         22.9 rc~0qBd568'],
                    [ '     K11Q99F*~C2012 08 29.52378 01 57 34.729+14 35 44.64         22.8 rc~0qBd568',
                     '     K11Q99F ~C2012 08 29.58470 01 57 34.343+14 35 42.61         22.9 rc~0qBd568',
                     '     K11Q99F ~C2012 08 29.61470 01 57 34.343+14 35 42.59         22.9 rc~0qBd568']]
    
# call parse_tracklet_observations
tracklet_dictionary_list = [T.parse_tracklet_observations(tracklet_observations) for tracklet_observations in observation_pairs]

# Now save the data in the db
T.save_tracklets(tracklet_dictionary_list)

# Test the data was uploaded and can be downloaded
cur = T.conn.cursor()
cur.execute('SELECT * from tracklets')
f = cur.fetchall()
assert( len(f)==2 and np.all([ len(_)>3 for _ in f]) ), 'data not uploaded'

# Completely delete db to facilitate future testing
os.remove(sql.fetch_db_filepath())



CPU times: user 91 ms, sys: 4 ms, total: 95 ms
Wall time: 135 ms
