In [3]:
# Demo for synchronization of two data directories
import glob
import io
import json
import os
import re
import sqlite3
import time

import farmhash
import numpy as np
import pandas as pd

In [178]:
# Lookup table: SPICE data-set directory name -> human-readable mission name.
missions_readable = {
    "clem1-l-spice-6-v1.0": "clementine",
    "co-s_j_e_v-spice-6-v1.0": "cassini_orbiter",
    "dawn-m_a-spice-6-v1.0": "dawn",
    "di-c-spice-6-v1.0": "deep_impact",
    "dif-c_e_x-spice-6-v1.0": "epoxi",
    "ds1-a_c-spice-6-v1.0": "deep_space_1",
    "grail-l-spice-6-v1.0": "grail",
    "hay-a-spice-6-v1.0": "hayabusa",
    "jno-j_e_ss-spice-6-v1.0": "juno",
    "lro-l-spice-6-v1.0": "lunar_reconnaissance_orbiter",
    "mer1-m-spice-6-v1.0": "mer_1",
    "mer2-m-spice-6-v1.0": "mer_2",
    "mess-e_v_h-spice-6-v1.0": "messenger",
    "mex-e_m-spice-6-v1.0": "mars_express",
    "mgs-m-spice-6-v1.0": "mars_global_surveyor",
    "mro-m-spice-6-v1.0": "mars_reconnaissance_orbiter",
    "msl-m-spice-6-v1.0": "mars_science_laboratory",
    "near-a-spice-6-v1.0": "near",
    "nh-j_p_ss-spice-6-v1.0": "new_horizons",
    "ody-m-spice-6-v1.0": "mars_odyssey",
    "ros-e_m_a_c-spice-6-v1.0": "rosetta",
    "sdu-c-spice-6-v1.0": "stardust",
    "vco-v-spice-6-v1.0": "venus_climate_orbiter",
    "vex-e_v-spice-6-v1.0": "venus_express",
    "vo1_vo2-m-spice-6-v1.0": "viking_orbiter",
}

# Reverse lookup: human-readable mission name -> data-set directory name.
missions_true = {
    readable_name: dataset_dir
    for dataset_dir, readable_name in missions_readable.items()
}

In [181]:
def create_dirdf(directory):
    """Build a DataFrame of content hashes for every visible file under a directory.

    Walks `directory` recursively, skips files whose name starts with ".",
    and hashes each remaining file with farmhash.hash64.

    Parameters
    ----------
    directory : str
        Root directory to index.

    Returns
    -------
    pandas.DataFrame or None
        One row per file with a single "Hash" column. The index holds each
        file's path with the leading `directory` prefix removed, and the
        index itself is named `directory`. Returns None (after printing an
        error message) when `directory` does not exist.
    """
    if not os.path.exists(directory):
        print("Error: Directory '" + directory + "' does not exist.")
        return

    filenames = []
    hashvalues = []

    for root, _subdirs, files in os.walk(directory):
        for name in files:
            if name.startswith("."):  # ignore hidden files
                continue
            filepath = os.path.join(root, name)

            # spice data encoding is mixed, so read as binary. The context
            # manager closes the handle right away instead of leaking one
            # open file per indexed entry.
            with io.open(filepath, 'rb') as fh:
                contents = fh.read()

            # The hash is taken over str(contents), i.e. the textual form of
            # the bytes object; kept unchanged so the values stay comparable
            # with hashes produced the same way elsewhere.
            filenames.append(filepath.split(directory, 1)[1])
            hashvalues.append(farmhash.hash64(str(contents)))

    df = pd.DataFrame(data=hashvalues, index=filenames, columns=["Hash"])
    df.index.name = directory
    return df

# Time how long create_dirdf takes to index ./spice_data, then print the
# elapsed wall-clock seconds followed by the resulting hash table.
start = time.time()
dir1df = create_dirdf("./spice_data")
print("elapsed time: ", time.time() - start)
print(dir1df)

elapsed time:  0.012140035629272461
                                                                    Hash
./spice_data                                                            
/clem1-l-spice-6-v1.0/ck/testfile1.txt              13167233149662072294
/clem1-l-spice-6-v1.0/ck/testfile2.txt               2116770068367243914
/clem1-l-spice-6-v1.0/ck/testfile3.txt              10117441339441774812
/clem1-l-spice-6-v1.0/ck/testfile4.txt                407662078023551858
/clem1-l-spice-6-v1.0/ck/testdir1A/testfile1A.txt    1855841718642996950
/mess-e_v_h-spice-6-v1.0/ck/testfile1.txt           13167233149662072294
/mess-e_v_h-spice-6-v1.0/ck/testfile2.txt            2116770068367243914
/mess-e_v_h-spice-6-v1.0/ck/testfile3.txt           10117441339441774812
/mess-e_v_h-spice-6-v1.0/ck/testfile4.txt             407662078023551858
/mess-e_v_h-spice-6-v1.0/ck/testdir1A/testfile1...   1855841718642996950


In [None]:
def newest(path):
    """Map each metakernel family found under `path` to its highest version tag.

    Parameters
    ----------
    path : str
        Directory prefix to search. '*.tm' is appended to it verbatim, so it
        should end with a path separator (e.g. '.../extras/mk/').

    Returns
    -------
    dict
        Keys are the family prefix of a file name as matched by the pattern
        '([^;]*)_([^;]*)_' (e.g. 'msgr_2004_'); values are the version tag
        ('v' followed by digits, e.g. 'v13') with the largest numeric value
        seen for that family. Files whose name has no version tag or no
        family prefix are skipped. An empty dict is returned when nothing
        matches.
    """
    sorted_mk = {}
    highest_seen = {}  # family prefix -> numeric version currently stored

    for f in glob.glob(path + '*.tm'):
        # Work on the bare file name so the result does not depend on the
        # directory part of the path containing (or lacking) 'mk/'.
        mk = os.path.basename(f)

        version = re.search('v[0-9]+', mk)
        family = re.search('([^;]*)_([^;]*)_', mk)
        if version is None or family is None:
            continue

        tag = version.group(0)
        number = int(tag[1:])
        key = family.group(0)

        # glob returns files in arbitrary order, so compare numerically and
        # only keep a tag when it beats what is already recorded.
        if key not in highest_seen or number > highest_seen[key]:
            highest_seen[key] = number
            sorted_mk[key] = tag

    return sorted_mk

# NOTE(review): hardcoded absolute path to one user's machine; this lookup
# only finds metakernels where that exact directory exists.
print(newest('/Users/thatcher/Desktop/Classes/Capstone/SpiceData/mess-e_v_h-spice-6-v1.0/messsp_1000/extras/mk/'))

In [45]:
def create_spicedb(spdir, db_path='./spicedb.sqlite'):
    """Rebuild the SQLite index of the SPICE files found under `spdir`.

    The expected layout is `spdir/<mission dir>/<kernel dir>/...`. Every
    non-hidden file below a kernel directory becomes one row of the SPICE
    table: | Mission | Kernel | File | Path | Hash | Newest |.

    Parameters
    ----------
    spdir : str
        Root directory holding one sub-directory per mission.
    db_path : str, optional
        Location of the SQLite file. Any existing file there is deleted and
        recreated. Defaults to './spicedb.sqlite'.

    Returns
    -------
    None
        When `spdir` does not exist an error is printed and nothing on disk
        is touched.

    Raises
    ------
    KeyError
        If a mission directory containing files is not a key of
        `missions_readable`.
    """
    # Validate the input first so a bad argument cannot wipe a good database.
    if not os.path.exists(spdir):
        print("Error: Directory '" + spdir + "' does not exist.")
        return

    if os.path.exists(db_path):
        os.remove(db_path)

    conn = sqlite3.connect(db_path)  # initialize db, this might move to an init func in the api
    try:
        c = conn.cursor()
        c.execute("CREATE TABLE SPICE (Mission TEXT, Kernel TEXT, File TEXT, "
                  "Path TEXT, Hash TEXT, Newest INTEGER)")

        for mis in os.listdir(spdir):
            mis_dir = os.path.join(spdir, mis)
            # Skip hidden entries and stray files sitting next to the mission directories.
            if mis.startswith('.') or not os.path.isdir(mis_dir):
                continue

            for ker in os.listdir(mis_dir):
                if ker.startswith('.'):
                    continue

                for root, _subdirs, files in os.walk(os.path.join(mis_dir, ker)):
                    for name in files:
                        if name.startswith('.'):  # ignore hidden files
                            continue

                        mis_hr = missions_readable[mis]
                        filepath = os.path.join(root, name)

                        # spice data encoding is mixed, so read as binary;
                        # the hash is taken over str() of the bytes object.
                        with io.open(filepath, 'rb') as fh:
                            fhash = farmhash.hash64(str(fh.read()))

                        # Bound parameters keep file names containing quotes
                        # from breaking (or injecting into) the statement.
                        # The hash is stored as text, matching the column type.
                        c.execute(
                            "INSERT OR IGNORE INTO SPICE "
                            "(Mission, Kernel, File, Path, Hash, Newest) "
                            "VALUES (?, ?, ?, ?, ?, ?)",
                            (mis_hr, ker, name, filepath, str(fhash), 0))
        conn.commit()
    finally:
        # Always release the database handle, even if indexing failed midway.
        conn.close()



{'mdis_cklist_': 'v02', 'msgr_2004_': 'v13', 'msgr_2005_': 'v13', 'msgr_2006_': 'v13', 'msgr_2007_': 'v13', 'msgr_2008_': 'v12', 'msgr_2009_': 'v10', 'msgr_2010_': 'v09', 'msgr_2011_': 'v10', 'msgr_2012_': 'v08', 'msgr_2013_': 'v06', 'msgr_2014_': 'v04', 'msgr_2015_': 'v02'}


In [64]:
# Open the on-disk index and look up one known file.
conn = sqlite3.connect('./spice_data/.spicedb.sqlite')
c = conn.cursor()
c.execute("SELECT * FROM SPICE WHERE Mission='clementine' AND Kernel='ck' AND File='testfile1.txt'")
# fetchall() returns the rows produced by the SELECT above as a list of tuples
all_rows = c.fetchall()

print('=======')
# Position 4 of a row is read here (the 'hash' field in sql_dict);
# print a marker instead when nothing matched.
print(all_rows[0][4] if all_rows else 'nooooo')
print('=====')

nooooo
=====


In [108]:
# Release the SQLite connection held in `conn`.
conn.close()

In [170]:
def sql_dict(sql_row):
    """Convert a single row of a sql select return into a dictionary.

    The first six positions of `sql_row` are mapped, in order, to the keys
    'mission', 'kernel', 'file', 'path', 'hash' and 'newest'.
    """
    field_names = ('mission', 'kernel', 'file', 'path', 'hash', 'newest')
    return {field: sql_row[position] for position, field in enumerate(field_names)}

def sql_dict_array(sql_rows):
    """Convert a whole sql select return into a list of dictionaries.

    Each row is passed through sql_dict; the order of the rows is preserved.
    """
    return [sql_dict(row) for row in sql_rows]
        

In [176]:
# Convert the rows held in `all_rows` to dictionaries and pretty-print each
# one as indented JSON.
select_output = sql_dict_array(all_rows)
for d in select_output:
    print(json.dumps(d, indent=2))

{
  "mission": "clementine",
  "kernel": "ck",
  "file": "testfile1.txt",
  "path": "./spice_data/clem1-l-spice-6-v1.0/ck/testfile1.txt",
  "hash": "13167233149662072294",
  "newest": 0
}
{
  "mission": "clementine",
  "kernel": "ck",
  "file": "testfile2.txt",
  "path": "./spice_data/clem1-l-spice-6-v1.0/ck/testfile2.txt",
  "hash": "2116770068367243914",
  "newest": 0
}
{
  "mission": "clementine",
  "kernel": "ck",
  "file": "testfile3.txt",
  "path": "./spice_data/clem1-l-spice-6-v1.0/ck/testfile3.txt",
  "hash": "10117441339441774812",
  "newest": 0
}
{
  "mission": "clementine",
  "kernel": "ck",
  "file": "testfile4.txt",
  "path": "./spice_data/clem1-l-spice-6-v1.0/ck/testfile4.txt",
  "hash": "407662078023551858",
  "newest": 0
}
{
  "mission": "clementine",
  "kernel": "ck",
  "file": "testfile1A.txt",
  "path": "./spice_data/clem1-l-spice-6-v1.0/ck/testdir1A/testfile1A.txt",
  "hash": "1855841718642996950",
  "newest": 0
}


In [36]:

# Print the name of every sub-directory found inside each kernel directory
# of every mission, ignoring entries whose name starts with '.' at each level.
for mis in [m for m in os.listdir('./spice_data') if not m.startswith('.')]:
    for ker in [k for k in os.listdir('./spice_data/'+mis+'/data') if not k.startswith('.')]:
        for file in [f for f in os.listdir('./spice_data/'+mis+'/data/'+ker) if not f.startswith('.')]:
            if not os.path.isdir('./spice_data/'+mis+'/data/'+ker+'/'+file):
                continue
            print(file)

spice_data/testdir1A
spice_data/testfile1.txt
spice_data/testfile2.txt
spice_data/testfile3.txt
spice_data/testfile4.txt
spice_data/testdir1A
spice_data/testfile1.txt
spice_data/testfile2.txt
spice_data/testfile3.txt
spice_data/testfile4.txt


In [39]:
# Show the current working directory; the relative './spice_data' paths used
# in this notebook are resolved against it.
print(os.getcwd())

/Users/ryan/Desktop/school/spicerack_withdb/test_concepts/sqlite_db


In [75]:
def populate_spicedb(data_dir='spice_data', db_path='/spicedata/.spicedb.sqlite'):
    """Rebuild the SQLite index of SPICE files from scratch.

    Walks `data_dir` and records every non-hidden file whose directory has
    'data' or 'extras' as its third path component below `data_dir`
    (e.g. `<data_dir>/<mission>/<bundle>/data/ck/file`).

    Parameters
    ----------
    data_dir : str, optional
        Root of the SPICE tree to index. Defaults to 'spice_data'.
    db_path : str, optional
        SQLite file to (re)create; an existing file is deleted first.
        Defaults to '/spicedata/.spicedb.sqlite'.

    Raises
    ------
    KeyError
        If the mission directory of an indexed file is not a key of
        `missions_readable`.
    """
    if os.path.exists(db_path):
        os.remove(db_path)

    # atm we only use one table in the database - mostly just for storage -> quick access
    # database format will be: | Mission | Kernel | File | Path | Hash | Newest |
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute("CREATE TABLE SPICE (Mission TEXT, Kernel TEXT, File TEXT, Path TEXT, Hash TEXT, Newest INTEGER )")

        for root, _subdirs, files in os.walk(data_dir):
            # Components of the directory relative to data_dir:
            # [mission, <bundle>, 'data' | 'extras', ...]
            parts = os.path.relpath(root, data_dir).split(os.sep)
            if len(parts) < 3 or parts[2] not in ('data', 'extras'):
                continue  # decided per directory, before any file is read or hashed

            for name in files:
                if name.startswith('.'):
                    continue

                # spice data encoding is mixed, so read as binary; the hash
                # is taken over str() of the bytes object.
                with io.open(os.path.join(root, name), 'rb') as fh:
                    fhash = farmhash.hash64(str(fh.read()))

                # NOTE(review): the Kernel column receives 'data'/'extras'
                # here, not the kernel-type directory ('ck', 'spk', ...) --
                # confirm whether parts[3] was intended.
                # Bound parameters keep quotes in names from breaking the SQL.
                c.execute(
                    "INSERT OR IGNORE INTO SPICE "
                    "(Mission, Kernel, File, Path, Hash, Newest) "
                    "VALUES (?, ?, ?, ?, ?, ?)",
                    (missions_readable[parts[0]], parts[2], name, root, str(fhash), 0))

        conn.commit()
    finally:
        # Release the database handle even if indexing failed midway.
        conn.close()
    print('Finished Indexing of SPICE data, stored in ' + db_path)

['spice_data', 'clem1-l-spice-6-v1.0', 'data', 'ck', 'testfile1.txt']
['spice_data', 'clem1-l-spice-6-v1.0', 'data', 'ck', 'testfile2.txt']
['spice_data', 'clem1-l-spice-6-v1.0', 'data', 'ck', 'testfile3.txt']
['spice_data', 'clem1-l-spice-6-v1.0', 'data', 'ck', 'testfile4.txt']
['spice_data', 'clem1-l-spice-6-v1.0', 'data', 'ck', 'testdir1A', 'testfile1A.txt']
['spice_data', 'clem1-l-spice-6-v1.0', 'extras', 'mk', 'testfile3.txt']
['spice_data', 'mess-e_v_h-spice-6-v1.0', 'data', 'ck', 'testfile1.txt']
['spice_data', 'mess-e_v_h-spice-6-v1.0', 'data', 'ck', 'testfile2.txt']
['spice_data', 'mess-e_v_h-spice-6-v1.0', 'data', 'ck', 'testfile3.txt']
['spice_data', 'mess-e_v_h-spice-6-v1.0', 'data', 'ck', 'testfile4.txt']
['spice_data', 'mess-e_v_h-spice-6-v1.0', 'data', 'ck', 'testdir1A', 'testfile1A.txt']
['spice_data', 'mess-e_v_h-spice-6-v1.0', 'extras', 'mk', 'testfile3.txt']


In [91]:
# presumably the SPICE kernel-type directory names; not referenced again in this cell
kernels = ['ck', 'ek', 'fk', 'ik', 'lsk', 'pck', 'sclk', 'spk']
# Print the path of every non-hidden file found under spice_data.
for root, subdir, files in os.walk('spice_data'):
    for name in files: 
        if name[0] == '.':  # skip hidden files
            continue
        fp_split = root.split('/')  # components of the containing directory (not used below)
        print(root+'/'+name)

spice_data/clem1-l-spice-6-v1.0/clsp_1000/data/ck/testfile1.txt
spice_data/clem1-l-spice-6-v1.0/clsp_1000/data/ck/testfile4.txt
spice_data/clem1-l-spice-6-v1.0/clsp_1000/data/sclk/testfile3.txt
spice_data/clem1-l-spice-6-v1.0/clsp_1000/data/spk/testfile2.txt
spice_data/clem1-l-spice-6-v1.0/clsp_1000/extras/mk/testfile3.txt
spice_data/mess-e_v_h-spice-6-v1.0/messsp_1000/data/ck/testfile1.txt
spice_data/mess-e_v_h-spice-6-v1.0/messsp_1000/data/ck/testfile4.txt
spice_data/mess-e_v_h-spice-6-v1.0/messsp_1000/data/sclk/testfile3.txt
spice_data/mess-e_v_h-spice-6-v1.0/messsp_1000/data/spk/testfile2.txt
spice_data/mess-e_v_h-spice-6-v1.0/messsp_1000/extras/mk/testfile3.txt
