In [40]:
# Demo for synchronization of two data directories
import os
import io
import pandas as pd
import numpy as np
import farmhash
import time
import sqlite3
import glob
import re

In [42]:
def create_dirdf(directory):
    if not os.path.exists(directory):
        print("Error: Directory '" + directory + "' does not exist.")
        return
    
    filenames = []
    hashvalues = []
    
    for root, subdir, files in os.walk(directory):
        for name in files:
            if not name[0] == ".": # ignore hidden files
                filepath = os.path.join(root, name)
                
                # hash full file contents
                # note: spice data encoding is mixed, so read as binary
                file = str(io.open(filepath,'rb').read()) 
                filenames.append(filepath.split(directory, 1)[1])
                hashvalues.append(farmhash.hash64(file))
                
    df = pd.DataFrame(data=hashvalues, index = filenames, columns = ["Hash"])
    df.index.name = directory
    return df

start = time.time()
dir1df = create_dirdf("./testdir1")
print("elapsed time: ", time.time() - start)

dir2df = create_dirdf("./testdir2")

print(dir1df, dir2df)

missions_readable = {   "clem1-l-spice-6-v1.0"       : "clementine",
                        "co-s_j_e_v-spice-6-v1.0"    : "cassini_orbiter",
                        "dawn-m_a-spice-6-v1.0"      : "dawn",
                        "di-c-spice-6-v1.0"          : "deep_impact",
                        "dif-c_e_x-spice-6-v1.0"     : "epoxi",
                        "ds1-a_c-spice-6-v1.0"       : "deep_space_1",
                        "grail-l-spice-6-v1.0"       : "grail",
                        "hay-a-spice-6-v1.0"         : "hayabusa",
                        "jno-j_e_ss-spice-6-v1.0"    : "juno",
                        "lro-l-spice-6-v1.0"         : "lunar_reconnaissance_orbiter",
                        "mer1-m-spice-6-v1.0"        : "mer_1",
                        "mer2-m-spice-6-v1.0"        : "mer_2",
                        "mess-e_v_h-spice-6-v1.0"    : "messenger",
                        "mex-e_m-spice-6-v1.0"       : "mars_express",
                        "mgs-m-spice-6-v1.0"         : "mars_global_surveyor",
                        "mro-m-spice-6-v1.0"         : "mars_reconnaissance_orbiter",
                        "msl-m-spice-6-v1.0"         : "mars_science_laboratory",
                        "near-a-spice-6-v1.0"        : "near",
                        "nh-j_p_ss-spice-6-v1.0"     : "new_horizons",
                        "ody-m-spice-6-v1.0"         : "mars_odyssey",
                        "ros-e_m_a_c-spice-6-v1.0"   : "rosetta",
                        "sdu-c-spice-6-v1.0"         : "stardust",
                        "vco-v-spice-6-v1.0"         : "venus_climate_orbiter",
                        "vex-e_v-spice-6-v1.0"       : "venus_express",
                        "vo1_vo2-m-spice-6-v1.0"     : "viking_orbiter"}

missions_true = {value: key for key, value in missions_readable.items()}

Error: Directory './testdir1' does not exist.
elapsed time:  0.00015497207641601562
Error: Directory './testdir2' does not exist.
None None


In [45]:
def newest(path):
    files = glob.glob(path + '*.tm')
    metakernels = []
    sorted_mk = {}
    newest_mk = []
    
    for f in files:
        meta = f.split('mk/')
        metakernels.append(meta[1])

    for mk in metakernels:
        version = re.search('v[0-9]+', mk)
        version1 = re.search('([^;]*)_([^;]*)_', mk)
        newest_mk.append(version.group(0))
        sorted_mk[version1.group(0)] = version.group(0)
    
    return sorted_mk

print(newest('/Users/thatcher/Desktop/Classes/Capstone/SpiceData/mess-e_v_h-spice-6-v1.0/messsp_1000/extras/mk/'))

def create_spicedb(spdir):
    
    if os.path.exists('./spicedb.sqlite'):
        os.remove('./spicedb.sqlite')
        
    if not os.path.exists(spdir):
        print("Error: Directory '" + spdir + "' does not exist.")
        return
    
    conn = sqlite3.connect('./spicedb.sqlite') # initialize db, this might move to an init func in the api
    c = conn.cursor()
    
    c.execute("CREATE TABLE SPICE (Mission TEXT)") # create missions table
    c.execute("ALTER TABLE SPICE ADD COLUMN Kernel TEXT")
    c.execute("ALTER TABLE SPICE ADD COLUMN File TEXT")
    c.execute("ALTER TABLE SPICE ADD COLUMN Path TEXT")
    c.execute("ALTER TABLE SPICE ADD COLUMN Hash TEXT") #theres probs a way to to this in one line
    c.execute("ALTER TABLE SPICE ADD COLUMN Newest INTEGER")

    # ooh spicy tabs ~ we could probably just parse first two directories from full string?????
    for mis in [m for m in os.listdir(spdir) if not m[0] == '.']:
        for ker in [k for k in os.listdir(spdir+'/'+mis) if not k[0] == '.']:
             for root, subdir, files in os.walk(spdir+'/'+mis+'/'+ker):
                for name in files:
                    if not name[0] == ".": # ignore hidden files
                        filepath = os.getcwd()
                        mis_hr = missions_readable[mis]
                        fhash = farmhash.hash64(str(io.open(os.path.join(root, name),'rb').read())) # spice data encoding is mixed, so read as binary
                        
                        c.execute("INSERT OR IGNORE INTO SPICE (Mission, Kernel, File, Path, Hash, Newest) VALUES ('{mn}', '{kn}', '{fn}', '{fp}', '{fh}', {new})"
                                  .format(mn=mis_hr, kn=ker, fn=name, fp=filepath, fh=fhash, new=0))
    conn.commit()



{'mdis_cklist_': 'v02', 'msgr_2004_': 'v13', 'msgr_2005_': 'v13', 'msgr_2006_': 'v13', 'msgr_2007_': 'v13', 'msgr_2008_': 'v12', 'msgr_2009_': 'v10', 'msgr_2010_': 'v09', 'msgr_2011_': 'v10', 'msgr_2012_': 'v08', 'msgr_2013_': 'v06', 'msgr_2014_': 'v04', 'msgr_2015_': 'v02'}


In [31]:
conn = sqlite3.connect('./spicedb.sqlite')
c = conn.cursor()
c.execute("SELECT Path FROM SPICE")
# Note: fetchall() will pull the whole buffer, if you SELECT ten times, the result will be in there ten times
all_rows = c.fetchall() 
for row in all_rows:
    print(row)

('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db',)
('/Users/thatcher/Desktop/Classes/Capsto

In [108]:
conn.close()