In [None]:
# Demo for synchronization of two data directories
import os
import io
import pandas as pd
import numpy as np
import farmhash
import time
import sqlite3
import json
import glob
import re

In [None]:
# Lookup from a SPICE data-set directory name (as it appears on disk) to the
# human-readable mission name used in the database.
missions_readable = {
    "clem1-l-spice-6-v1.0": "clementine",
    "co-s_j_e_v-spice-6-v1.0": "cassini_orbiter",
    "dawn-m_a-spice-6-v1.0": "dawn",
    "di-c-spice-6-v1.0": "deep_impact",
    "dif-c_e_x-spice-6-v1.0": "epoxi",
    "ds1-a_c-spice-6-v1.0": "deep_space_1",
    "grail-l-spice-6-v1.0": "grail",
    "hay-a-spice-6-v1.0": "hayabusa",
    "jno-j_e_ss-spice-6-v1.0": "juno",
    "lro-l-spice-6-v1.0": "lunar_reconnaissance_orbiter",
    "mer1-m-spice-6-v1.0": "mer_1",
    "mer2-m-spice-6-v1.0": "mer_2",
    "mess-e_v_h-spice-6-v1.0": "messenger",
    "mex-e_m-spice-6-v1.0": "mars_express",
    "mgs-m-spice-6-v1.0": "mars_global_surveyor",
    "mro-m-spice-6-v1.0": "mars_reconnaissance_orbiter",
    "msl-m-spice-6-v1.0": "mars_science_laboratory",
    "near-a-spice-6-v1.0": "near",
    "nh-j_p_ss-spice-6-v1.0": "new_horizons",
    "ody-m-spice-6-v1.0": "mars_odyssey",
    "ros-e_m_a_c-spice-6-v1.0": "rosetta",
    "sdu-c-spice-6-v1.0": "stardust",
    "vco-v-spice-6-v1.0": "venus_climate_orbiter",
    "vex-e_v-spice-6-v1.0": "venus_express",
    "vo1_vo2-m-spice-6-v1.0": "viking_orbiter",
}

# Reverse lookup: human-readable mission name -> data-set directory name.
missions_true = {readable: dataset for dataset, readable in missions_readable.items()}

In [None]:
def create_dirdf(directory):
    """Build a DataFrame of content hashes for every non-hidden file under ``directory``.

    Parameters
    ----------
    directory : str
        Root directory to walk recursively.

    Returns
    -------
    pandas.DataFrame or None
        One row per file with a single ``Hash`` column. The index holds each
        file's path with the leading ``directory`` text removed, and the index
        name is ``directory``. Returns ``None`` (after printing an error) when
        ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        print("Error: Directory '" + directory + "' does not exist.")
        return

    filenames = []
    hashvalues = []

    for root, _, files in os.walk(directory):
        for name in files:
            if name.startswith("."):  # ignore hidden files
                continue

            filepath = os.path.join(root, name)

            # Hash the full file contents. SPICE data encoding is mixed, so the
            # file is read as binary. The context manager closes each handle
            # promptly instead of leaking one per file.
            with io.open(filepath, 'rb') as handle:
                # str() of the bytes is hashed (not the raw bytes) so the hash
                # values stay identical to the ones this function produced before.
                contents = str(handle.read())

            filenames.append(filepath.split(directory, 1)[1])
            hashvalues.append(farmhash.hash64(contents))

    df = pd.DataFrame(data=hashvalues, index=filenames, columns=["Hash"])
    df.index.name = directory
    return df

# Time a full scan of the local SPICE data directory and show the resulting frame.
# NOTE(review): the absolute path is specific to one machine; create_dirdf prints
# an error and returns None when the directory does not exist.
start = time.time()
dir1df = create_dirdf("/Users/thatcher/Desktop/Classes/Capstone/SpiceData/")
print("elapsed time: ", time.time() - start)
print(dir1df)

In [None]:
def newest_mk(path):
    """Map each metakernel name prefix to the highest version tag found for it.

    Parameters
    ----------
    path : str
        Path prefix that is globbed as ``path + '*.tm'``; pass a directory
        path ending in a separator to list the ``.tm`` files inside it.

    Returns
    -------
    dict
        ``{prefix: version}`` where ``version`` is the ``v<digits>`` tag with
        the largest numeric value among the files sharing ``prefix``. Files
        without a ``v<digits>`` tag are skipped. Empty when nothing matches.
    """
    newest = {}

    # Sorted so the result does not depend on the order glob happens to return.
    for f in sorted(glob.glob(path + '*.tm')):
        # Work on the bare file name so the directory part can neither break
        # the parsing nor contribute a spurious match.
        mk = os.path.basename(f)

        version = re.search('v[0-9]+', mk)
        if version is None:
            continue  # not a versioned metakernel name

        prefix = re.search('([^;]*)_([^;]*)_', mk)
        # Names with fewer than two underscores do not match the prefix
        # pattern; fall back to everything before the version tag.
        key = prefix.group(0) if prefix else mk[:version.start()]

        tag = version.group(0)
        current = newest.get(key)
        # Compare numerically so that e.g. v10 beats v02.
        if current is None or int(tag[1:]) > int(current[1:]):
            newest[key] = tag

    return newest

def newest_kernel(path):
    """Map each kernel's version-stripped name to its highest-versioned file.

    Every file below ``path`` whose name contains a dot is considered, except
    ``.lbl`` and ``.txt`` files. Each file is identified by the part of its
    path that follows the first ``'data'`` substring (the whole path when
    ``'data'`` does not occur).

    Parameters
    ----------
    path : str
        Directory to search recursively.

    Returns
    -------
    dict
        ``{key: name}`` where ``key`` is the name with the version tag
        (``_v<digits>`` preferred, otherwise ``v<digits>``) and everything
        after it removed, or the text before the first ``.`` for unversioned
        files. ``name`` is the file with the largest version number for that
        key. Empty when nothing matches.
    """
    newest = {}  # key -> (version number, name)

    # Sorted so ties are resolved the same way on every run.
    for f in sorted(glob.glob(path + '/**/' + '*.*', recursive=True)):
        if f.endswith(('.lbl', '.txt')):
            continue
        if not os.path.isfile(f):
            continue  # '*.*' also matches directories with a dot in their name

        # Keep everything after the FIRST 'data'; a later 'data' inside the
        # file name must not truncate it.
        _, found, tail = f.partition('data')
        mk = tail if found else f

        match = re.search('_v([0-9]+)', mk) or re.search('v([0-9]+)', mk)
        if match:
            # Cut at the matched tag itself, not at the first 'v' in the name.
            key = mk[:match.start()]
            number = int(match.group(1))
        else:
            key = mk.split('.')[0]
            number = -1  # unversioned files lose to any versioned file

        if key not in newest or number >= newest[key][0]:
            newest[key] = (number, mk)

    return {key: name for key, (_, name) in newest.items()}

# Demo: list the newest kernel found for each key in one mission's data directory.
files = newest_kernel(
    '/Users/thatcher/Desktop/Classes/Capstone/SpiceData/mess-e_v_h-spice-6-v1.0/messsp_1000/data'
)

for file in files.keys():
    print(''.join(('File: ', file, ' Val:', files[file])))

In [None]:
def create_spicedb(spdir, db_path='./spicedb.sqlite'):
    """Rebuild the SPICE file index as a fresh SQLite database.

    Walks ``spdir/<mission>/<kernel set>/...`` and inserts one row per
    non-hidden file into a single ``SPICE`` table with the TEXT columns
    ``Mission, Kernel, File, Path, Hash, Newest``.

    Parameters
    ----------
    spdir : str
        Root directory holding one sub-directory per mission data set.
    db_path : str, optional
        Location of the SQLite file. Any existing file there is replaced.

    Returns
    -------
    None
        Prints an error and returns without touching ``db_path`` when
        ``spdir`` does not exist.
    """
    # Validate the input BEFORE deleting anything, so a bad path cannot
    # destroy an existing database.
    if not os.path.exists(spdir):
        print("Error: Directory '" + spdir + "' does not exist.")
        return

    # NOTE(review): the lookup is built from one hard-coded mission directory,
    # not from ``spdir``, and newest_kernel keys are version-stripped relative
    # names, so a bare file name may rarely match below. Confirm the intent.
    kern_dict = newest_kernel('/Users/thatcher/Desktop/Classes/Capstone/SpiceData/clem1-l-spice-6-v1.0/clsp_1000/data/')

    if os.path.exists(db_path):
        os.remove(db_path)

    conn = sqlite3.connect(db_path)  # initialize db, this might move to an init func in the api
    try:
        c = conn.cursor()
        c.execute("CREATE TABLE SPICE (Mission TEXT, Kernel TEXT, File TEXT, "
                  "Path TEXT, Hash TEXT, Newest TEXT)")

        for mis in sorted(os.listdir(spdir)):
            mis_dir = os.path.join(spdir, mis)
            # Skip hidden entries and stray files sitting next to the missions.
            if mis.startswith('.') or not os.path.isdir(mis_dir):
                continue

            # Fall back to the raw directory name for missions not in the table.
            mis_hr = missions_readable.get(mis, mis)

            for ker in sorted(os.listdir(mis_dir)):
                ker_dir = os.path.join(mis_dir, ker)
                if ker.startswith('.') or not os.path.isdir(ker_dir):
                    continue

                for root, _, files in os.walk(ker_dir):
                    for name in files:
                        if name.startswith('.'):  # ignore hidden files
                            continue

                        # Record the file's own location; this used to store the
                        # current working directory for every row.
                        filepath = os.path.join(root, name)
                        newest = kern_dict.get(name, 'Newest')

                        # SPICE data encoding is mixed, so read as binary. str()
                        # of the bytes is hashed to keep hash values unchanged.
                        with io.open(filepath, 'rb') as handle:
                            fhash = farmhash.hash64(str(handle.read()))

                        # Bound parameters keep quotes in file names from breaking
                        # (or injecting into) the statement. The hash is passed as
                        # text because the column is TEXT and the value may not fit
                        # SQLite's signed 64-bit integer.
                        c.execute(
                            "INSERT OR IGNORE INTO SPICE (Mission, Kernel, File, Path, Hash, Newest) "
                            "VALUES (?, ?, ?, ?, ?, ?)",
                            (mis_hr, os.path.basename(root), name, filepath, str(fhash), newest),
                        )
        conn.commit()
    finally:
        conn.close()



In [None]:
# Rebuild the database from the local SPICE data directory, then read rows back.
# NOTE(review): the absolute path is specific to one machine.
create_spicedb('/Users/thatcher/Desktop/Classes/Capstone/SpiceData/')
conn = sqlite3.connect('./spicedb.sqlite')
c = conn.cursor()
# NOTE(review): this filters on the Kernel column, while 'Newest' is the
# placeholder create_spicedb writes into the Newest column — confirm which
# column was intended.
c.execute("SELECT * FROM SPICE WHERE Kernel != 'Newest'")
# Note: fetchall() returns all remaining rows of the most recent execute() on this cursor
all_rows = c.fetchall()

In [None]:
# Show every fetched row as a raw tuple, one per line.
for row in all_rows:
    print(row)

In [None]:
# Release the database connection; all_rows was already fetched into memory.
conn.close()

In [None]:
def sql_dict(sql_row):
    """Turn one row of a SQL SELECT result into a dictionary.

    Positions 0 through 5 of ``sql_row`` become the values of the keys
    'mission', 'kernel', 'file', 'path', 'hash' and 'newest', in that order.
    """
    field_names = ('mission', 'kernel', 'file', 'path', 'hash', 'newest')
    return {field: sql_row[position] for position, field in enumerate(field_names)}

def sql_dict_array(sql_rows):
    """Turn a whole SQL SELECT result into a list of dictionaries, one per row."""
    return [sql_dict(row) for row in sql_rows]
        

In [None]:
# Convert the fetched rows to dictionaries and pretty-print each one as JSON.
select_output = sql_dict_array(all_rows)
for d in select_output:
    print(json.dumps(d, indent=2))