In [10]:
# Demo for synchronization of two data directories
import os
import io
import pandas as pd
import numpy as np
import farmhash
import time
import sqlite3
import json
import glob
import re

In [3]:
missions_readable = {   "clem1-l-spice-6-v1.0"       : "clementine",
                        "co-s_j_e_v-spice-6-v1.0"    : "cassini_orbiter",
                        "dawn-m_a-spice-6-v1.0"      : "dawn",
                        "di-c-spice-6-v1.0"          : "deep_impact",
                        "dif-c_e_x-spice-6-v1.0"     : "epoxi",
                        "ds1-a_c-spice-6-v1.0"       : "deep_space_1",
                        "grail-l-spice-6-v1.0"       : "grail",
                        "hay-a-spice-6-v1.0"         : "hayabusa",
                        "jno-j_e_ss-spice-6-v1.0"    : "juno",
                        "lro-l-spice-6-v1.0"         : "lunar_reconnaissance_orbiter",
                        "mer1-m-spice-6-v1.0"        : "mer_1",
                        "mer2-m-spice-6-v1.0"        : "mer_2",
                        "mess-e_v_h-spice-6-v1.0"    : "messenger",
                        "mex-e_m-spice-6-v1.0"       : "mars_express",
                        "mgs-m-spice-6-v1.0"         : "mars_global_surveyor",
                        "mro-m-spice-6-v1.0"         : "mars_reconnaissance_orbiter",
                        "msl-m-spice-6-v1.0"         : "mars_science_laboratory",
                        "near-a-spice-6-v1.0"        : "near",
                        "nh-j_p_ss-spice-6-v1.0"     : "new_horizons",
                        "ody-m-spice-6-v1.0"         : "mars_odyssey",
                        "ros-e_m_a_c-spice-6-v1.0"   : "rosetta",
                        "sdu-c-spice-6-v1.0"         : "stardust",
                        "vco-v-spice-6-v1.0"         : "venus_climate_orbiter",
                        "vex-e_v-spice-6-v1.0"       : "venus_express",
                        "vo1_vo2-m-spice-6-v1.0"     : "viking_orbiter"}

missions_true = {value: key for key, value in missions_readable.items()}

In [6]:
def create_dirdf(directory):
    if not os.path.exists(directory):
        print("Error: Directory '" + directory + "' does not exist.")
        return
    
    filenames = []
    hashvalues = []
    
    for root, subdir, files in os.walk(directory):
        for name in files:
            if not name[0] == ".": # ignore hidden files
                filepath = os.path.join(root, name)
                
                # hash full file contents
                # note: spice data encoding is mixed, so read as binary
                file = str(io.open(filepath,'rb').read()) 
                filenames.append(filepath.split(directory, 1)[1])
                hashvalues.append(farmhash.hash64(file))
                
    df = pd.DataFrame(data=hashvalues, index = filenames, columns = ["Hash"])
    df.index.name = directory
    return df

start = time.time()
dir1df = create_dirdf("/Users/thatcher/Desktop/Classes/Capstone/SpiceData/")
print("elapsed time: ", time.time() - start)
print(dir1df)

elapsed time:  45.778687953948975
                                                                    Hash
/Users/thatcher/Desktop/Classes/Capstone/SpiceD...                      
clem1-l-spice-6-v1.0/clsp_1000/aareadme.htm          9860181418246867054
clem1-l-spice-6-v1.0/clsp_1000/aareadme.lbl          2900448953212383570
clem1-l-spice-6-v1.0/clsp_1000/aareadme.txt         17585396647624954768
clem1-l-spice-6-v1.0/clsp_1000/errata.txt           14362732480219937772
clem1-l-spice-6-v1.0/clsp_1000/voldesc.cat          12318166595063303068
clem1-l-spice-6-v1.0/clsp_1000/catalog/catinfo.txt   1154749873628357165
clem1-l-spice-6-v1.0/clsp_1000/catalog/insthost...  13768184483255343979
clem1-l-spice-6-v1.0/clsp_1000/catalog/mission.cat  17362930047179486599
clem1-l-spice-6-v1.0/clsp_1000/catalog/person.cat    7349549391493292032
clem1-l-spice-6-v1.0/clsp_1000/catalog/ref.cat        308121063549536287
clem1-l-spice-6-v1.0/clsp_1000/catalog/release.cat  16489566392610648761
clem1-l-spice-6-v

In [334]:
def newest_mk(path):
    files = glob.glob(path + '*.tm')
    metakernels = []
    sorted_mk = {}
    newest_mk = []
    
    for f in files:
        meta = f.split('mk/')
        metakernels.append(meta[1])

    for mk in metakernels:
        version = re.search('v[0-9]+', mk)
        version1 = re.search('([^;]*)_([^;]*)_', mk)
        newest_mk.append(version.group(0))
        sorted_mk[version1.group(0)] = version.group(0)
    
    return sorted_mk

def newest_kernel(path):
    all_files = glob.glob(path + '/**/' + '*.*', recursive=True)
    files = []
    
    for file in all_files:
        
        if file.endswith('.lbl') or file.endswith('.txt'):
            continue
        else:
            files.append(file)
            
    metakernels = []
    newest_mk = []
    sorted_mk = {}
    
    for f in files:
        kern = f.split('data')
        metakernels.append(kern[1])
            
    for mk in metakernels:
#         version = re.search('v[0-9]+\.', mk)
        version1 = re.search('v[0-9]+', mk)
        version2 = re.search('_v[0-9]+', mk)

        split = mk.split('.')

        if version2:
            split = mk.split('_v')

        elif version1:
            split = mk.split('v')
            
        sorted_mk[split[0]] = mk
            
#         if version1 == None:
#             sorted_mk[mk] = 'Newest'
#         else:
#             sorted_mk[version1.group(0).split('v')[0]] = mk
#             print(mk)


    return sorted_mk

files = newest_kernel('/Users/thatcher/Desktop/Classes/Capstone/SpiceData/mess-e_v_h-spice-6-v1.0/messsp_1000/data')

for file in files:
    print('File: ' + file + ' Val:' + files[file])

File: /ck/msgr_1405 Val:/ck/msgr_1405_v02.bc
File: /ck/msgr_1406 Val:/ck/msgr_1406_v02.bc
File: /ck/msgr_1407 Val:/ck/msgr_1407_v02.bc
File: /ck/msgr_1408 Val:/ck/msgr_1408_v02.bc
File: /ck/msgr_1409 Val:/ck/msgr_1409_v02.bc
File: /ck/msgr_1410 Val:/ck/msgr_1410_v01.bc
File: /ck/msgr_1411 Val:/ck/msgr_1411_v01.bc
File: /ck/msgr_1412 Val:/ck/msgr_1412_v01.bc
File: /ck/msgr_1501 Val:/ck/msgr_1501_v01.bc
File: /ck/msgr_1502 Val:/ck/msgr_1502_v01.bc
File: /ck/msgr_1503 Val:/ck/msgr_1503_v01.bc
File: /ck/msgr_1504 Val:/ck/msgr_1504_v01.bc
File: /ck/msgr_mdis_gm040819_080128 Val:/ck/msgr_mdis_gm040819_080128v1.bc
File: /ck/msgr_mdis_gm040819_090201 Val:/ck/msgr_mdis_gm040819_090201v1.bc
File: /ck/msgr_mdis_gm040819_091021 Val:/ck/msgr_mdis_gm040819_091021v1.bc
File: /ck/msgr_mdis_gm040819_110603 Val:/ck/msgr_mdis_gm040819_110603v1.bc
File: /ck/msgr_mdis_gm040819_110921 Val:/ck/msgr_mdis_gm040819_110921v1.bc
File: /ck/msgr_mdis_gm040819_120520 Val:/ck/msgr_mdis_gm040819_120520v1.bc
File: /ck/

In [236]:
def create_spicedb(spdir):
    
    if os.path.exists('./spicedb.sqlite'):
        os.remove('./spicedb.sqlite')
        
    if not os.path.exists(spdir):
        print("Error: Directory '" + spdir + "' does not exist.")
        return
    
    kern_dict = newest_kernel('/Users/thatcher/Desktop/Classes/Capstone/SpiceData/clem1-l-spice-6-v1.0/clsp_1000/data/')
    conn = sqlite3.connect('./spicedb.sqlite') # initialize db, this might move to an init func in the api
    c = conn.cursor()
    
    c.execute("CREATE TABLE SPICE (Mission TEXT)") # create missions table
    c.execute("ALTER TABLE SPICE ADD COLUMN Kernel TEXT")
    c.execute("ALTER TABLE SPICE ADD COLUMN File TEXT")
    c.execute("ALTER TABLE SPICE ADD COLUMN Path TEXT")
    c.execute("ALTER TABLE SPICE ADD COLUMN Hash TEXT") #theres probs a way to to this in one line
    c.execute("ALTER TABLE SPICE ADD COLUMN Newest TEXT")

    # ooh spicy tabs ~ we could probably just parse first two directories from full string?????
    for mis in [m for m in os.listdir(spdir) if not m[0] == '.']:
        for ker in [k for k in os.listdir(spdir+'/'+mis) if not k[0] == '.']:
             for root, subdir, files in os.walk(spdir+'/'+mis+'/'+ker):
                for name in files:
                    if not name[0] == ".": # ignore hidden files
                        print(name)
                        filepath = os.getcwd()
                        
                        if name in kern_dict:
                            newest = kern_dict[name]
                            
                        else:
                            newest = 'Newest'
                            
                        mis_hr = missions_readable[mis]
                        fhash = farmhash.hash64(str(io.open(os.path.join(root, name),'rb').read())) # spice data encoding is mixed, so read as binary
                        c.execute("INSERT OR IGNORE INTO SPICE (Mission, Kernel, File, Path, Hash, Newest) VALUES ('{mn}', '{kn}', '{fn}', '{fp}', '{fh}', '{new}')"
                                  .format(mn=mis_hr, kn=root.split('/')[-1], fn=name, fp=filepath, fh=fhash, new=newest))
    conn.commit()



In [237]:
create_spicedb('/Users/thatcher/Desktop/Classes/Capstone/SpiceData/')
conn = sqlite3.connect('./spicedb.sqlite')
c = conn.cursor()
c.execute("SELECT * FROM SPICE WHERE Kernel != 'Newest'")
# Note: fetchall() will pull the whole buffer, if you SELECT ten times, the result will be in there ten times
all_rows = c.fetchall()

aareadme.htm
aareadme.lbl
aareadme.txt
errata.txt
voldesc.cat
catinfo.txt
insthost.cat
mission.cat
person.cat
ref.cat
release.cat
spice_hsk.cat
spice_inst.cat
spiceds.cat
ckinfo.txt
clem_act_ck3.bc
clem_act_ck3.lbl
clem_moon_940127_940312.bdb
clem_moon_940127_940312.lbl
clem_moon_940312_940330.bdb
clem_moon_940312_940330.lbl
clem_moon_940330_940415.bdb
clem_moon_940330_940415.lbl
clem_moon_940415_940507.bdb
clem_moon_940415_940507.lbl
clem_sky_940201_940507.bdb
clem_sky_940201_940507.lbl
ekinfo.txt
clem_v20.lbl
clem_v20.tf
fkinfo.txt
moon_060721.lbl
moon_060721.tf
moon_assoc_me.lbl
moon_assoc_me.tf
moon_assoc_pa.lbl
moon_assoc_pa.tf
clem_astar_006.lbl
clem_astar_006.ti
clem_bstar_006.lbl
clem_bstar_006.ti
clem_cpt_002.lbl
clem_cpt_002.ti
clem_hires_008.lbl
clem_hires_008.ti
clem_lidar_005.lbl
clem_lidar_005.ti
clem_lwir_008.lbl
clem_lwir_008.ti
clem_nir_009.lbl
clem_nir_009.ti
clem_uvvis_008.lbl
clem_uvvis_008.ti
ikinfo.txt
lskinfo.txt
naif0008.lbl
naif0008.tls
moon_pa_de403_1950_2198.

In [238]:
for row in all_rows:
    print(row)

('clementine', 'clsp_1000', 'aareadme.htm', '/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db', '9860181418246867054', 'Newest')
('clementine', 'clsp_1000', 'aareadme.lbl', '/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db', '2900448953212383570', 'Newest')
('clementine', 'clsp_1000', 'aareadme.txt', '/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db', '17585396647624954768', 'Newest')
('clementine', 'clsp_1000', 'errata.txt', '/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db', '14362732480219937772', 'Newest')
('clementine', 'clsp_1000', 'voldesc.cat', '/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db', '12318166595063303068', 'Newest')
('clementine', 'catalog', 'catinfo.txt', '/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db', '1154749873628357165', 'Newest')
('clementine', 'catalog', 'insthost.cat', '/Users/thatcher/Desktop/Classe

In [None]:
conn.close()

In [28]:
# returns a dictionary of a single row from a sql select return
def sql_dict(sql_row):
    return { 'mission': sql_row[0],
             'kernel' : sql_row[1],
             'file'   : sql_row[2],
             'path'   : sql_row[3],
             'hash'   : sql_row[4],
             'newest' : sql_row[5] }

# returns an array of dictionaries of a whole sql select return
def sql_dict_array(sql_rows):
    dicts = []
    for row in sql_rows:
        dicts.append(sql_dict(row))
    return dicts
        

In [162]:
select_output = sql_dict_array(all_rows)
for d in select_output:
    print(json.dumps(d, indent=2))

{
  "mission": "clementine",
  "kernel": "clsp_1000",
  "file": "aareadme.htm",
  "path": "/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db",
  "hash": "9860181418246867054",
  "newest": 0
}
{
  "mission": "clementine",
  "kernel": "clsp_1000",
  "file": "aareadme.lbl",
  "path": "/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db",
  "hash": "2900448953212383570",
  "newest": 0
}
{
  "mission": "clementine",
  "kernel": "clsp_1000",
  "file": "aareadme.txt",
  "path": "/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db",
  "hash": "17585396647624954768",
  "newest": 0
}
{
  "mission": "clementine",
  "kernel": "clsp_1000",
  "file": "errata.txt",
  "path": "/Users/thatcher/Desktop/Classes/Capstone/spicerack/test_concepts/sqlite_db",
  "hash": "14362732480219937772",
  "newest": 0
}
{
  "mission": "clementine",
  "kernel": "clsp_1000",
  "file": "voldesc.cat",
  "path": "/Users/thatcher/Desktop/Classes/Capstone/s