In [1]:
# imports
import datasetdatabase as dsdb
import pandas as pd
import numpy as np
import pathlib
import quilt
import json
import os

In [2]:
# Connection manager: built from dsdb.LOCAL on behalf of user "jacksonb".
# Ending the cell with `mngr` displays the configured connection
# (its driver and database link).
mngr = dsdb.ConnectionManager(dsdb.LOCAL, user="jacksonb")
mngr

LOCAL:
	driver: sqlite
	link: /active/examples/local_database/local.db

In [3]:
# Database manager: open the LOCAL connection. Every upload and read below
# goes through `local`. Displaying it lists the recent datasets
# (the list is empty on a fresh database).
local = mngr.connect(dsdb.LOCAL)
local

Recent Datasets:
--------------------------------------------------------------------------------

In [4]:
# Create the first example dataset: ten rows mixing several Python / numpy
# value types, each row pointing at a small JSON file written to disk.
fp_ex = pathlib.Path("/active/examples/fp_example/")
# mkdir(exist_ok=True) replaces the exists()/makedirs() pair: there is no gap
# between the check and the creation, and missing parents are still created.
fp_ex.mkdir(parents=True, exist_ok=True)

# Seed the global numpy RNG so the displayed values are reproducible.
np.random.seed(seed=12)

# Rows are collected under their own name so that `test` only ever refers to
# the finished DataFrame (never to the intermediate list).
rows = []
for i in range(10):
    # One placeholder JSON file per row; its path is stored in "files".
    fp = fp_ex / (str(i) + ".json")
    with open(fp, "w") as write_out:
        json.dump({"hello": "world"}, write_out)

    # Entries are evaluated top to bottom, so the three random draws happen
    # in the order bools -> floats -> ndarrays; keep that order to keep the
    # seeded values unchanged.
    rows.append({
        "strings": "foo" + str(i),
        "bools": np.random.rand() < 0.5,
        "floats": np.random.rand() * 100,
        "ndarrays": np.random.rand(2, 2),
        "tuples": (1, 2, 3),
        "sets": {1, 2, 3},
        "files": str(fp),
    })

test = pd.DataFrame(rows)
# Keep a CSV copy of the frame next to the JSON files.
test.to_csv(fp_ex / "example.csv")
test

Unnamed: 0,bools,files,floats,ndarrays,sets,strings,tuples
0,True,/active/examples/fp_example/0.json,74.00497,"[[0.26331501518513467, 0.5337393933802977], [0...","{1, 2, 3}",foo0,"(1, 2, 3)"
1,False,/active/examples/fp_example/1.json,3.342143,"[[0.9569493362751168, 0.13720932135607644], [0...","{1, 2, 3}",foo1,"(1, 2, 3)"
2,False,/active/examples/fp_example/2.json,85.273554,"[[0.002259233518513537, 0.5212260272202929], [...","{1, 2, 3}",foo2,"(1, 2, 3)"
3,False,/active/examples/fp_example/3.json,16.071675,"[[0.7645604503388788, 0.020809797952066167], [...","{1, 2, 3}",foo3,"(1, 2, 3)"
4,True,/active/examples/fp_example/4.json,67.145265,"[[0.4712297782500141, 0.8161682980460269], [0....","{1, 2, 3}",foo4,"(1, 2, 3)"
5,False,/active/examples/fp_example/5.json,32.756948,"[[0.3346475291060558, 0.9780580790165189], [0....","{1, 2, 3}",foo5,"(1, 2, 3)"
6,False,/active/examples/fp_example/6.json,82.500925,"[[0.40664030180666166, 0.4513084114213143], [0...","{1, 2, 3}",foo6,"(1, 2, 3)"
7,True,/active/examples/fp_example/7.json,96.25969,"[[0.4192502702591062, 0.4240524465509987], [0....","{1, 2, 3}",foo7,"(1, 2, 3)"
8,True,/active/examples/fp_example/8.json,3.516826,"[[0.08427266973184566, 0.7325206981419501], [0...","{1, 2, 3}",foo8,"(1, 2, 3)"
9,True,/active/examples/fp_example/9.json,22.085252,"[[0.055019993340200135, 0.5232460707782919], [...","{1, 2, 3}",foo9,"(1, 2, 3)"


In [5]:
# First upload: register `test` under an explicit name and description and
# keep the returned dataset info for display.
# "files" is declared as a filepath column, but store_files=False is passed
# for this upload.
ds_info = local.upload_dataset(
    dataset=test,
    name="test_dataset",
    description="this is the hello world of dataset ingestion",
    type_map={
        "bools": bool,
        "files": str,
        "floats": float,
        "ndarrays": np.ndarray,
        "strings": str,
    },
    store_files=False,
    filepath_columns=["files"],
)
ds_info

Validating Dataset...
Creating Iota...
Creating Junction Items...
Dataset upload complete!


{'DatasetId': 1,
 'Name': 'test_dataset',
 'Description': 'this is the hello world of dataset ingestion',
 'SourceId': 1,
 'Created': '2018-07-12 17:49:24.785137'}

In [6]:
# Recent datasets: the dataset uploaded above should now be listed.
local

Recent Datasets:
--------------------------------------------------------------------------------
{'DatasetId': 1, 'Name': 'test_dataset', 'Description': 'this is the hello world of dataset ingestion', 'SourceId': 1, 'Created': '2018-07-12 17:49:24.785137'}

In [7]:
# Attempt to break the database: upload the exact same DataFrame again, this
# time without a name. No validation/creation messages are printed and the
# info returned is the existing record (same DatasetId, Name and Created
# timestamp as the first upload).
ds_info = local.upload_dataset(dataset=test)
ds_info

{'DatasetId': 1,
 'Name': 'test_dataset',
 'Description': 'this is the hello world of dataset ingestion',
 'SourceId': 1,
 'Created': '2018-07-12 17:49:24.785137'}

In [8]:
# No changes: the duplicate upload did not create a new dataset, so the
# listing still contains only the original entry.
local

Recent Datasets:
--------------------------------------------------------------------------------
{'DatasetId': 1, 'Name': 'test_dataset', 'Description': 'this is the hello world of dataset ingestion', 'SourceId': 1, 'Created': '2018-07-12 17:49:24.785137'}

In [9]:
# Second example dataset: same layout as the first one, but drawn from a
# different seed so the random columns hold different values.
np.random.seed(seed=3)

records = []
for idx in range(10):
    json_path = fp_ex / "{}.json".format(idx)
    # (Re)write the placeholder JSON file this row refers to.
    with json_path.open("w") as handle:
        json.dump({"hello": "world"}, handle)

    # Entries are evaluated top to bottom, which fixes the order of the
    # three random draws (bools, floats, ndarrays).
    records.append({
        "strings": "foo{}".format(idx),
        "bools": np.random.rand() < 0.5,
        "floats": np.random.rand() * 100,
        "ndarrays": np.random.rand(2, 2),
        "tuples": (1, 2, 3),
        "sets": {1, 2, 3},
        "files": str(json_path),
    })

upload_test = pd.DataFrame(records)
# Written to the same "example.csv" path used for the first example
# dataset, so that file is overwritten here.
upload_test.to_csv(fp_ex / "example.csv")
upload_test

Unnamed: 0,bools,files,floats,ndarrays,sets,strings,tuples
0,False,/active/examples/fp_example/0.json,70.814782,"[[0.2909047389129443, 0.510827605197663], [0.8...","{1, 2, 3}",foo0,"(1, 2, 3)"
1,True,/active/examples/fp_example/1.json,20.724288,"[[0.05146720330082988, 0.44080984365063647], [...","{1, 2, 3}",foo1,"(1, 2, 3)"
2,False,/active/examples/fp_example/2.json,27.848728,"[[0.6762549019801313, 0.5908628174163508], [0....","{1, 2, 3}",foo2,"(1, 2, 3)"
3,True,/active/examples/fp_example/3.json,41.51012,"[[0.28352508177131874, 0.6931379183129963], [0...","{1, 2, 3}",foo3,"(1, 2, 3)"
4,False,/active/examples/fp_example/4.json,78.031476,"[[0.30636353237617975, 0.2219578839321814], [0...","{1, 2, 3}",foo4,"(1, 2, 3)"
5,False,/active/examples/fp_example/5.json,67.238368,"[[0.9028341085383981, 0.8457508712931793], [0....","{1, 2, 3}",foo5,"(1, 2, 3)"
6,False,/active/examples/fp_example/6.json,55.784076,"[[0.3615647630625781, 0.22505450483983191], [0...","{1, 2, 3}",foo6,"(1, 2, 3)"
7,True,/active/examples/fp_example/7.json,29.179277,"[[0.45768639932297794, 0.8605339129469265], [0...","{1, 2, 3}",foo7,"(1, 2, 3)"
8,True,/active/examples/fp_example/8.json,45.462208,"[[0.20541034460498753, 0.20137871104307314], [...","{1, 2, 3}",foo8,"(1, 2, 3)"
9,True,/active/examples/fp_example/9.json,36.217621,"[[0.7076866218156711, 0.7467462231020046], [0....","{1, 2, 3}",foo9,"(1, 2, 3)"


In [10]:
# Upload the new dataset, naming "files" as the filepath column so the
# referenced files are uploaded to FMS as well.
# No name or description is passed for this upload.
ds_info = local.upload_dataset(
    dataset=upload_test,
    filepath_columns="files",
)
ds_info

Validating Dataset...
Creating Iota...
Creating Junction Items...
Dataset upload complete!


{'DatasetId': 2,
 'Name': 'fms_pkl_7a786f58d21782569a3a4aa11852029c',
 'Description': None,
 'SourceId': 2,
 'Created': '2018-07-12 17:49:29.199027'}

In [11]:
# Recent datasets: both the named upload and the auto-named one are listed.
local

Recent Datasets:
--------------------------------------------------------------------------------
{'DatasetId': 1, 'Name': 'test_dataset', 'Description': 'this is the hello world of dataset ingestion', 'SourceId': 1, 'Created': '2018-07-12 17:49:24.785137'}
{'DatasetId': 2, 'Name': 'fms_pkl_7a786f58d21782569a3a4aa11852029c', 'Description': None, 'SourceId': 2, 'Created': '2018-07-12 17:49:29.199027'}

In [12]:
# Read dataset 1 back from the database; its "files" column still holds the
# original example paths.
local.get_dataset(1)

Unnamed: 0,bools,files,floats,ndarrays,sets,strings,tuples
0,True,/active/examples/fp_example/0.json,74.00497,"[[0.26331501518513467, 0.5337393933802977], [0...","{1, 2, 3}",foo0,"(1, 2, 3)"
1,False,/active/examples/fp_example/1.json,3.342143,"[[0.9569493362751168, 0.13720932135607644], [0...","{1, 2, 3}",foo1,"(1, 2, 3)"
2,False,/active/examples/fp_example/2.json,85.273554,"[[0.002259233518513537, 0.5212260272202929], [...","{1, 2, 3}",foo2,"(1, 2, 3)"
3,False,/active/examples/fp_example/3.json,16.071675,"[[0.7645604503388788, 0.020809797952066167], [...","{1, 2, 3}",foo3,"(1, 2, 3)"
4,True,/active/examples/fp_example/4.json,67.145265,"[[0.4712297782500141, 0.8161682980460269], [0....","{1, 2, 3}",foo4,"(1, 2, 3)"
5,False,/active/examples/fp_example/5.json,32.756948,"[[0.3346475291060558, 0.9780580790165189], [0....","{1, 2, 3}",foo5,"(1, 2, 3)"
6,False,/active/examples/fp_example/6.json,82.500925,"[[0.40664030180666166, 0.4513084114213143], [0...","{1, 2, 3}",foo6,"(1, 2, 3)"
7,True,/active/examples/fp_example/7.json,96.25969,"[[0.4192502702591062, 0.4240524465509987], [0....","{1, 2, 3}",foo7,"(1, 2, 3)"
8,True,/active/examples/fp_example/8.json,3.516826,"[[0.08427266973184566, 0.7325206981419501], [0...","{1, 2, 3}",foo8,"(1, 2, 3)"
9,True,/active/examples/fp_example/9.json,22.085252,"[[0.055019993340200135, 0.5232460707782919], [...","{1, 2, 3}",foo9,"(1, 2, 3)"


In [13]:
# Read dataset 2 back from the database; here the "files" column points at
# paths under the user's QuiltCli directory instead of the original example
# directory (presumably where the uploaded copies live; confirm).
local.get_dataset(2)

Unnamed: 0,bools,files,floats,ndarrays,sets,strings,tuples
0,False,/home/jovyan/.local/share/QuiltCli/quilt_packa...,70.814782,"[[0.2909047389129443, 0.510827605197663], [0.8...","{1, 2, 3}",foo0,"(1, 2, 3)"
1,True,/home/jovyan/.local/share/QuiltCli/quilt_packa...,20.724288,"[[0.05146720330082988, 0.44080984365063647], [...","{1, 2, 3}",foo1,"(1, 2, 3)"
2,False,/home/jovyan/.local/share/QuiltCli/quilt_packa...,27.848728,"[[0.6762549019801313, 0.5908628174163508], [0....","{1, 2, 3}",foo2,"(1, 2, 3)"
3,True,/home/jovyan/.local/share/QuiltCli/quilt_packa...,41.51012,"[[0.28352508177131874, 0.6931379183129963], [0...","{1, 2, 3}",foo3,"(1, 2, 3)"
4,False,/home/jovyan/.local/share/QuiltCli/quilt_packa...,78.031476,"[[0.30636353237617975, 0.2219578839321814], [0...","{1, 2, 3}",foo4,"(1, 2, 3)"
5,False,/home/jovyan/.local/share/QuiltCli/quilt_packa...,67.238368,"[[0.9028341085383981, 0.8457508712931793], [0....","{1, 2, 3}",foo5,"(1, 2, 3)"
6,False,/home/jovyan/.local/share/QuiltCli/quilt_packa...,55.784076,"[[0.3615647630625781, 0.22505450483983191], [0...","{1, 2, 3}",foo6,"(1, 2, 3)"
7,True,/home/jovyan/.local/share/QuiltCli/quilt_packa...,29.179277,"[[0.45768639932297794, 0.8605339129469265], [0...","{1, 2, 3}",foo7,"(1, 2, 3)"
8,True,/home/jovyan/.local/share/QuiltCli/quilt_packa...,45.462208,"[[0.20541034460498753, 0.20137871104307314], [...","{1, 2, 3}",foo8,"(1, 2, 3)"
9,True,/home/jovyan/.local/share/QuiltCli/quilt_packa...,36.217621,"[[0.7076866218156711, 0.7467462231020046], [0....","{1, 2, 3}",foo9,"(1, 2, 3)"


In [14]:
# Debug dump of the database: for each table, its row count and recent rows.
# NOTE(review): _deep_print is underscore-prefixed, so it is presumably not
# part of the public API and may change; confirm before relying on it.
local._deep_print()

------------------------------- DATASET DATABASE -------------------------------
--------------------------------------------------------------------------------
User:
rows: 1
recent:
{'UserId': 1, 'Name': 'jacksonb', 'Description': None, 'Created': '2018-07-12 17:49:23.612919'}
--------------------------------------------------------------------------------
Iota:
rows: 114
recent:
{'IotaId': 76, 'GroupId': 9, 'Key': 'ndarrays(Reshape)', 'Value': '(2, 2)', 'ValueType': "<class 'tuple'>", 'Created': '2018-07-12 17:49:24.698693'}
{'IotaId': 114, 'GroupId': 9, 'Key': 'ndarrays', 'Value': '[0.7076866218156711 0.7467462231020046 0.6910929218557643\n 0.6891804137549141]', 'ValueType': "<class 'numpy.ndarray'>", 'Created': '2018-07-12 17:49:29.164747'}
{'IotaId': 78, 'GroupId': 9, 'Key': 'sets', 'Value': '{1, 2, 3}', 'ValueType': "<class 'set'>", 'Created': '2018-07-12 17:49:24.726636'}
{'IotaId': 79, 'GroupId': 9, 'Key': 'strings', 'Value': 'foo9', 'ValueType': "<class 'str'>", 'Created': '2