In [1]:
import datasetdatabase as dsdb
import pandas as pd
import numpy as np
import pathlib
import quilt
import json
import os

In [2]:
# Create a connection manager for the LOCAL config, acting as user "jacksonb".
mngr = dsdb.ConnectionManager(dsdb.LOCAL, user="jacksonb")
# Bare last expression: show the manager's repr (driver and database link).
mngr

LOCAL:
	driver: sqlite
	link: /active/examples/local_database/local.db

In [3]:
# Open a connection to the LOCAL database through the manager.
local = mngr.connect(dsdb.LOCAL)
# Bare last expression: show the connection's repr, which lists recent datasets.
local

Recent Datasets:
--------------------------------------------------------------------------------

In [4]:
# Create the example dataset to upload: ten rows covering several value types
# (str, bool, float, ndarray, tuple, set) plus a path to a small JSON file that
# is written to disk for each row.
fp_ex = pathlib.Path("/active/examples/fp_example/")
# exist_ok=True replaces the exists()-then-makedirs() pair, which could fail
# if the directory appeared between the check and the create.
fp_ex.mkdir(parents=True, exist_ok=True)

# Seed numpy's global RNG so the random columns are the same on every run.
np.random.seed(seed=12)

test = []
for i in range(10):
    # One placeholder JSON file per row; its path goes in the "files" column.
    fp = fp_ex / (str(i) + ".json")
    with open(fp, "w") as write_out:
        json.dump({"hello": "world"}, write_out)

    # Dict values are evaluated top to bottom, so the np.random calls happen in
    # the order bools -> floats -> ndarrays for each row.
    test.append({
        "strings": "foo" + str(i),
        "bools": np.random.rand() < 0.5,
        "floats": np.random.rand() * 100,
        "ndarrays": np.random.rand(2, 2),
        "tuples": (1, 2, 3),
        "sets": {1, 2, 3},
        "files": str(fp),
    })

# Build the frame once from the list of row dicts, then keep a CSV copy next to
# the JSON files.
test = pd.DataFrame(test)
test.to_csv(fp_ex / "example.csv")
test

Unnamed: 0,bools,files,floats,ndarrays,sets,strings,tuples
0,True,/active/examples/fp_example/0.json,74.00497,"[[0.26331501518513467, 0.5337393933802977], [0...","{1, 2, 3}",foo0,"(1, 2, 3)"
1,False,/active/examples/fp_example/1.json,3.342143,"[[0.9569493362751168, 0.13720932135607644], [0...","{1, 2, 3}",foo1,"(1, 2, 3)"
2,False,/active/examples/fp_example/2.json,85.273554,"[[0.002259233518513537, 0.5212260272202929], [...","{1, 2, 3}",foo2,"(1, 2, 3)"
3,False,/active/examples/fp_example/3.json,16.071675,"[[0.7645604503388788, 0.020809797952066167], [...","{1, 2, 3}",foo3,"(1, 2, 3)"
4,True,/active/examples/fp_example/4.json,67.145265,"[[0.4712297782500141, 0.8161682980460269], [0....","{1, 2, 3}",foo4,"(1, 2, 3)"
5,False,/active/examples/fp_example/5.json,32.756948,"[[0.3346475291060558, 0.9780580790165189], [0....","{1, 2, 3}",foo5,"(1, 2, 3)"
6,False,/active/examples/fp_example/6.json,82.500925,"[[0.40664030180666166, 0.4513084114213143], [0...","{1, 2, 3}",foo6,"(1, 2, 3)"
7,True,/active/examples/fp_example/7.json,96.25969,"[[0.4192502702591062, 0.4240524465509987], [0....","{1, 2, 3}",foo7,"(1, 2, 3)"
8,True,/active/examples/fp_example/8.json,3.516826,"[[0.08427266973184566, 0.7325206981419501], [0...","{1, 2, 3}",foo8,"(1, 2, 3)"
9,True,/active/examples/fp_example/9.json,22.085252,"[[0.055019993340200135, 0.5232460707782919], [...","{1, 2, 3}",foo9,"(1, 2, 3)"


In [5]:
# Upload the example frame under an explicit name and description.
# type_map lists a type for five of the seven columns; "tuples" and "sets"
# are not included in it.
# NOTE(review): store_files=False presumably leaves the files named in the
# "files" column where they are rather than copying them — confirm against the
# datasetdatabase documentation.
ds_info = local.upload_dataset(
    dataset=test,
    name="test_dataset",
    description="this is the hello world of dataset ingestion",
    type_map={
        "bools": bool,
        "files": str,
        "floats": float,
        "ndarrays": np.ndarray,
        "strings": str,
    },
    store_files=False,
    filepath_columns=["files"],
)
# Bare last expression: show the info record returned by the upload.
ds_info

Validating Dataset...
Creating Iota...
Creating Junction Items...
Dataset upload complete!


{'DatasetId': 1,
 'Name': 'test_dataset',
 'Description': 'this is the hello world of dataset ingestion',
 'SourceId': 1,
 'Created': '2018-07-11 20:58:45.067501'}

In [6]:
# Display the connection again; its repr lists the recent datasets.
local

Recent Datasets:
--------------------------------------------------------------------------------
{'DatasetId': 1, 'Name': 'test_dataset', 'Description': 'this is the hello world of dataset ingestion', 'SourceId': 1, 'Created': '2018-07-11 20:58:45.067501'}

In [7]:
# Upload the same frame again, passing only `dataset`. The output below shows
# the result gets a generated name ("fms_pkl_...") and a Description of None.
# Note: this rebinds ds_info, replacing the value from the previous upload.
ds_info = local.upload_dataset(dataset=test)
ds_info

Validating Dataset...
Creating Iota...
Creating Junction Items...
Dataset upload complete!


{'DatasetId': 2,
 'Name': 'fms_pkl_e2af2d0b78717d8f1f0443343e007605',
 'Description': None,
 'SourceId': 1,
 'Created': '2018-07-11 20:58:46.932953'}

In [8]:
# Display the connection once more; both uploaded datasets are now listed.
local

Recent Datasets:
--------------------------------------------------------------------------------
{'DatasetId': 1, 'Name': 'test_dataset', 'Description': 'this is the hello world of dataset ingestion', 'SourceId': 1, 'Created': '2018-07-11 20:58:45.067501'}
{'DatasetId': 2, 'Name': 'fms_pkl_e2af2d0b78717d8f1f0443343e007605', 'Description': None, 'SourceId': 1, 'Created': '2018-07-11 20:58:46.932953'}

In [10]:
# Print a per-table summary of the database (row counts and recent rows).
# NOTE(review): _deep_print is underscore-prefixed, i.e. not part of the public
# API by convention — confirm it is intended for use in examples.
# NOTE(review): the execution count jumps from 8 to 10 here; confirm the
# notebook reproduces these outputs under Restart Kernel -> Run All.
local._deep_print()

------------------------------- DATASET DATABASE -------------------------------
--------------------------------------------------------------------------------
User:
rows: 1
recent:
{'UserId': 1, 'Name': 'jacksonb', 'Description': None, 'Created': '2018-07-11 20:58:43.657186'}
--------------------------------------------------------------------------------
Iota:
rows: 80
recent:
{'IotaId': 76, 'GroupId': 9, 'Key': 'ndarrays(Reshape)', 'Value': '(2, 2)', 'ValueType': "<class 'tuple'>", 'Created': '2018-07-11 20:58:45.007594'}
{'IotaId': 77, 'GroupId': 9, 'Key': 'ndarrays', 'Value': '[0.055019993340200135 0.5232460707782919   0.4163696572946102\n 0.048218749825570395]', 'ValueType': "<class 'numpy.ndarray'>", 'Created': '2018-07-11 20:58:45.007594'}
{'IotaId': 78, 'GroupId': 9, 'Key': 'sets', 'Value': '{1, 2, 3}', 'ValueType': "<class 'set'>", 'Created': '2018-07-11 20:58:45.031846'}
{'IotaId': 79, 'GroupId': 9, 'Key': 'strings', 'Value': 'foo9', 'ValueType': "<class 'str'>", 'Created'