In [1]:
import datasetdatabase as dsdb

In [2]:
# Display the LOCAL connection config that datasetdatabase exposes
# (the output below shows a sqlite driver and a database file path).
dsdb.LOCAL

{'local': {'driver': 'sqlite', 'database': '/active/examples/local.db'}}

In [3]:
import pandas as pd
import numpy as np
import pathlib
import json

# Seed NumPy's global RNG so the random values below are reproducible.
np.random.seed(seed=12)

# One record per row, covering a mix of value types (str, bool, float,
# ndarray, tuple, set, file path). Dict entries are evaluated top to bottom,
# so the RNG is always consumed in the same order for each row:
# one draw for "bools", one for "floats", then four for the 2x2 "ndarrays".
records = [
    {
        "strings": "foo" + str(row),
        "bools": np.random.rand() < 0.5,
        "floats": np.random.rand() * 100,
        "ndarrays": np.random.rand(2, 2),
        "tuples": (1, 2, 3),
        "sets": {1, 2, 3},
        # Round-trip through pathlib to normalise the path (drops the "./").
        "files": str(pathlib.Path("./EXPLAINER-connection_manager.ipynb")),
    }
    for row in range(4)
]

# Build the example DataFrame from the records in one step and display it.
test = pd.DataFrame(records)
test

Unnamed: 0,bools,files,floats,ndarrays,sets,strings,tuples
0,True,EXPLAINER-connection_manager.ipynb,74.00497,"[[0.26331501518513467, 0.5337393933802977], [0...","{1, 2, 3}",foo0,"(1, 2, 3)"
1,False,EXPLAINER-connection_manager.ipynb,3.342143,"[[0.9569493362751168, 0.13720932135607644], [0...","{1, 2, 3}",foo1,"(1, 2, 3)"
2,False,EXPLAINER-connection_manager.ipynb,85.273554,"[[0.002259233518513537, 0.5212260272202929], [...","{1, 2, 3}",foo2,"(1, 2, 3)"
3,False,EXPLAINER-connection_manager.ipynb,16.071675,"[[0.7645604503388788, 0.020809797952066167], [...","{1, 2, 3}",foo3,"(1, 2, 3)"


In [4]:
# Expected Python type for every column of the example frame, keyed by
# column name.
column_types = {
    "bools": bool,
    "files": str,
    "floats": float,
    "ndarrays": np.ndarray,
    "sets": set,
    "strings": str,
    "tuples": tuple,
}

# Wrap the DataFrame in a Dataset. Per the log printed below, construction
# casts values to the type map, checks that the files in the "files" column
# exist, and checks the value types.
t = dsdb.Dataset(
    test,
    name="test dataset",
    description="this is the hello world of dataset creation",
    filepath_columns="files",
    type_validation_map=column_types,
    import_as_type_map=True,
)
t

Casting values to type map...
Checking files exist...
Checking dataset value types...


info: None
df: True
name: test dataset
description: this is the hello world of dataset creation
filepath_columns: files
validated_items: {'types': True, 'values': False, 'files': True}

In [5]:
# One predicate per column to validate: every tuple must equal (1, 2, 3)
# and every float must be non-negative.
value_checks = {
    "tuples": lambda x: x == (1, 2, 3),
    "floats": lambda x: x >= 0,
}

# Run the checks; the summary below then reports 'values': True.
t.enforce_values_using_map(value_checks)
t

Checking dataset values...


info: None
df: True
name: test dataset
description: this is the hello world of dataset creation
filepath_columns: files
validated_items: {'types': True, 'values': True, 'files': True}

In [6]:
# Save the dataset under the name "test"; save() hands back a path object
# (displayed below as PosixPath('test.dataset')).
ser = t.save("test")
ser

Checking files exist...


PosixPath('test.dataset')

In [7]:
# Read the dataset back from the path returned by save() and show its summary.
# Note in the output that the 'types' and 'values' validation flags come back
# False after the round trip, while 'files' is True.
des = dsdb.read_dataset(ser)
des

info: None
df: True
name: test dataset
description: this is the hello world of dataset creation
filepath_columns: files
validated_items: {'types': False, 'values': False, 'files': True}

In [8]:
import hashlib
import pickle
def _md5_of_pickle(obj):
    """Return the hex MD5 digest of the pickle serialization of ``obj``."""
    return hashlib.md5(pickle.dumps(obj)).hexdigest()


# Compare the original frame with the saved-and-reloaded one by hashing their
# pickled bytes; True means both frames pickle to identical bytes.
_md5_of_pickle(t.df) == _md5_of_pickle(des.df)

True

In [9]:
# Open a DatasetDatabase using the LOCAL config as user "jacksonb" and display it.
# build=True presumably creates the schema when it is missing -- TODO confirm
# against the DatasetDatabase documentation.
# NOTE(review): the user name is hardcoded; readers running this notebook
# will want to substitute their own.
local = dsdb.DatasetDatabase(config=dsdb.LOCAL, user="jacksonb", build=True)
local

Recent Datasets:
--------------------------------------------------------------------------------

In [10]:
# Show the database's "recent" report: one section per table (User, Iota,
# Group, Source, ...), as printed in the output below.
local.recent

------------------------------- DATASET DATABASE -------------------------------
--------------------------------------------------------------------------------
Recent User:
{'UserId': 1, 'Name': 'jacksonb', 'Description': None, 'Created': '2018-08-20 21:28:36.320321'}
--------------------------------------------------------------------------------
Recent Iota:
--------------------------------------------------------------------------------
Recent Group:
--------------------------------------------------------------------------------
Recent Source:
--------------------------------------------------------------------------------
Recent FileSource:
--------------------------------------------------------------------------------
Recent QuiltSource:
--------------------------------------------------------------------------------
Recent Dataset:
--------------------------------------------------------------------------------
Recent IotaGroupJunction:
---------------------------------------