Skip to content

Commit

Permalink
Project renamed. Added support for YAML.
Browse files Browse the repository at this point in the history
  • Loading branch information
ricekab committed Mar 23, 2018
1 parent fb064b0 commit a14fab5
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 44 deletions.
48 changes: 36 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
# sqlalchemy-jsonseeder
Seed SQLAlchemy database with JSON formatted data. Supports references to other entities (and their fields)
# sqlalchemy-seeder
Seed SQLAlchemy database with a simple data format. Supports references to other entities (and their fields)
that are defined alongside it or persisted in the database.

## Requirements & Installation

Runs on Python 2.7 or Python 3

- Dependencies
* sqlalchemy
* jsonschema
#### Dependencies
* sqlalchemy
* jsonschema
* pyyaml

* Installation
`pip install sqlalchemy-jsonseeder`
#### Installation
`pip install sqlalchemy-seeder`

## Usage
Currently there are 2 seeders available: `BasicSeeder` and `ResolvingSeeder`.
Expand All @@ -20,19 +21,25 @@ Currently there are 2 seeders available: `BasicSeeder` and `ResolvingSeeder`.
It does not perform any logic to validate or resolve the values. Wrong values will cause a `KeyError`.

`ResolvingSeeder` allows you to define multiple entities in one file as well as define referential values.
This requires some special JSON format so the seeder will know how to resolve them.
This requires some formatting of the data so the seeder will know how to resolve them.

ResolvingSeeder requires a session to be provided that it uses to query the database (and flush/commit as required).
ResolvingSeeder requires a session to be provided that it uses to query the database to resolve references
(and flush/commit as requested).

Since it has to be made aware of classes
Since the seeder has to be made aware of the classes it can create, they have to be registered before they can be found.
If a class path is provided but not recognized, the seeder will try to register that path before it continues.

### JSON Structure
### Currently supported data formats
* JSON
* YAML

### Data format structure

The top structure is composed of one or more `entity group` objects, each of which defines a target class and a data block.
The data block in turn contains one or more `entity data` blocks, which contain simple key-value pairs alongside
the special `!refs` key where references are defined.

The general structure is outlined here, for some complete examples see further below.
The general structure is outlined here (using JSON), for some complete examples see further below.

* Entity Group

Expand Down Expand Up @@ -211,6 +218,23 @@ populated with the object that is created from this schema.
}
]

This same example in yaml:


- target_class: Country
data:
name: United Kingdom
short: UK
- target_class: Airport
data:
icao: EGLL
name: London Heathrow
'!refs':  # Due to '!' it has to be surrounded in quotes.
country:
target_class: Country
criteria:
short: UK



## Issues
Expand Down
2 changes: 0 additions & 2 deletions jsonseeder/__init__.py

This file was deleted.

2 changes: 2 additions & 0 deletions seeder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from seeder.basic_seeder import BasicSeeder
from seeder.resolving_seeder import ResolvingSeeder
File renamed without changes.
File renamed without changes.
30 changes: 22 additions & 8 deletions jsonseeder/resolving_seeder.py → seeder/resolving_seeder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

import jsonschema
import pkg_resources
from jsonseeder.exceptions import AmbiguousReferenceError, UnresolvedReferencesError, EntityBuildError
import yaml
from seeder.exceptions import AmbiguousReferenceError, UnresolvedReferencesError, EntityBuildError
from sqlalchemy import inspect as sainsp
from sqlalchemy.exc import NoInspectionAvailable
from sqlalchemy.orm.exc import MultipleResultsFound
Expand Down Expand Up @@ -101,22 +102,35 @@ class ResolvingSeeder(object):

def __init__(self, session):
self.session = session
schema_string = pkg_resources.resource_string('jsonseeder', VALIDATION_SCHEMA_RSC)
schema_string = pkg_resources.resource_string('seeder', VALIDATION_SCHEMA_RSC)
self.validation_schema = json.loads(schema_string)
self.registry = ClassRegistry()

def load_entities_from_file(self, seed_file, separate_by_class=False, flush_on_create=True, commit=False):
def load_entities_from_json_file(self, seed_file, separate_by_class=False, flush_on_create=True, commit=False):
with open(seed_file, 'rt') as json_file:
json_string = json_file.read()
json_data = json.loads(json_string)
return self.load_entities_from_json_dict(json_data, separate_by_class, flush_on_create, commit)
return self.load_entities_from_json_string(json_string, separate_by_class, flush_on_create, commit)

def load_entities_from_json_dict(self, seed_data, separate_by_class=False, flush_on_create=True, commit=False):
def load_entities_from_json_string(self, json_string, separate_by_class=False, flush_on_create=True, commit=False):
    """
    Decode a JSON document and create entities from the resulting data.

    :param json_string: JSON text describing the entities to create.
    :param separate_by_class: Forwarded unchanged to `load_entities_from_data_dict`.
    :param flush_on_create: Forwarded unchanged to `load_entities_from_data_dict`.
    :param commit: Forwarded unchanged to `load_entities_from_data_dict`.
    :return: Whatever `load_entities_from_data_dict` returns for the decoded data.
    """
    return self.load_entities_from_data_dict(
        json.loads(json_string), separate_by_class, flush_on_create, commit)

def load_entities_from_yaml_file(self, seed_file, separate_by_class=False, flush_on_create=True, commit=False):
    """
    Read a YAML seed file and create entities from its contents.

    :param seed_file: Path of the YAML file to read.
    :param separate_by_class: Forwarded unchanged to `load_entities_from_yaml_string`.
    :param flush_on_create: Forwarded unchanged to `load_entities_from_yaml_string`.
    :param commit: Forwarded unchanged to `load_entities_from_yaml_string`.
    :return: Whatever `load_entities_from_yaml_string` returns for the file contents.
    """
    with open(seed_file, 'rt') as yaml_file:
        yaml_string = yaml_file.read()
    # The contents are YAML, so they must go through the YAML string loader; handing them to the
    # JSON string loader makes json.loads reject any document that is not also valid JSON.
    return self.load_entities_from_yaml_string(yaml_string, separate_by_class, flush_on_create, commit)

def load_entities_from_yaml_string(self, yaml_string, separate_by_class=False, flush_on_create=True, commit=False):
    """
    Parse a YAML document and create entities from the resulting data.

    :param yaml_string: YAML text describing the entities to create.
    :param separate_by_class: Forwarded unchanged to `load_entities_from_data_dict`.
    :param flush_on_create: Forwarded unchanged to `load_entities_from_data_dict`.
    :param commit: Forwarded unchanged to `load_entities_from_data_dict`.
    :return: Whatever `load_entities_from_data_dict` returns for the parsed data.
    """
    # safe_load only builds plain Python types (dict, list, str, numbers, ...), which is all the
    # seed format needs. Plain yaml.load without an explicit Loader can instantiate arbitrary
    # objects from tagged input and is rejected outright by newer PyYAML releases.
    data = yaml.safe_load(yaml_string)
    return self.load_entities_from_data_dict(data, separate_by_class, flush_on_create, commit)

def load_entities_from_data_dict(self, seed_data, separate_by_class=False, flush_on_create=True, commit=False):
"""
:param seed_data: The json formatted entity dict or list. This collection can be modified by the resolver.
:param seed_data: The formatted entity dict or list. This collection can be modified by the resolver.
:param separate_by_class: Whether the output should separate entities by class (in a dict)
:param flush_on_create: Whether entities should be flushed once they are created. Note that the provided session
could be configured with `autoflush=True` in which case flushes can still happen.
could be configured with `autoflush=True` in which case flushes can still happen. Flushes are useful in that
they generate the ids that can then be referenced.
:param commit: Whether the session should be committed after entities are generated.
:return: List of entities or a dictionary mapping of classes to a list of entities based on separate_by_class.
:raise ValidationError: If the provided data does not conform to the expected json structure.
Expand Down
File renamed without changes.
14 changes: 7 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from setuptools import setup, find_packages

setup(
name='sqlalchemy-jsonseeder',
version='0.1.2',
name='sqlalchemy-seeder',
version='0.2.0',
packages=find_packages(exclude=["tests"]),
package_data={"jsonseeder": ["resources/*"]},
url='https://github.com/RiceKab/sqlalchemy-jsonseeder',
package_data={"seeder": ["resources/*"]},
url='https://github.com/RiceKab/sqlalchemy-seeder',
license='MIT',
author='Kevin CY Tang',
author_email='kevin@cyborn.be',
keywords='sqlalchemy json seed',
description="Tool for creating (and persisting) SQLAlchemy entities from JSON formatted data.",
keywords='sqlalchemy json yaml seed',
description="Tool for creating (and persisting) SQLAlchemy entities from a simple data format.",
classifiers=[
'Development Status :: 3 - Alpha',
'Intended Audience :: Developers',
Expand All @@ -19,7 +19,7 @@
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3'
],
install_requires=['SQLAlchemy', 'jsonschema'],
install_requires=['SQLAlchemy', 'jsonschema', 'pyyaml'],
tests_require=["pytest"],  # setuptools spells this keyword `tests_require`; `test_requires` is ignored
python_requires='>=2.7'
)
2 changes: 1 addition & 1 deletion tests/test_basic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from jsonseeder.basic_seeder import BasicSeeder
from seeder.basic_seeder import BasicSeeder


def test_basic_from_dict(model):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_registry.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from jsonseeder.resolving_seeder import ClassRegistry
from seeder.resolving_seeder import ClassRegistry


@pytest.fixture()
Expand Down
98 changes: 85 additions & 13 deletions tests/test_resolver.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
from jsonschema import ValidationError
from jsonseeder.exceptions import UnresolvedReferencesError, AmbiguousReferenceError
from jsonseeder.resolving_seeder import ResolvingSeeder
from seeder.exceptions import UnresolvedReferencesError, AmbiguousReferenceError
from seeder.resolving_seeder import ResolvingSeeder


@pytest.fixture()
Expand All @@ -27,7 +27,7 @@ def resolver_populated(session, model):


def test_resolver_basic(model, resolver_populated, session):
entities = resolver_populated.load_entities_from_json_dict(COUNTRY_SINGLE_OK, commit=True)
entities = resolver_populated.load_entities_from_data_dict(COUNTRY_SINGLE_OK, commit=True)
assert len(entities) == 1
country = entities[0]
retrieved_countries = session.query(model.Country).all()
Expand All @@ -47,7 +47,7 @@ def test_resolver_basic(model, resolver_populated, session):

def test_resolver_single_bad_format(model, resolver_populated, session):
with pytest.raises(ValidationError):
resolver_populated.load_entities_from_json_dict(COUNTRY_SINGLE_BAD_FORMAT)
resolver_populated.load_entities_from_data_dict(COUNTRY_SINGLE_BAD_FORMAT)


COUNTRY_LIST_COMBINED_OK = {
Expand All @@ -65,7 +65,7 @@ def test_resolver_single_bad_format(model, resolver_populated, session):


def test_resolver_combined(model, resolver_populated, session):
entities = resolver_populated.load_entities_from_json_dict(COUNTRY_LIST_COMBINED_OK, commit=True)
entities = resolver_populated.load_entities_from_data_dict(COUNTRY_LIST_COMBINED_OK, commit=True)
assert len(entities) == 2
retrieved_countries = session.query(model.Country).all()
assert len(retrieved_countries) == 2
Expand Down Expand Up @@ -94,7 +94,7 @@ def test_resolver_combined(model, resolver_populated, session):


def test_resolver_separate(model, resolver_populated, session):
entities = resolver_populated.load_entities_from_json_dict(COUNTRY_LIST_SEPARATE_OK, commit=True)
entities = resolver_populated.load_entities_from_data_dict(COUNTRY_LIST_SEPARATE_OK, commit=True)
assert len(entities) == 2
retrieved_countries = session.query(model.Country).all()
assert len(retrieved_countries) == 2
Expand Down Expand Up @@ -123,7 +123,7 @@ def test_resolver_reference_entity(model, resolver_populated, session):
country = model.Country(name="United Kingdom", short="UK")
session.add(country)
session.commit()
entities = resolver_populated.load_entities_from_json_dict(AIRPORT_COUNTRY_REFERENCE_ENTITY_OK, commit=True)
entities = resolver_populated.load_entities_from_data_dict(AIRPORT_COUNTRY_REFERENCE_ENTITY_OK, commit=True)
assert len(entities) == 1
airport = entities[0]
assert airport.country.id == airport.country_id == country.id
Expand Down Expand Up @@ -154,7 +154,7 @@ def test_resolver_reference_entity(model, resolver_populated, session):
def test_resolver_reference_field(model, resolver_populated, session):
session.add(model.Country(name="United Kingdom", short="UK"))
session.commit()
entities = resolver_populated.load_entities_from_json_dict(AIRPORT_COUNTRY_REFERENCE_FIELD_OK, commit=True)
entities = resolver_populated.load_entities_from_data_dict(AIRPORT_COUNTRY_REFERENCE_FIELD_OK, commit=True)
assert len(entities) == 1
airport = entities[0]
assert airport.country.id == airport.country_id
Expand Down Expand Up @@ -184,7 +184,7 @@ def test_resolver_bad_reference(model, resolver_populated, session):
# UK never added
assert len(session.query(model.Country).all()) == 0
with pytest.raises(UnresolvedReferencesError):
entities = resolver_populated.load_entities_from_json_dict(AIRPORT_COUNTRY_REFERENCE_FIELD_BAD, commit=True)
entities = resolver_populated.load_entities_from_data_dict(AIRPORT_COUNTRY_REFERENCE_FIELD_BAD, commit=True)
assert entities[0].country is None
assert entities[0].country_id is None

Expand Down Expand Up @@ -213,7 +213,7 @@ def test_resolver_ambiguous_reference(model, resolver_populated, session):
session.commit()
assert len(session.query(model.Country).filter_by(short="UK").all()) == 2
with pytest.raises(AmbiguousReferenceError):
resolver_populated.load_entities_from_json_dict(AIRPORT_COUNTRY_REFERENCE_FIELD_AMBIGUOUS, commit=True)
resolver_populated.load_entities_from_data_dict(AIRPORT_COUNTRY_REFERENCE_FIELD_AMBIGUOUS, commit=True)


AIRPORT_COUNTRY_PARALLEL_OK = [
Expand Down Expand Up @@ -245,7 +245,7 @@ def test_resolver_ambiguous_reference(model, resolver_populated, session):


def test_resolver_parallel(model, resolver_populated, session):
entities = resolver_populated.load_entities_from_json_dict(AIRPORT_COUNTRY_PARALLEL_OK, commit=True)
entities = resolver_populated.load_entities_from_data_dict(AIRPORT_COUNTRY_PARALLEL_OK, commit=True)
assert len(entities) == 2
airport = session.query(model.Airport).first()
assert airport.country.id == airport.country_id
Expand Down Expand Up @@ -282,11 +282,83 @@ def test_resolver_parallel(model, resolver_populated, session):


def test_resolver_separate_by_class(model, resolver_populated, session):
entities = resolver_populated.load_entities_from_json_dict(AIRPORT_COUNTRY_SEPARATE_BY_CLASS, commit=True, separate_by_class=True)
entities = resolver_populated.load_entities_from_data_dict(AIRPORT_COUNTRY_SEPARATE_BY_CLASS, commit=True,
separate_by_class=True)
assert len(entities[model.Airport]) == 1
assert len(entities[model.Country]) == 1

# Inline nested structure makes it too complex so the feature is not planned currently.

JSON_STRING = '''
[
{
"target_class": "Country",
"data": [
{
"name": "United Kingdom",
"short": "UK"
},
{
"name": "Belgium",
"short": "BE"
}
]
},
{
"target_class": "Airport",
"data": {
"icao": "EGLL",
"name": "London Heathrow",
"!refs": {
"country": {
"target_class": "Country",
"criteria": {
"short": "UK"
}
}
}
}
}
]'''


def test_resolver_json_string(model, resolver_populated, session):
    """Seeding from JSON_STRING persists one airport and two countries, with Heathrow linked to the UK."""
    seeded = resolver_populated.load_entities_from_json_string(
        JSON_STRING, commit=True, separate_by_class=True)
    heathrow = session.query(model.Airport).filter_by(icao="EGLL").one()

    # The returned mapping and the database must agree on how many rows were created.
    seeded_airports = seeded[model.Airport]
    stored_airports = session.query(model.Airport).all()
    assert len(seeded_airports) == len(stored_airports) == 1

    seeded_countries = seeded[model.Country]
    stored_countries = session.query(model.Country).all()
    assert len(seeded_countries) == len(stored_countries) == 2

    assert heathrow.name == "London Heathrow"
    heathrow_country = heathrow.country
    united_kingdom = session.query(model.Country).filter_by(short="UK").one()
    assert heathrow_country == united_kingdom


YAML_STRING = '''
- target_class: Country
data:
- name: United Kingdom
short: UK
- name: Belgium
short: BE
- target_class: Airport
data:
icao: EGLL
name: London Heathrow
"!refs":
country:
target_class: Country
criteria:
short: UK
'''


def test_resolver_yaml_string(model, resolver_populated, session):
    """Seeding from YAML_STRING persists one airport and two countries, with Heathrow linked to the UK."""
    seeded = resolver_populated.load_entities_from_yaml_string(
        YAML_STRING, commit=True, separate_by_class=True)
    heathrow = session.query(model.Airport).filter_by(icao="EGLL").one()

    # The returned mapping and the database must agree on how many rows were created.
    seeded_airports = seeded[model.Airport]
    stored_airports = session.query(model.Airport).all()
    assert len(seeded_airports) == len(stored_airports) == 1

    seeded_countries = seeded[model.Country]
    stored_countries = session.query(model.Country).all()
    assert len(seeded_countries) == len(stored_countries) == 2

    assert heathrow.name == "London Heathrow"
    heathrow_country = heathrow.country
    united_kingdom = session.query(model.Country).filter_by(short="UK").one()
    assert heathrow_country == united_kingdom

# Inline nested structure makes it too complex so the feature is not planned currently. May revisit it soon.

# AIRPORT_COUNTRY_INLINE_OK = {
# "target_class": "Airport",
Expand Down

0 comments on commit a14fab5

Please sign in to comment.