# Frictionless SQLModel

<br>

### Imports

In [1]:
import json
import requests
from copy import copy

from powerdict import db, schemas, frictionless

In [2]:
from IPython.display import JSON

<br>

### Frictionless Specifications

A Frictionless `DataPackage` contains one or more `DataResource`s that have a corresponding `DataSchema` with `FieldDescriptor`s that describe individual columns within datasets.

Here we'll load in the schema for a `DataPackage`

In [3]:
# Display the schema dict of the `DataPackage` model via IPython's JSON display object.
JSON(schemas.DataPackage.schema())

<IPython.core.display.JSON object>

In [4]:
# Same schema, serialised to a JSON string (shown as plain text below).
schemas.DataPackage.schema_json()

'{"title": "DataPackage", "type": "object", "properties": {"name": {"title": "Name", "pattern": "^[a-z0-9_\\\\.,-]*$", "type": "string"}, "id": {"title": "Id", "anyOf": [{"type": "string", "format": "uuid"}, {}]}, "profile": {"default": "tabular-data-package", "allOf": [{"$ref": "#/definitions/DataPackageProfile"}]}, "title": {"title": "Title", "type": "string"}, "description": {"title": "Description", "type": "string"}, "homepage": {"title": "Homepage", "type": "string"}, "version": {"title": "Version", "pattern": "^(0|[1-9]\\\\d*)\\\\.(0|[1-9]\\\\d*)\\\\.(0|[1-9]\\\\d*)(?:-((?:0|[1-9]\\\\d*|\\\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\\\.(?:0|[1-9]\\\\d*|\\\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\\\+([0-9a-zA-Z-]+(?:\\\\.[0-9a-zA-Z-]+)*))?$", "type": "string"}, "keywords": {"title": "Keywords", "type": "array", "items": {"type": "string"}}, "image": {"title": "Image", "anyOf": [{"type": "string", "format": "path"}, {"type": "string", "minLength": 1, "maxLength": 65536, "format": "uri"}]}, "created"

<br>

We'll now load in the raw metadata (the JSON descriptor) of an example Frictionless data package.

In [5]:
def load_raw_fd_package(fd_fp):
    """Read a JSON file from disk and return its decoded content.

    Parameters
    ----------
    fd_fp : str or path-like
        Location of the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file content.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. ``FileNotFoundError``).
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # Read explicitly as UTF-8: without it `open` falls back to the platform's
    # default encoding, which breaks on non-ASCII text on some systems.
    with open(fd_fp, 'r', encoding='utf-8') as f:
        raw_fd_package = json.load(f)

    return raw_fd_package

In [6]:
# Descriptor file used for the rest of the notebook.
# Alternative example in the same directory: '../tests/data/s-and-p-500-companies.json'
fd_fp = '../tests/data/repd-metadata.json'

raw_package_metadata = load_raw_fd_package(fd_fp)

# Wrapped in a list before being handed to the JSON display object.
JSON([raw_package_metadata])

<IPython.core.display.JSON object>

<br>

We can now parse and validate this raw package metadata using our schema

In [7]:
# Build a `DataPackage` instance from the raw dict loaded above.
package_metadata = schemas.DataPackage.parse_obj(raw_package_metadata)

# Bare expression so the notebook shows the parsed object's repr.
package_metadata

DataPackage(name='renewable-energy-planning-database', id=None, profile=<DataPackageProfile.tabular_data_package: 'tabular-data-package'>, title='Renewable Energy Planning Database', description="The Renewable Energy Planning Database ('REPD') is managed by Barbour ABI on behalf of the Department of Business, Energy & Industrial Strategy ('BEIS’). The databases track the progress of renewable electricity projects (including those that could also be used for CHP), and electricity storage projects  from inception, through planning, construction, operation and decommissioning.\n\nThe REPD is updated on a quarterly basis, and contains information on all Renewable Electricity and CHP projects up to the end of the previous calendar month.", homepage='https://www.gov.uk/government/publications/renewable-energy-planning-database-monthly-extract', version='0.1.0', keywords=None, image=None, created=None, resources=[DataResource(name='renewable-energy-planning-database', data=None, path=PosixPat

<br>

### Saving to a DB

In [8]:
# Database file passed as `database_name` when constructing the client below.
# NOTE(review): this file appears to persist between runs - the repeated rows in the
# `fd__data_package` output suggest each run adds another copy; confirm this is intended.
db_fp = '../tests/data/dictionary.db'

In [9]:
# Connection settings gathered in one place, then unpacked into the client.
client_options = {
    'database_name': db_fp,
    'dialect': 'sqlite',
    'driver': None,
}
db_client = db.DbClient(**client_options)

# Set up the tables before anything is written to the database.
db_client.create_tables()

db_client

<powerdict.db.DbClient at 0x1370bea60>

In [10]:
# Re-read the descriptor from disk instead of reusing the dict loaded earlier.
# NOTE(review): presumably so the save call gets a fresh, unmodified input - confirm.
raw_package_metadata = load_raw_fd_package(fd_fp)
fd_package = frictionless.save_fd_package_to_db(raw_package_metadata, db_client)

# Identifier of the saved package; used later to fetch it back from the database.
fd_package.data_package_id

UUID('2c815271-d51a-4ac9-86da-2793465bd477')

In [12]:
# Preview the first rows of the `fd__data_package` table.
db_client.get_all('fd__data_package').head()

Unnamed: 0,profile,description,version,keywords,created,name,id,title,homepage,image,data_package_id
0,tabular-data-package,List of companies in the S&P 500 (Standard and...,1.0.0,,,s-and-p-500-companies,core/s-and-p-500-companies,S&P 500 Companies with Financial Information,,,4b782395-d70d-485a-be81-286da1a08676
1,tabular-data-package,List of companies in the S&P 500 (Standard and...,1.0.0,,,s-and-p-500-companies,core/s-and-p-500-companies,S&P 500 Companies with Financial Information,,,d8d8738f-bcaf-4be5-b2c6-a3908bcc45c8
2,tabular-data-package,List of companies in the S&P 500 (Standard and...,1.0.0,,,s-and-p-500-companies,core/s-and-p-500-companies,S&P 500 Companies with Financial Information,,,1b1445aa-4e1b-4efe-8a92-e62d5cfa20f4
3,tabular-data-package,The Renewable Energy Planning Database ('REPD'...,0.1.0,,,renewable-energy-planning-database,,Renewable Energy Planning Database,https://www.gov.uk/government/publications/ren...,,d310fc8c-1cb4-49e4-bd10-177bee339052
4,tabular-data-package,List of companies in the S&P 500 (Standard and...,1.0.0,,,s-and-p-500-companies,core/s-and-p-500-companies,S&P 500 Companies with Financial Information,,,58ab2931-182d-4787-b0ba-4ae8bba79d70


In [13]:
# Preview the first rows of the `fd__field_descriptor` table.
db_client.get_all('fd__field_descriptor').head()

Unnamed: 0,name,type,example,constraints,data_schema_id,title,description,format,rdfType,field_descriptor_id
0,Symbol,string,,,75619819-243c-46ea-a7e4-d7e43ca1fc96,,,,,3e11fe02-25f8-49fe-b714-2f142957bf85
1,Name,string,,,75619819-243c-46ea-a7e4-d7e43ca1fc96,,,,,13c8e6b4-9334-489f-b7bc-cc0784a143d0
2,Sector,string,,,75619819-243c-46ea-a7e4-d7e43ca1fc96,,,,,75724b59-7dc2-44c7-93cd-2daac77a01cb
3,Symbol,string,,,f5573618-da5a-48da-a281-9e6e34fde8dc,,,,,ce72387f-369f-4715-ae74-d223f8d2d579
4,Name,string,,,f5573618-da5a-48da-a281-9e6e34fde8dc,,,,,ce104148-607f-43c2-839e-ff5abd8deb23


In [23]:
# Fetch the package saved above by its id ('dict' is passed as the second argument -
# presumably the requested return format; confirm against `DbClient.get_data_package`).
fd_package_record = db_client.get_data_package(fd_package.data_package_id, 'dict')

# Parse the fetched record with the `DataPackage` model, then convert it with
# `db_record_to_dict_repr` for display and for the roundtrip check.
fd_package_obj = schemas.DataPackage.parse_obj(fd_package_record)
fd_package_dict = db.db_record_to_dict_repr(fd_package_obj)

JSON([fd_package_dict])

<IPython.core.display.JSON object>

In [24]:
def dict_deep_equals(d1, d2, path=""):
    """Check that every key of `d1` is present in `d2` with an equal value.

    The check is driven by the keys of `d1`: keys that exist only at the top
    level of `d2` are ignored. Values are compared with `!=`, so nested
    dictionaries must match exactly (an extra nested key in `d2` fails).

    The first difference found is printed and the function returns immediately.
    When both differing values are dictionaries, the function recurses so the
    report points at the innermost differing key.

    Parameters
    ----------
    d1 : dict
        Dictionary whose keys drive the comparison.
    d2 : dict
        Dictionary that `d1` is compared against.
    path : str, optional
        Key path accumulated during recursion, used only as a prefix in the
        printed report. Callers normally leave this at its default.

    Returns
    -------
    bool
        True if no difference was found, False otherwise.
    """
    for k in d1:
        if k not in d2:
            print("%s%s as key not in d2\n" % ("%s: " % path if path else "", k))
            return False

        if d1[k] != d2[k]:
            # Only descend when both sides are dicts; a dict compared against a
            # scalar is reported as an ordinary value mismatch below.
            if isinstance(d1[k], dict) and isinstance(d2[k], dict):
                child_path = "%s -> %s" % (path, k) if path else str(k)
                if not dict_deep_equals(d1[k], d2[k], child_path):
                    # The nested call already printed the precise difference.
                    return False

            # Reached for leaf mismatches, and for nested dicts whose only
            # difference is an extra key on the d2 side.
            result = [" - %s : %s" % (k, d1[k]), " + %s : %s" % (k, d2[k])]
            if path:
                result.insert(0, "%s: " % path)
            print("\n".join(result))
            return False

    return True

In [25]:
# Rename each resource's `fd_schema` key back to `schema` before comparing with the
# raw descriptor (presumably the DB layer uses `fd_schema` as its field name - confirm).
# The membership guard means re-running this cell does not fail on already-renamed resources.
for resource in fd_package_dict['resources']:
    if 'fd_schema' in resource:
        resource['schema'] = resource.pop('fd_schema')

# Roundtrip check: what came back from the database must match the descriptor on disk.
assert dict_deep_equals(fd_package_dict, load_raw_fd_package(fd_fp)), 'Failed roundtrip validation'