In [1]:
from trailpack.packing import Packing, read_parquet

In [2]:
from test_packing import df

In [3]:
# Show the example DataFrame imported from test_packing.
# NOTE(review): a disabled export used to trail this expression:
#   .to_excel('../../tests/data/example_data.xlsx', index=False)
# presumably a one-off fixture export — confirm before re-enabling.
df

Unnamed: 0,location,timestamp,amount
0,New York,2025-10-13 08:00:00,4
1,Berlin,2025-10-13 09:30:00,10
2,Tokyo,2025-10-13 11:15:00,7


# Using DataPackage Schema Classes

The new DataPackage schema classes provide a structured way to create metadata dictionaries with validation and UI-friendly field definitions.

In [4]:
# Import the new schema classes
from trailpack.packing import (
    MetaDataBuilder,
    Resource, 
    Field, 
    FieldConstraints,
    Unit
)

## Method 1: Using MetaDataBuilder (Recommended)

The `MetaDataBuilder` provides a fluent interface for creating metadata step by step with built-in validation.

In [5]:
# Create metadata using the fluent builder interface.
# Each call is chained on the previous result and .build() yields the final
# metadata object, which is indexed by key in the summary prints below.
builder_metadata = (MetaDataBuilder()
    .set_basic_info(
        name="sample-dataset",
        title="Sample Dataset with Schema Classes", 
        description="This demonstrates creating metadata using the new schema classes",
        version="1.0.0"
    )
    .set_profile("tabular-data-package")
    .set_keywords(["example", "schema", "metadata", "trailpack"])    
    .add_contributor("Demo User", "author", "demo@example.com")
    .add_source("Sample Data Generator", "https://example.com/sample-data")
    .add_license(name='CC-BY-SA-2.0', title='Creative Commons Attribution Share Alike 2.0 Generic', path='https://spdx.org/licenses/CC-BY-SA-2.0.html')  # Explicit license; ends up first in 'licenses'
    .add_license()  # No arguments: uses default CC-BY-4.0 (appended as a second license)
    .add_resource(Resource(
        name="sample-data",
        path="data/sample-data.parquet",
        description="A sample dataset resource",
        fields=[
            # One Field per DataFrame column; constraints and units are optional.
            Field(name="location", type="string",constraints=FieldConstraints(required=True)),
            Field(name="timestamp", type="datetime", constraints=FieldConstraints(required=True)),
            Field(name="amount", type="integer", unit=Unit(name="kg", long_name="kilogram", path="http://qudt.org/vocab/unit#Kilogram"), constraints=FieldConstraints(minimum=0))
        ]
    )).build())

# Summarise the result. Only the first license is printed even though two were added.
print("Metadata created with MetaDataBuilder:")
print(f"Package name: {builder_metadata['name']}")
print(f"Title: {builder_metadata['title']}")
print(f"Keywords: {builder_metadata['keywords']}")
print(f"License: {builder_metadata['licenses'][0]['name']}")
print(f"Contributors: {len(builder_metadata['contributors'])}")
print(f"Sources: {len(builder_metadata['sources'])}")

Metadata created with MetaDataBuilder:
Package name: sample-dataset
Title: Sample Dataset with Schema Classes
Keywords: ['example', 'schema', 'metadata', 'trailpack']
License: CC-BY-SA-2.0
Contributors: 1
Sources: 1


In [6]:
# Inspect the complete metadata produced by the builder.
builder_metadata

{'created': '2025-10-14T17:52:44.568840',
 'name': 'sample-dataset',
 'title': 'Sample Dataset with Schema Classes',
 'description': 'This demonstrates creating metadata using the new schema classes',
 'version': '1.0.0',
 'profile': 'tabular-data-package',
 'keywords': ['example', 'schema', 'metadata', 'trailpack'],
 'licenses': [{'name': 'CC-BY-SA-2.0',
   'title': 'Creative Commons Attribution Share Alike 2.0 Generic',
   'path': 'https://spdx.org/licenses/CC-BY-SA-2.0.html'},
  {'name': 'CC-BY-4.0',
   'title': 'Creative Commons Attribution 4.0 International',
   'path': 'https://spdx.org/licenses/CC-BY-4.0.html'}],
 'contributors': [{'name': 'Demo User',
   'role': 'author',
   'email': 'demo@example.com'}],
 'sources': [{'title': 'Sample Data Generator',
   'path': 'https://example.com/sample-data'}],
 'resources': [{'name': 'sample-data',
   'path': 'data/sample-data.parquet',
   'description': 'A sample dataset resource',
   'schema': {'fields': [{'name': 'location',
      'typ

In [7]:
# Pair the DataFrame with its metadata, then write them to one parquet file.
# The path is relative to the notebook's working directory.
tp = Packing(df, builder_metadata)
tp.write_parquet('../tests/data/example.parquet')

## Option 1 for reading directly with read_parquet

In [13]:
# The module-level read_parquet returns a (data, metadata) pair for the file.
data, meta_data = read_parquet('../tests/data/example.parquet')
data

Unnamed: 0,location,timestamp,amount
0,New York,2025-10-13 08:00:00,4
1,Berlin,2025-10-13 09:30:00,10
2,Tokyo,2025-10-13 11:15:00,7


In [14]:
# Metadata recovered from the parquet file by read_parquet.
meta_data

{'created': '2025-10-14T17:52:44.568840',
 'name': 'sample-dataset',
 'title': 'Sample Dataset with Schema Classes',
 'description': 'This demonstrates creating metadata using the new schema classes',
 'version': '1.0.0',
 'profile': 'tabular-data-package',
 'keywords': ['example', 'schema', 'metadata', 'trailpack'],
 'licenses': [{'name': 'CC-BY-SA-2.0',
   'title': 'Creative Commons Attribution Share Alike 2.0 Generic',
   'path': 'https://spdx.org/licenses/CC-BY-SA-2.0.html'},
  {'name': 'CC-BY-4.0',
   'title': 'Creative Commons Attribution 4.0 International',
   'path': 'https://spdx.org/licenses/CC-BY-4.0.html'}],
 'contributors': [{'name': 'Demo User',
   'role': 'author',
   'email': 'demo@example.com'}],
 'sources': [{'title': 'Sample Data Generator',
   'path': 'https://example.com/sample-data'}],
 'resources': [{'name': 'sample-data',
   'path': 'data/sample-data.parquet',
   'description': 'A sample dataset resource',
   'schema': {'fields': [{'name': 'location',
      'typ

## Option 2 for reading through the Packing class

In [8]:
# Create a Packing without passing data or metadata; it is filled from a file next.
tp_empty = Packing()

In [10]:
# Load the file into the existing instance; the results are then available
# as tp_empty.data and tp_empty.meta_data.
tp_empty.read_parquet('../tests/data/example.parquet')

In [12]:
# Metadata held by the instance after reading the parquet file.
tp_empty.meta_data

{'created': '2025-10-14T17:52:44.568840',
 'name': 'sample-dataset',
 'title': 'Sample Dataset with Schema Classes',
 'description': 'This demonstrates creating metadata using the new schema classes',
 'version': '1.0.0',
 'profile': 'tabular-data-package',
 'keywords': ['example', 'schema', 'metadata', 'trailpack'],
 'licenses': [{'name': 'CC-BY-SA-2.0',
   'title': 'Creative Commons Attribution Share Alike 2.0 Generic',
   'path': 'https://spdx.org/licenses/CC-BY-SA-2.0.html'},
  {'name': 'CC-BY-4.0',
   'title': 'Creative Commons Attribution 4.0 International',
   'path': 'https://spdx.org/licenses/CC-BY-4.0.html'}],
 'contributors': [{'name': 'Demo User',
   'role': 'author',
   'email': 'demo@example.com'}],
 'sources': [{'title': 'Sample Data Generator',
   'path': 'https://example.com/sample-data'}],
 'resources': [{'name': 'sample-data',
   'path': 'data/sample-data.parquet',
   'description': 'A sample dataset resource',
   'schema': {'fields': [{'name': 'location',
      'typ

In [11]:
# DataFrame held by the instance after reading the parquet file.
tp_empty.data

Unnamed: 0,location,timestamp,amount
0,New York,2025-10-13 08:00:00,4
1,Berlin,2025-10-13 09:30:00,10
2,Tokyo,2025-10-13 11:15:00,7


## Option 3 for reading the data with pyarrow (this is what read_parquet does):

In [16]:
# `pq` was never imported in this notebook, so this cell raised NameError on a
# fresh kernel (Restart & Run All). Import it here so the cell is self-contained.
import pyarrow.parquet as pq

# The metadata travels inside the parquet schema metadata as UTF-8 encoded JSON
# stored under the b"datapackage.json" key; decode it to a str for json.loads.
metadata = pq.read_table('../tests/data/example.parquet').schema.metadata[b"datapackage.json"].decode('utf-8')

In [17]:
import json

In [18]:
# Parse the JSON string extracted from the parquet schema into a Python dict.
json.loads(metadata)

{'created': '2025-10-14T17:39:17.364753',
 'name': 'sample-dataset',
 'title': 'Sample Dataset with Schema Classes',
 'description': 'This demonstrates creating metadata using the new schema classes',
 'version': '1.0.0',
 'profile': 'tabular-data-package',
 'keywords': ['example', 'schema', 'metadata', 'trailpack'],
 'licenses': [{'name': 'CC-BY-SA-2.0',
   'title': 'Creative Commons Attribution Share Alike 2.0 Generic',
   'path': 'https://spdx.org/licenses/CC-BY-SA-2.0.html'},
  {'name': 'CC-BY-4.0',
   'title': 'Creative Commons Attribution 4.0 International',
   'path': 'https://spdx.org/licenses/CC-BY-4.0.html'}],
 'contributors': [{'name': 'Demo User',
   'role': 'author',
   'email': 'demo@example.com'}],
 'sources': [{'title': 'Sample Data Generator',
   'path': 'https://example.com/sample-data'}],
 'resources': [{'name': 'sample-data',
   'path': 'data/sample-data.parquet',
   'description': 'A sample dataset resource',
   'schema': {'fields': [{'name': 'location',
      'typ