In [1]:
from packing import Packing

In [2]:
from test_packing import df

In [3]:
df

Unnamed: 0,location,timestamp,amount
0,New York,2025-10-13 08:00:00,4
1,Berlin,2025-10-13 09:30:00,10
2,Tokyo,2025-10-13 11:15:00,5


# Using DataPackage Schema Classes

The new DataPackage schema classes provide a structured way to create metadata dictionaries with validation and UI-friendly field definitions.

In [4]:
# Import the new schema classes
from datapackage_schema import (
    DataPackageBuilder, 
    DataPackageSchema,
    Resource, 
    Field, 
    FieldConstraints,
    COMMON_LICENSES
)

## Method 1: Using DataPackageBuilder (Recommended)

The `DataPackageBuilder` provides a fluent interface for creating metadata step by step with built-in validation.

In [16]:
# Assemble the package metadata with DataPackageBuilder's chained setters.
# The licence entry and the resource are prepared first so the chain itself
# reads as a flat list of steps.
cc_by_license = COMMON_LICENSES['CC-BY-4.0']

# One resource whose three fields mirror the columns of the sample DataFrame.
sample_resource = Resource(
    name="sample-data",
    path="data/sample-data.parquet",
    description="A sample dataset resource",
    fields=[
        Field(
            name="location",
            type="string",
            constraints=FieldConstraints(required=True),
        ),
        Field(
            name="timestamp",
            type="datetime",
            constraints=FieldConstraints(required=True),
        ),
        Field(
            name="amount",
            type="integer",
            unit="kg",
            constraints=FieldConstraints(minimum=0),
        ),
    ],
)

builder_metadata = (
    DataPackageBuilder()
    .set_basic_info(
        name="sample-dataset",
        title="Sample Dataset with Schema Classes",
        description="This demonstrates creating metadata using the new schema classes",
        version="1.0.0",
    )
    .set_profile("tabular-data-package")
    .set_keywords(["example", "schema", "metadata", "trailpack"])
    .add_license(
        cc_by_license['name'],
        cc_by_license['title'],
        cc_by_license['path'],
    )
    .add_contributor("Demo User", "author", "demo@example.com")
    .add_source("Sample Data Generator", "https://example.com/sample-data")
    .add_resource(sample_resource)
    .build()
)

# Short textual summary of the resulting metadata dictionary, one item per line.
print(
    "Metadata created with DataPackageBuilder:",
    f"Package name: {builder_metadata['name']}",
    f"Title: {builder_metadata['title']}",
    f"Keywords: {builder_metadata['keywords']}",
    f"License: {builder_metadata['licenses'][0]['name']}",
    f"Contributors: {len(builder_metadata['contributors'])}",
    f"Sources: {len(builder_metadata['sources'])}",
    sep="\n",
)

Metadata created with DataPackageBuilder:
Package name: sample-dataset
Title: Sample Dataset with Schema Classes
Keywords: ['example', 'schema', 'metadata', 'trailpack']
License: CC-BY-4.0
Contributors: 1
Sources: 1


In [17]:
builder_metadata

{'name': 'sample-dataset',
 'title': 'Sample Dataset with Schema Classes',
 'description': 'This demonstrates creating metadata using the new schema classes',
 'version': '1.0.0',
 'profile': 'tabular-data-package',
 'keywords': ['example', 'schema', 'metadata', 'trailpack'],
 'licenses': [{'name': 'CC-BY-4.0',
   'title': 'Creative Commons Attribution 4.0',
   'path': 'https://creativecommons.org/licenses/by/4.0/'}],
 'contributors': [{'name': 'Demo User',
   'role': 'author',
   'email': 'demo@example.com'}],
 'sources': [{'title': 'Sample Data Generator',
   'path': 'https://example.com/sample-data'}],
 'resources': [{'name': 'sample-data',
   'path': 'data/sample-data.parquet',
   'description': 'A sample dataset resource',
   'schema': {'fields': [{'name': 'location',
      'type': 'string',
      'constraints': {'required': True}},
     {'name': 'timestamp',
      'type': 'datetime',
      'constraints': {'required': True}},
     {'name': 'amount',
      'type': 'integer',
      

In [8]:
# Pair the sample DataFrame with the metadata dictionary built above and
# write them out through Packing.write_parquet.
tp = Packing(df, builder_metadata)
# The path is relative to the notebook's working directory; the same path is
# read back further down to check the round trip.
tp.write_parquet('../../tests/data/example.parquet')

In [9]:
# A Packing created with no arguments; it is used below only to read a file back.
tp_empty = Packing()

In [10]:
# read_parquet yields two values, unpacked here as the table and its metadata.
df_read, metadata_read = tp_empty.read_parquet('../../tests/data/example.parquet')

In [11]:
df_read

Unnamed: 0,location,timestamp,amount
0,New York,2025-10-13 08:00:00,4
1,Berlin,2025-10-13 09:30:00,10
2,Tokyo,2025-10-13 11:15:00,5


In [15]:
# Label the output, then leave the metadata as the cell's last expression so
# the notebook renders it as the cell result.
print("Metadata read from Parquet file:")
metadata_read


Metadata read from Parquet file:


{'name': 'sample-dataset',
 'title': 'Sample Dataset with Schema Classes',
 'description': 'This demonstrates creating metadata using the new schema classes',
 'version': '1.0.0',
 'profile': 'tabular-data-package',
 'keywords': ['example', 'schema', 'metadata', 'trailpack'],
 'licenses': [{'name': 'CC-BY-4.0',
   'title': 'Creative Commons Attribution 4.0',
   'path': 'https://creativecommons.org/licenses/by/4.0/'}],
 'contributors': [{'name': 'Demo User',
   'role': 'author',
   'email': 'demo@example.com'}],
 'sources': [{'title': 'Sample Data Generator',
   'path': 'https://example.com/sample-data'}],
 'resources': [{'name': 'sample-data',
   'path': 'data/sample-data.parquet',
   'description': 'A sample dataset resource',
   'schema': {'fields': [{'name': 'location',
      'type': 'string',
      'constraints': {'required': True}},
     {'name': 'timestamp',
      'type': 'datetime',
      'constraints': {'required': True}},
     {'name': 'amount',
      'type': 'integer',
      