In [44]:
from pydantic import BaseModel, Field
from enum import Enum, StrEnum
import polars as pl


# Two-member enum with integer values; used as the annotation of
# `NearlyCompleteExample.enum_value` in this cell.
# (A `#` comment is used instead of a docstring so `Foo.__doc__` is unchanged.)
class Foo(Enum):
    A = 1
    B = 2


from datetime import date, datetime
from typing import Literal, Optional


# Minimal pydantic model: two string fields plus one integer field that
# carries an explicit polars dtype in its JSON-schema extras.
class SimpleExample(BaseModel):
    id: str
    name: str
    # The "dtype" key is the one `get_polars_dtype` looks up.
    age: int = Field(json_schema_extra={"dtype": pl.Int64()})


# Model collecting a wide range of annotation shapes in one place: scalars,
# lists, literals, defaults, optionals, bounded values, temporal types and
# an enum.
class NearlyCompleteExample(BaseModel):
    # Integer with an explicit polars dtype stored under the "dtype" key.
    int_with_dtype_value: int = Field(json_schema_extra={"dtype": pl.Int64()})
    # Plain scalar annotations.
    int_value: int
    float_value: float
    str_value: str
    bool_value: bool
    # List annotations; the second one allows `None` as an element.
    list_value: list[int]
    list_value_nullable: list[int | None]
    # Restricted to the two string literals "a" and "b".
    literal_value: Literal["a", "b"]
    # Field with a default value.
    default_value: str = "my_default"
    # Annotated as `Optional[int]` and declared without a default.
    optional_value: Optional[int]
    # Constrained to 10 <= value <= 20 via `ge` / `le`.
    bounded_value: int = Field(ge=10, le=20)
    # Temporal annotations.
    date_value: date
    datetime_value: datetime
    # Enum-typed field (see `Foo`).
    enum_value: Foo

In [45]:
from pydantic.fields import FieldInfo
from typing import get_args

from polars.datatypes import DataType as PolarsDataType


# Flat record describing a single model field.
# `arbitrary_types_allowed=True` is set because the annotations include
# non-pydantic types (`PolarsDataType`, `pl.Expr`).
class PatitoReducedField(BaseModel, arbitrary_types_allowed=True):
    # Field name.
    name: str
    # Polars data type associated with the field.
    dtype: PolarsDataType
    # Boolean flags describing the field.
    required: bool
    nullable: bool
    unique: bool
    # Python type associated with the field.
    type_hint: type
    # NOTE(review): `contraints` looks like a typo for `constraints`; left
    # unchanged because renaming the field would change the model's interface.
    contraints: list[pl.Expr] | None = None


def is_single_type(type: type) -> bool:
    """Tell whether a type hint is a plain, unparameterised type.

    Plain hints such as ``int``, ``str``, ``float`` or ``bool`` carry no type
    arguments and give ``True``.  Parameterised hints such as
    ``Optional[int]``, ``Union[int, str]`` or ``Literal["a", "b"]`` do carry
    arguments and give ``False``.
    """
    type_arguments = get_args(type)
    return len(type_arguments) == 0


def is_literal(type_: type) -> bool:
    "Determine whether the type hint is a Literal type."
    try:
        return type_.__dict__["__origin__"] is Literal
    except KeyError:
        return False


def get_enum_inner_type(enum: type) -> type | None:
    "Get the type of the values of the enum if it exists, None otherwise."
    if issubclass(enum, Enum):
        enum_types = set(type(value) for value in enum)  # type: ignore
        if len(enum_types) > 1:
            raise TypeError(
                "All enumerated values of enums used to annotate "
                "Patito model fields must have the same type. "
                "Encountered types: "
                f"{sorted(map(lambda t: t.__name__, enum_types))}."
            )
        enum_type = enum_types.pop()
    else:
        enum_type = None
    return enum_type




# Inspect the pydantic field metadata of the simple example model.
# `SimpleExample` is the model defined before this cell whose fields
# (id, name, age) match the recorded output; the name `Example` is only
# defined in later cells and is unbound when the notebook is run top to bottom.
model_fields = SimpleExample.model_fields

# Print each field's name, its FieldInfo and the type arguments of its
# annotation (an empty tuple for plain types such as `str` or `int`).
for field_name, field_info in model_fields.items():
    print(field_name, field_info, "\t\t", get_args(field_info.annotation))

id annotation=str required=True 		 ()
name annotation=str required=True 		 ()
age annotation=int required=True json_schema_extra={'dtype': Int64} 		 ()


In [42]:
# Display a single entry of the field mapping (a pydantic `FieldInfo`).
model_fields["id"]

FieldInfo(annotation=str, required=True)

In [37]:
# construct a dictionary containing the relevant field information from the pydantic model
from patito.pydantic import PYTHON_TO_POLARS_TYPES

def get_polars_dtype(field_info: FieldInfo) -> PolarsDataType | None:
    """Return the polars dtype explicitly attached to a field, if any.

    Args:
        field_info: The pydantic field description to inspect.

    Returns:
        The value stored under the ``"dtype"`` key of the field's
        ``json_schema_extra`` mapping, or ``None`` when there is no such
        mapping or no such key.
    """
    schema_extra = field_info.json_schema_extra
    # pydantic accepts either a dict or a callable for `json_schema_extra`;
    # only the dict form can carry a "dtype" entry, and calling `.get` on a
    # callable would raise AttributeError.
    if isinstance(schema_extra, dict):
        return schema_extra.get("dtype")
    return None

def get_dtype(field_info: FieldInfo, type_hint_type: type) -> PolarsDataType:
    """Resolve the polars dtype to use for a field.

    A truthy dtype found by ``get_polars_dtype`` takes precedence.  Otherwise
    the dtype is looked up in ``PYTHON_TO_POLARS_TYPES`` using
    ``type_hint_type`` as the key; a failed lookup propagates to the caller.
    """
    explicit_dtype = get_polars_dtype(field_info)
    if explicit_dtype:
        return explicit_dtype
    return PYTHON_TO_POLARS_TYPES[type_hint_type]

# Walk over every model field and reduce it to a `PatitoReducedField`.
fields = {}
for field_name, field_info in model_fields.items():
    print(field_name)
    fields[field_name] = {}

    annotation = field_info.annotation
    assert annotation is not None, (
        f"Encountered a case where `field_info.annotation` is None for field `{field_name}`. "
        "Please report this with an example of your Model in an issue to the patito github repo."
    )
    if is_single_type(annotation):
        # e.g. regular type like int, float, str, bool, but also Enum
        type_hint_type = get_enum_inner_type(annotation) or annotation
    else:
        # e.g. list[int], list[int | None], Literal["a", "b"] or nullable types like Optional[int] or date | None
        # TODO: `int` is a placeholder until compound annotations are unpacked properly.
        type_hint_type = int
        fields[field_name]["type_hint"] = type_hint_type

    # Resolve the dtype after the branch so it is always bound for the current
    # field; previously the compound branch left `dtype` undefined (or holding
    # the previous field's value).
    dtype = get_dtype(field_info, type_hint_type)

    # pydantic v2's `FieldInfo` has no `allow_none` attribute, so derive
    # nullability from the annotation: the field is nullable when `None` is one
    # of its top-level type arguments (Optional[X], X | None).
    nullable = type(None) in get_args(annotation)

    # Constructed to validate the reduced representation; the result is not stored yet.
    PatitoReducedField(
        name=field_name,
        dtype=dtype,
        required=field_info.is_required(),
        nullable=nullable,
        unique=False,
        type_hint=type_hint_type,
    )

# NOTE(review): `get_required` is not defined in any visible cell, so this line
# depends on kernel state from elsewhere — confirm where it lives. The recorded
# output suggests it returns the set of required field names.
print(get_required(model_fields))

id
name
age
{'id', 'name', 'age'}


In [124]:
import patito as pt
# Base patito model with a single column `a`; extended right after via `with_fields`.
class MyModel(pt.Model):
    # Required column, does not allow nulls
    a: int

# Derive a new model from `MyModel` with four additional columns. Each keyword
# maps a column name to a two-element tuple whose first element is the type;
# the second element appears to be the default (or a `Field`) — see the
# per-column notes below.
ExpandedModel = MyModel.with_fields(
    # Does not allow the column to be missing, and does not allow nulls
    b=(int, ...),
    # Allows the column to be missing, but does not allow explicit nulls
    c=(int, None),
    # Required column, must have values greater than 10
    d=(int, pt.Field(gt=10)),
    # Allows the column to be missing, and allows explicit nulls
    e=(int | None, None),
)

In [None]:
from pydantic import BaseModel, Field
import polars as pl


# NOTE(review): this plain class reuses the name `Foo` of the Enum defined in
# an earlier cell (shadowing it once this cell runs) and is not referenced by
# any later visible code — confirm whether it is still needed.
class Foo:
    bar = 1

# Reproduction model: every field has a default, and `dtype2` stores a polars
# dtype *instance* (`pl.Int64()`) in its JSON-schema extras.
class Example(BaseModel):
    name: str = 'example'
    dtype1: str = 'example'
    dtype2: int = Field(1, json_schema_extra = {"dtype":pl.Int64()})

# Generate the JSON schema for the reproduction model.
Example.model_json_schema() # works in 2.3, fails in 2.4 (presumably pydantic versions — confirm)

In [6]:
# Variant of the reproduction model that uses the polars dtype as the field's
# *annotation* rather than as schema metadata; `arbitrary_types_allowed=True`
# is set for that non-pydantic annotation.
class Example(BaseModel, arbitrary_types_allowed=True):
    name: str = 'example'
    dtype1: str = 'example'
    dtype2: pl.Int64


In [7]:
# Demonstrates the failure: the recorded run of this call raised
# PydanticInvalidForJsonSchema for the `Int64`-annotated field.
Example.model_json_schema()

PydanticInvalidForJsonSchema: Cannot generate a JsonSchema for core_schema.IsInstanceSchema (Int64)

For further information visit https://errors.pydantic.dev/2.3/u/invalid-for-json-schema

In [1]:
from typing import Callable

from pydantic_core import PydanticOmit, core_schema

from pydantic import BaseModel
from pydantic.json_schema import GenerateJsonSchema, JsonSchemaValue

import polars as pl

from patito import Field


class MyGenerateJsonSchema(GenerateJsonSchema):
    """JSON-schema generator that raises ``PydanticOmit`` for invalid schemas."""

    def handle_invalid_for_json_schema(
        self, schema: core_schema.CoreSchema, error_info: str
    ) -> JsonSchemaValue:
        """Override of the ``GenerateJsonSchema`` hook; always raises ``PydanticOmit``.

        Both arguments are ignored.
        NOTE(review): per the pydantic docs, raising ``PydanticOmit`` here is
        the signal to leave the offending item out of the generated schema —
        confirm against the installed pydantic version.
        """
        raise PydanticOmit


def example_callable() -> int:
    """Return the constant ``1``.

    NOTE(review): not referenced by any other visible code in this cell.
    """
    return 1


# Reproduction model for the custom schema generator. `Field` here is the one
# imported from patito in this cell, and the "dtype" extra holds the polars
# dtype *class* (`pl.Int64`), not an instance.
class Example(BaseModel):
    name: str = 'example'
    dtype1: str = 'example'
    dtype2: int = Field(1, json_schema_extra = {"dtype":pl.Int64})


# Instantiate the model using only its defaults.
instance_example = Example()

# Build the validation-mode JSON schema with the custom generator defined
# above, then print the resulting dictionary.
validation_schema = instance_example.model_json_schema(
    schema_generator=MyGenerateJsonSchema, mode='validation'
)
print(validation_schema)

{'properties': {'name': {'default': 'example', 'title': 'Name', 'type': 'string'}, 'dtype1': {'default': 'example', 'title': 'Dtype1', 'type': 'string'}, 'dtype2': {'default': 1, 'dtype': Int64, 'title': 'Dtype2', 'type': 'integer', 'unique': False}}, 'title': 'Example', 'type': 'object'}
