## Data validation and settings management using Pydantic. Type annotations.

In [None]:
%pip install pydantic

### Runtime enforcement in action

In [None]:
# without pydantic

class UserWithOnlyTypeHints:
    """Plain Python class whose type hints are documentation only.

    Nothing validates ``id`` or ``name`` at runtime: whatever the caller
    passes is stored unchanged.
    """

    def __init__(self, id: int, name: str):
        self.id: int = id
        self.name: str = name

uw1 = UserWithOnlyTypeHints(1, "alice")

# No error is raised here, nor later when uw2.name is read: the hint says `str`,
# but the int 22 is stored as-is. Only a static type checker would flag this call.
uw2 = UserWithOnlyTypeHints(2, 22)
print(uw2.name, type(uw2.name))  # the wrong type went through silently


In [None]:
# Let's use Pydantic to enforce types

from pydantic import BaseModel

class User(BaseModel):
    """User record; Pydantic validates both fields when an instance is built."""

    id: int
    name: str


# Well-typed input is accepted.
user1 = User(id=123, name="Alice")
print(user1)

# Badly-typed input ('id' cannot be parsed as an int) fails right away with a
# ValidationError, i.e. much earlier in the development process:
# user2 = User(id="hundred", name="bob")

id=123 name='Alice'


In [20]:
# Another example of runtime validation

from pydantic import BaseModel, ValidationError

class Product(BaseModel):
    """Product whose field types are enforced when an instance is created."""

    name: str
    price: float
    quantity: int

# valid input
item = Product(name="Apple", price=1.99, quantity=10)

# invalid input: the failure is the point of this demo, so catch and display it
# instead of letting the exception stop a "Run All" at this cell.
try:
    invalid_item = Product(name="Banana", price="cheap", quantity="many")
except ValidationError as exc:
    print(f"{type(exc).__name__}: {exc}")


ValidationError: 2 validation errors for Product
price
  Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='cheap', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/float_parsing
quantity
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='many', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/int_parsing

### Basic use case

In [1]:
from pydantic import BaseModel, ConfigDict

class User(BaseModel):
    """User model configured with strict=True."""

    model_config = ConfigDict(strict=True)

    name: str
    age: int


# Passing the age as a string fails validation on the `age` field:
# User(name="Alice", age="30")  ->  pydantic.ValidationError

user = User(name="Alice", age=30)
print(user)

name='Alice' age=30


### Beyond "str" and "int"
Pydantic supports the following field types and rules:
1. Primitive types: str, int, float, bool
2. Collection types: list, tuple, set, dict
3. Optional types: Optional from the typing module for fields that can be None
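4. Fields are required by default unless a default value is provided. In Pydantic v2, `Optional[X]` on its own only allows `None` as a value; the field becomes optional only when it also has a default (e.g. `age: Optional[int] = None`).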
5. Missing required fields will raise ValidationError.

In [23]:
from typing import List, Dict, Optional, Union
from pydantic import BaseModel

class User(BaseModel):
    """User profile mixing a scalar, an optional and two collection fields."""

    name: str
    age: Optional[int] = None  # may be omitted; falls back to None
    tags: List[str]
    metadata: Dict[str, Union[str, int, float]]


# `age` is deliberately left out (e.g. age=30) so the default is used.
user1 = User(
    name="Alice",
    tags=["developer", "python"],
    metadata={"location": "Wonderland", "experience": 5},
)

### Automatic Type Coercion

In [26]:
from pydantic import BaseModel

class User(BaseModel):
    """User model used to show Pydantic's automatic type coercion."""

    id: int
    name: str
    is_active: bool


# `id` and `is_active` are given as strings; Pydantic converts them to the
# declared int and bool types instead of rejecting them.
data = User(
    id="123",
    name="John Doe",
    is_active="true",
)
print(data)


id=123 name='John Doe' is_active=True


### Understand mutable default value handling in Pydantic

In [47]:
# Demonstrate the behavior of mutable default values in regular Python classes (without Pydantic).
# Here, all instances of Model share the same list object for item_counts,
# which can lead to unexpected side effects when modifying the list in one instance.

class Model:
    """Plain class: `item_counts` is a single class-level list."""

    item_counts: list[dict[str, int]] = [{}]


m1, m2 = Model(), Model()

# The write goes through m1, yet m2 shows it too: both read the same class attribute.
m1.item_counts[0].update(a=1)
print(m1.item_counts, m2.item_counts, sep="\n")


[{'a': 1}]
[{'a': 1}]


In [50]:
# All instances of Model share the same list object for item_counts
class Model:
    """Plain class whose mutable default lives on the class, not on the instances."""

    item_counts: list[dict[str, int]] = [{}]


m1 = Model()
m1.item_counts[0].update(a=1)
print(m1.item_counts)

# m2 is created only after the mutation and still sees it.
m2 = Model()
print(m2.item_counts)


[{'a': 1}]
[{'a': 1}]


In [None]:
from dataclasses import dataclass, field

@dataclass
class Model:
    """Dataclass whose `item_counts` default is built by a factory."""

    # The factory is called for each new instance, so every instance owns its list.
    item_counts: list[dict[str, int]] = field(default_factory=lambda: [{}])


m1 = Model()
m1.item_counts[0].update(a=1)
print(m1.item_counts)

# A later instance starts from a fresh [{}] and is unaffected by m1's change.
m2 = Model()
print(m2.item_counts)

[{'a': 1}]
[{}]


In [45]:
from dataclasses import dataclass, field

@dataclass
class Model:
    """Dataclass variant: each instance receives its own list of dicts."""

    # default_factory produces a new [{}] per instance instead of one shared object.
    item_counts: list[dict[str, int]] = field(default_factory=lambda: [{}])


m1, m2 = Model(), Model()

# Both instances exist before the mutation; only m1 is changed.
m1.item_counts[0].update(a=1)
print(m1.item_counts, m2.item_counts, sep="\n")

[{'a': 1}]
[{}]


In [None]:
from pydantic import BaseModel, Field


class Model(BaseModel):
    """Pydantic model whose `item_counts` default comes from a factory."""

    # Field(default_factory=...) builds a new [{}] for each instance.
    item_counts: list[dict[str, int]] = Field(default_factory=lambda: [{}])


m1, m2 = Model(), Model()

# Only m1 is mutated; m2 keeps its own untouched default.
m1.item_counts[0].update(a=1)
print(m1.item_counts)

print(m2.item_counts)


[{'a': 1}]
[{}]


### Field Constraints and Default Values 
This allows you to define validation rules and fallback values directly in your Pydantic models, ensuring data quality while providing flexibility.

Use `Field()` to add constraints like min/max values, string lengths, and regex patterns.  
Specify defaults for optional fields with `default=`, or use `default_factory=` for mutable defaults.

default_factory is used when you need to provide a dynamic value for a field, especially useful for mutable types like lists, dictionaries, or custom objects. Unlike default, which uses a **fixed value**, default_factory calls a function to **generate the default value** each time it's needed.

In [27]:
from pydantic import BaseModel, Field
from typing import List

class Product(BaseModel):
    """Catalogue product whose constraints and defaults are declared with Field()."""

    id: int = Field(gt=0, description="Product ID must be positive")
    name: str = Field(min_length=3, max_length=50)
    price: float = Field(default=9.99, gt=0, lt=10000)
    in_stock: bool = Field(default=True)


# Only the fields without defaults are supplied; price and in_stock use their defaults.
product = Product(id=101, name="Widget")

In [None]:
# Difference between default and default_factory
# Notice empty list

# In Pydantic, using default=[] for a field does NOT cause all instances to share the same list.
# Pydantic internally copies the default value for each new instance, so each model gets its own list.
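# This is different from plain Python classes, where a mutable class-level default like [] is shared across instances
# (dataclasses go further and reject a bare [] default with a ValueError, requiring default_factory).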
# Therefore, both default=[] and default_factory=list are safe in Pydantic

from pydantic import BaseModel, Field
from typing import List
from datetime import datetime

class Order(BaseModel):
    """Order used to compare `default=[]` with `default_factory=list` in Pydantic."""

    # Despite the field name, this is safe in Pydantic: instances do NOT end up
    # sharing this list (see the output of the appends below).
    items_bad: List[str] = Field(default=[])

    # Explicit alternative: the factory builds a new list for each instance
    items_good: List[str] = Field(default_factory=list)

    # Generate timestamp when order is created
    created_at: datetime = Field(default_factory=datetime.now)

order1 = Order()
order2 = Order()
print("order1 = ", order1, "\n", "order2 = ", order2)

# Each order ends up with only its own appended item, so the lists are independent.
order1.items_bad.append("bad item 1")
order2.items_bad.append("bad item 2")
print("order1 = ", order1, "\n", "order2 = ", order2)

order1 =  items_bad=[] items_good=[] created_at=datetime.datetime(2025, 7, 4, 13, 21, 59, 752149) 
 order2 =  items_bad=[] items_good=[] created_at=datetime.datetime(2025, 7, 4, 13, 21, 59, 752418)
order1 =  items_bad=['bad item 1'] items_good=[] created_at=datetime.datetime(2025, 7, 4, 13, 21, 59, 752149) 
 order2 =  items_bad=['bad item 2'] items_good=[] created_at=datetime.datetime(2025, 7, 4, 13, 21, 59, 752418)


In [None]:
from pydantic import BaseModel, Field, ValidationError

class Product(BaseModel):
    """Product with length and range constraints declared through Field()."""

    name: str = Field(..., min_length=3, max_length=50, description="Name of the product")
    price: float = Field(..., gt=0, description="Must be a positive number")
    quantity: int = Field(default=1, ge=0, description="Must be non-negative")

p1 = Product(name="Television", price=2999.99) # ✅ valid

# ❌ invalid: "TV" is shorter than min_length=3 and the price is not greater than 0.
# The error is the demonstration, so it is caught and displayed rather than
# left to abort the cell.
try:
    p2 = Product(name="TV", price=-299.99)
except ValidationError as exc:
    print(f"{type(exc).__name__}: {exc}")

#### Pass the function reference without calling it
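`default_factory=list` ✅ passes the function itself, so it can be called for each new instance  
`default_factory=list()` ❌ calls the function immediately and passes the resulting empty list, which is not callable  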
The factory function is called only when a value is needed, making it efficient for resource-intensive operations.

In [49]:
# With custom factory functions

from pydantic import BaseModel, Field
import random
from datetime import datetime, timedelta

def generate_order_id():
    """Return an order id of the form ``ORD-<n>``, with n drawn at random from 1000..9999."""
    suffix = random.randint(1000, 9999)
    return "ORD-" + str(suffix)


class New_Order(BaseModel):
    """Order whose fields are all filled in by default factories."""

    # Custom factory: called for each new instance to produce the id
    order_id: str = Field(default_factory=generate_order_id)
    # Builtin generic `list[str]` is used so this cell does not rely on
    # `typing.List` having been imported by an earlier cell.
    items: list[str] = Field(default_factory=list)
    # Lambda factory: 30 days after the moment the factory is called
    expiry_date: datetime = Field(default_factory=lambda: datetime.now() + timedelta(days=30))

order1 = New_Order()
print(order1)

order_id='ORD-1357' items=[] expiry_date=datetime.datetime(2025, 8, 3, 15, 15, 8, 686694)


### Nested Models
Nest models within each other, enabling complex data structures.  
When defining nested models, Pydantic handles validation of the entire object tree, ensuring that data at all levels meets your specified requirements.

In [32]:
from pydantic import BaseModel
from typing import List

class Address(BaseModel):
    """A postal location reduced to city and country."""

    city: str
    country: str


class Person(BaseModel):
    """Parent model: a person holding a list of nested Address models."""

    name: str
    addresses: List[Address]


# Example usage: the nested models are passed as ready-made Address instances.
person = Person(
    name="Alice",
    addresses=[
        Address(city="Wonderland", country="Fantasyland"),
        Address(city="Springfield", country="USA"),
    ],
)

In [None]:
from pydantic import BaseModel
from typing import List

class OrderItem(BaseModel):
    """One line of an order: which product and how many."""

    product_id: str
    quantity: int


class Order(BaseModel):
    """An order made of nested OrderItem models."""

    order_id: str
    items: List[OrderItem]


# The items are given as plain dictionaries; Pydantic turns each one into an
# OrderItem instance, which is why attribute access works below.
order = Order(
    order_id="ORD123",
    items=[
        {"product_id": "P001", "quantity": 2},
        {"product_id": "P002", "quantity": 1},
    ],
)

first_item = order.items[0]
print(first_item.product_id)


P001


### Built-in support for parsing from JSON, Dicts, etc

### Custom validators

Custom validators enable complex validation logic beyond simple type checking, allowing for data transformation, cross-field validation, and business rule enforcement

* Validators can both validate and transform input data
* Validation errors provide specific feedback about what went wrong
* Validators are executed in a predictable order during model creation

In [39]:
from pydantic import BaseModel, field_validator

class Product(BaseModel):
    """Product whose price is checked and then transformed by a custom validator."""

    name: str
    price: float

    @field_validator('price')
    @classmethod
    def price_must_be_positive(cls, value):
        """Reject non-positive prices, then return the price multiplied by 0.9.

        Raises:
            ValueError: if ``value`` is zero or negative.
        """
        # `<=` rather than `<`: zero is not a positive price either.
        if value <= 0:
            raise ValueError('Price should be positive')
        return value * 0.9 # data transformation

# create a product with validation
product = Product(name="Mug", price=1)

print(product)

name='Mug' price=0.9


In [None]:
from pydantic import BaseModel, ValidationError, field_validator
import re

class User(BaseModel):
    """User whose username must match a restricted character set and length."""

    username: str

    @field_validator('username')
    @classmethod
    def validate_username(cls, value):
        """Return ``value`` unchanged if it is 3-20 letters, digits or underscores.

        Raises:
            ValueError: if the username contains other characters or has a
                different length.
        """
        # fullmatch instead of match + `$`: `$` also matches just before a
        # trailing newline, so "abc\n" would otherwise be accepted.
        if not re.fullmatch(r'[a-zA-Z0-9_]{3,20}', value):
            raise ValueError("Username must be 3-20 characters long and can only contain letters, numbers, and underscores.")
        return value

# valid
user = User(username="valid_user123")

# Invalid: contains a space and "!". The error is the demonstration, so it is
# caught and displayed instead of aborting the cell.
try:
    user = User(username="invalid user!")
except ValidationError as exc:
    print(f"{type(exc).__name__}: {exc}")

In [None]:
from pydantic import BaseModel, ValidationError, model_validator
from datetime import date

class Booking(BaseModel):
    """Booking whose two dates are checked against each other after field validation."""

    start_date: date
    end_date: date

    @model_validator(mode='after')
    def check_dates(self):
        """Cross-field check: the booking must start strictly before it ends.

        Raises:
            ValueError: if ``start_date`` is on or after ``end_date``.
        """
        if self.start_date >= self.end_date:
            raise ValueError("start_date must be before end_date")
        return self

# Works fine
valid_booking = Booking(start_date="2025-07-01", end_date="2025-07-10")

# Raises error (start is after end). The error is the demonstration, so it is
# caught and displayed instead of aborting the cell.
try:
    Booking(start_date="2025-07-15", end_date="2025-07-10")
except ValidationError as exc:
    print(f"{type(exc).__name__}: {exc}")

# Last expression of the cell, so the valid booking is rendered as its output
valid_booking


Booking(start_date=datetime.date(2025, 7, 1), end_date=datetime.date(2025, 7, 10))