In [24]:
import os
from datetime import datetime, timezone
from typing import List, Optional

from pydantic import (
    AliasChoices,
    BaseModel,
    ConfigDict,
    EmailStr,
    Field,
    HttpUrl,
    PositiveFloat,
    PositiveInt,
    ValidationError,
    computed_field,
    field_serializer,
    field_validator,
    model_validator,
)
from pydantic_settings import BaseSettings, SettingsConfigDict

In [3]:
## Basics

class User(BaseModel):
    # Only `id` has no default, so it is the single required field.
    id: int
    name: str = "Islam Ahmed"
    # Either a valid e-mail string or None (the default).
    email: Optional[EmailStr] = None

user = User(id=1)
# Print, in order:
#   model_fields_set    -> names of the fields the caller explicitly supplied ({'id'})
#   model_dump()        -> the model as a plain dict
#   model_dump_json()   -> the model as a JSON string
#   model_json_schema() -> the JSON schema describing the model
for view in (
    user.model_fields_set,
    user.model_dump(),
    user.model_dump_json(),
    user.model_json_schema(),
):
    print(view)


try:
    user2 = User(id='a')   # 'a' cannot be parsed as an integer
except ValidationError as err:
    # The `as` name is unbound when the except block ends, so keep our own reference.
    exceptions = err


print(exceptions.errors())   # validation errors as a list of dicts
print(exceptions.json())   # the same errors as a JSON string


{'id'}
{'id': 1, 'name': 'Islam Ahmed', 'email': None}
{"id":1,"name":"Islam Ahmed","email":null}
{'properties': {'id': {'title': 'Id', 'type': 'integer'}, 'name': {'default': 'Islam Ahmed', 'title': 'Name', 'type': 'string'}, 'email': {'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}], 'default': None, 'title': 'Email'}}, 'required': ['id'], 'title': 'User', 'type': 'object'}
[{'type': 'int_parsing', 'loc': ('id',), 'msg': 'Input should be a valid integer, unable to parse string as an integer', 'input': 'a', 'url': 'https://errors.pydantic.dev/2.12/v/int_parsing'}]
[{"type":"int_parsing","loc":["id"],"msg":"Input should be a valid integer, unable to parse string as an integer","input":"a","url":"https://errors.pydantic.dev/2.12/v/int_parsing"}]


In [4]:
## Nested Models

class Food(BaseModel):
    # A menu item; the ingredient list is optional and defaults to None.
    id: int
    name: str
    ingredients: list[str] | None = None

class Restaurant(BaseModel):
    # Parent model that nests a list of Food models.
    id: int
    name: str
    location: str
    foods: list[Food]

# Case 1: nested models supplied as plain dicts.
rest1_foods = [
    {'id': 1, 'name': "food1", 'ingredients': ["salt"]},
    {'id': 2, 'name': "food2", 'ingredients': ["salt", "bla"]},
]
try:
    rest1 = Restaurant(id=1, name="bla", location="bla bla", foods=rest1_foods)
except ValidationError as e:
    print(f"rest1 instantiated failed with errors {e}")
else:
    print(rest1)
    print(rest1.model_dump())


# Case 2: nested models supplied as ready-made Food instances.
try:
    rest2 = Restaurant(
        id=2,
        name="bla2",
        location="bla bla2",
        foods=[Food(id=3, name="food3")],
    )
except ValidationError as e:
    print(f"rest2 instantiated failed with errors {e}")
else:
    print(rest2)
    print(rest2.model_dump())

id=1 name='bla' location='bla bla' foods=[Food(id=1, name='food1', ingredients=['salt']), Food(id=2, name='food2', ingredients=['salt', 'bla'])]
{'id': 1, 'name': 'bla', 'location': 'bla bla', 'foods': [{'id': 1, 'name': 'food1', 'ingredients': ['salt']}, {'id': 2, 'name': 'food2', 'ingredients': ['salt', 'bla']}]}
id=2 name='bla2' location='bla bla2' foods=[Food(id=3, name='food3', ingredients=None)]
{'id': 2, 'name': 'bla2', 'location': 'bla bla2', 'foods': [{'id': 3, 'name': 'food3', 'ingredients': None}]}


In [5]:
# %pip install pydantic[email]  

# %pip is a Jupyter magic command.
# It’s aware of the active kernel’s environment, so it installs packages exactly where your notebook’s Python interpreter runs.
# this will install email-validator for validating email
# use %pip instead of !pip because %pip ensures installation into the correct environment (the one your notebook is actually running).
# afterwards, run `pip freeze > requirements.txt` to record the installed package versions

In [6]:
## serialization and deserialization
# Serializing is the process of converting a python object into a format that can be easily stored or transmitted (like JSON, XML, Dict, Str or binary).
# Deserializing is the reverse process, where the serialized data is converted back into its original structure or python object to use in the code.

# serialization to dict
class Person(BaseModel):
    # Minimal model with two required fields.
    id: int
    name: str

person = Person(id=1, name="Islam Ahmed")
print(person.model_dump())   # model -> dict

# dict -> model
p = dict(id=2, name='bla bla')
person2 = Person.model_validate(p)
print(person2)

# JSON text -> model
p_json = '''
{
    "id": 3,
    "name": "foo foo"
}
'''
person3 = Person.model_validate_json(p_json)
print(person3)

{'id': 1, 'name': 'Islam Ahmed'}
id=2 name='bla bla'
id=3 name='foo foo'


In [7]:
## Additional Parsers

class Address(BaseModel):
    id: int
    # Required (no default given); length must be between 3 and 50 characters.
    street: str = Field(min_length=3, max_length=50)
    city: str
    state: str
    zip_code: str

class Employee(BaseModel):
    # A staff member; all four fields are required.
    id: int
    name: str
    email: EmailStr  # validated as an e-mail address
    address: Address  # nested Address model

class Owner(BaseModel):
    # Restaurant owner; both fields are required.
    id: int
    name: str

class Restaurant(BaseModel):
    id: int
    name: str
    # The list must contain at least two employees.
    employees: list[Employee] = Field(min_length=2)
    address: Address
    foods: list[Food]
    owner: Owner
    # PositiveInt already means > 0; the Field adds the upper bound of < 100.
    number_of_tables: PositiveInt = Field(gt=0, lt=100)
    # Must be an http:// or https:// URL.
    website: HttpUrl


# Assemble the nested payloads first so the constructor call stays readable.
rest1_address = {'id': 1, 'street': "st 1", 'city': "cairo", 'state': "cairo", 'zip_code': "12345"}
rest1_staff = [
    {
        'id': 1,
        'name': "emp1",
        'email': "emp1@gmail.com",
        'address': {'id': 1, 'street': "st 1", 'city': "cairo", 'state': "cairo", 'zip_code': "12345"},
    },
    {
        'id': 2,
        'name': "emp2",
        'email': "emp2@gmail.com",
        'address': {'id': 2, 'street': "st 2", 'city': "cairo", 'state': "cairo", 'zip_code': "12345"},
    },
]
rest1_menu = [
    {'id': 1, 'name': "food1", 'ingredients': ["salt"]},
    {'id': 2, 'name': "food2", 'ingredients': ["salt", "bla"]},
]

rest1 = Restaurant(
    id=1,
    name="bla",
    employees=rest1_staff,
    address=rest1_address,
    foods=rest1_menu,
    owner={'id': 1, 'name': "owner1"},
    number_of_tables=10,
    website="https://www.google.com",
)
print(rest1.model_dump())


{'id': 1, 'name': 'bla', 'employees': [{'id': 1, 'name': 'emp1', 'email': 'emp1@gmail.com', 'address': {'id': 1, 'street': 'st 1', 'city': 'cairo', 'state': 'cairo', 'zip_code': '12345'}}, {'id': 2, 'name': 'emp2', 'email': 'emp2@gmail.com', 'address': {'id': 2, 'street': 'st 2', 'city': 'cairo', 'state': 'cairo', 'zip_code': '12345'}}], 'address': {'id': 1, 'street': 'st 1', 'city': 'cairo', 'state': 'cairo', 'zip_code': '12345'}, 'foods': [{'id': 1, 'name': 'food1', 'ingredients': ['salt']}, {'id': 2, 'name': 'food2', 'ingredients': ['salt', 'bla']}], 'owner': {'id': 1, 'name': 'owner1'}, 'number_of_tables': 10, 'website': HttpUrl('https://www.google.com/')}


In [None]:
## Field Validator: is used with a single field

class User(BaseModel):
    # `name` must contain a space and is stored title-cased.
    id: int
    name: str

    @field_validator('name')
    @classmethod
    def name_must_be_capitalized(cls, v: str) -> str:
        """Require a space in `name` and return it with each word capitalized.

        Raises:
            ValueError: if `v` contains no space.
        """
        if ' ' not in v:
            # The message names the field that is actually being validated.
            raise ValueError('Name must contain a space')
        return v.title()  # Capitalize each word
    
try:
    user = User(id=1, name="islam ahmed")
except ValueError as e:
    print(f"user instantiated failed with errors {e}")  
else:
    # Runs only when construction succeeded.
    print(user)



class Model(BaseModel):
    numbers: list[int]

    @field_validator('numbers')
    @classmethod
    def check_numbers(cls, v: List[int]) -> List[int]:
        """Return `v` unchanged when all entries are distinct.

        Raises:
            ValueError: if `v` contains a duplicate.
        """
        distinct = set(v)
        if len(distinct) == len(v):
            return v
        raise ValueError('numbers must be unique')
    
try:
    model = Model(numbers=[1, "2", 3, 4, 5, 1])
except ValidationError as e:   # the validator's ValueError arrives wrapped in a ValidationError
    print(f"model instantiated failed with errors {e}")
else:
    print(model)


id=1 name='Islam Ahmed'
model instantiated failed with errors 1 validation error for Model
numbers
  Value error, numbers must be unique [type=value_error, input_value=[1, '2', 3, 4, 5, 1], input_type=list]
    For further information visit https://errors.pydantic.dev/2.12/v/value_error


In [9]:
## Model Validator: is used with multiple fields
# You need to validate relationships between multiple fields (e.g., start_date < end_date).
# Or you want to modify values before or after the model is created.

# before → You prepare the messy input data before letting Pydantic do its validation.
# after → You review the already validated model to ensure business logic is consistent before using/saving the data.

class User(BaseModel):
    id: int
    name: str
    email: EmailStr
    password: str
    password_confirm: str

    @model_validator(mode='before')
    @classmethod
    def passwords_match(cls, values):
        """Check the two password inputs agree and tidy up `name` before field validation.

        Args:
            values: the raw, not yet validated input passed to the model.

        Returns:
            The input to validate; a new dict when `name` was normalised,
            otherwise the object that was passed in.

        Raises:
            ValueError: if `password` and `password_confirm` differ.
        """
        # A 'before' validator sees the raw input, which is not necessarily a dict;
        # leave anything else for pydantic's own validation to handle.
        if not isinstance(values, dict):
            return values

        if values.get('password') != values.get('password_confirm'):
            raise ValueError('Passwords do not match')

        name = values.get('name')
        # Only normalise real strings; a non-string name is left for the field's
        # type validation to reject instead of crashing on `.strip()`.
        if isinstance(name, str) and name:
            # Build a new dict so the caller's input is not mutated.
            values = {**values, 'name': name.strip().title()}

        return values
    

try:
    user = User(
        id=1,
        name='islam ahmed ',
        email='islam@gmail.com',
        password="1234",
        password_confirm="1234",
    )
except ValueError as e:
    print(f"user instantiated failed with errors {e}")
else:
    print(user)


class Event(BaseModel):
    id: int
    name: str
    age: PositiveInt
    start_date: str  # In a real app, use datetime
    end_date: str    # In a real app, use datetime

    # An 'after' validator runs on the already-validated model, so it is written
    # as a plain instance method (no @classmethod) and returns `self`.
    @model_validator(mode='after')
    def check_dates(self) -> "Event":
        """Enforce the cross-field rules on the validated model.

        Raises:
            ValueError: if `start_date` is not before `end_date`, or `age` is under 18.
        """
        # The dates are strings, so this is a lexicographic comparison.
        if self.start_date >= self.end_date:
            raise ValueError('start_date must be before end_date')
        if self.age < 18:
            raise ValueError('age must be at least 18')
        return self
    

try:
    event = Event(
        id=1,
        name="event1",
        age=2,
        start_date="2023-10-08",
        end_date="2023-10-09",
    )
except ValueError as e:
    print(f"event instantiated failed with errors {e}")
else:
    print(event)

id=1 name='Islam Ahmed' email='islam@gmail.com' password='1234' password_confirm='1234'
event instantiated failed with errors 1 validation error for Event
  Value error, age must be at least 18 [type=value_error, input_value={'id': 1, 'name': 'event1...end_date': '2023-10-09'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.12/v/value_error


In [10]:
## Field Class

class Product(BaseModel):
    id: int
    # Required fields: no default is supplied to Field().
    name: str = Field(min_length=3, max_length=50, description="The name of the product")
    price: float = Field(gt=0, description="The price must be greater than zero")
    # Optional fields: each has an explicit default.
    description: str | None = Field(default=None, max_length=300, description="A brief description of the product")
    in_stock: bool = Field(default=True, description="Availability of the product")
    # default_factory is called to produce the default, so each instance gets its own list.
    tags: list[str] = Field(default_factory=list, description="Tags associated with the product")
    website: HttpUrl | None = Field(default=None, description="The product's website URL (if available)")

try:
    product = Product(
        id=1,
        name="Laptop",
        price=999.99,
        description="A high-end gaming laptop",
        tags=["electronics", "gaming"],
        website="https://example.com/laptop",
    )
except ValueError as e:
    print(f"product instantiated failed with errors {e}")
else:
    print(product.model_dump_json())

{"id":1,"name":"Laptop","price":999.99,"description":"A high-end gaming laptop","in_stock":true,"tags":["electronics","gaming"],"website":"https://example.com/laptop"}


In [11]:
## Field Alias

class Person(BaseModel):
    id: int
    # The alias is the key expected in the incoming data.
    first_name: str = Field(alias="First Name")
    last_name: str = Field(alias="Last Name")


# Incoming payload keyed by the aliases rather than the Python attribute names.
p = {"id": 1, "First Name": "John", "Last Name": "Doe"}

person = Person.model_validate(p)
dump_by_name = person.model_dump()
dump_by_alias = person.model_dump(by_alias=True)
print(dump_by_name)    # {'id': 1, 'first_name': 'John', 'last_name': 'Doe'}
print(dump_by_alias)   # {'id': 1, 'First Name': 'John', 'Last Name': 'Doe'}

{'id': 1, 'first_name': 'John', 'last_name': 'Doe'}
{'id': 1, 'First Name': 'John', 'Last Name': 'Doe'}


In [12]:
## Model Configuration - populate by_name and by_alias

class Person(BaseModel):
    id: int
    first_name: str = Field(..., alias="First Name")
    last_name: str = Field(..., alias="Last Name")

    # A BaseModel is configured with ConfigDict (SettingsConfigDict is for BaseSettings).
    # Population by alias is the default; `populate_by_name=True` additionally accepts
    # the Python field name. There is no `populate_by_alias` config key.
    # The attribute has to be named `model_config` for pydantic to pick it up.
    model_config = ConfigDict(populate_by_name=True)

# Mixed payload: one key uses the field name, the other uses the alias.
p = {"id": 1, "first_name": "John", "Last Name": "Doe"}

person = Person.model_validate(p)
dump_by_name = person.model_dump()
dump_by_alias = person.model_dump(by_alias=True)
print(dump_by_name)    # {'id': 1, 'first_name': 'John', 'last_name': 'Doe'}
print(dump_by_alias)   # {'id': 1, 'First Name': 'John', 'Last Name': 'Doe'}

{'id': 1, 'first_name': 'John', 'last_name': 'Doe'}
{'id': 1, 'First Name': 'John', 'Last Name': 'Doe'}


In [13]:
## Default Factories

class Log(BaseModel):
    # A BaseModel is configured with ConfigDict (SettingsConfigDict is for BaseSettings).
    # Population by alias is the default; `populate_by_name=True` additionally accepts
    # the Python field name (`id`) next to the alias (`log_id`).
    # The attribute has to be named `model_config` for pydantic to pick it up.
    model_config = ConfigDict(populate_by_name=True)

    id: int = Field(..., alias="log_id")
    event: str
    # default_factory is called per instance, so each log gets the current UTC time
    # when no timestamp is supplied.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

# No timestamp given -> the default factory supplies one.
log = Log(id=1, event="User logged in")
print(log)

# Explicit timestamp -> the default factory is not used.
fixed_timestamp = datetime(2023, 10, 1, 12, 0, 0, tzinfo=timezone.utc)
log1 = Log(id=2, event="User logged out", timestamp=fixed_timestamp)
print(log1)

id=1 event='User logged in' timestamp=datetime.datetime(2025, 10, 15, 8, 26, 27, 617553, tzinfo=datetime.timezone.utc)
id=2 event='User logged out' timestamp=datetime.datetime(2023, 10, 1, 12, 0, tzinfo=datetime.timezone.utc)


In [None]:
## Field Serialization

class Model(BaseModel):
    number: float
    dt: datetime | None = Field(default_factory=lambda: datetime.now(timezone.utc))

    @field_serializer('number')
    def serialize_number(self, v: float) -> float:
        """Serialize `number` rounded to two decimal places."""
        return round(v, 2)

    # 'json-unless-none': applied only for JSON output, and skipped when the value is None.
    @field_serializer('dt', when_used='json-unless-none')
    def serialize_dt(self, v: datetime) -> str:
        """Serialize `dt` as 'YYYY-MM-DD HH:MM:SS AM/PM'."""
        # `%p` is the standard AM/PM directive. The lowercase `%P` variant is a
        # platform-specific extension and is not available everywhere.
        return v.strftime("%Y-%m-%d %H:%M:%S %p")

  

# m = Model(number=3.14159, dt=None)
m = Model(number=3.14159)
# First the dict form (number rounded, dt still a datetime object),
# then the JSON form (number rounded, dt rendered as a formatted string).
for rendered in (m.model_dump(), m.model_dump_json()):
    print(rendered)

{'number': 3.14, 'dt': datetime.datetime(2025, 10, 15, 8, 45, 32, 181884, tzinfo=datetime.timezone.utc)}
{"number":3.14,"dt":"2025-10-15 08:45:32 am"}


In [15]:
## Computed Fields
# @computed_field (introduced in Pydantic v2) is a decorator that allows you to define a dynamic or calculated field that:
# doesn’t come from user input,
# is automatically included when the model is serialized (e.g., .model_dump() or .json()),
# is read-only (computed from other fields).
# is like a calculated column in SQL — it’s not stored, but automatically generated from existing data whenever you use the model.


class Rectangle(BaseModel):
    width: PositiveFloat
    height: PositiveFloat

    # Derived from width and height on access; included when the model is dumped.
    @computed_field
    @property
    def area(self) -> float:
        w, h = self.width, self.height
        return w * h

    # Derived from width and height on access; included when the model is dumped.
    @computed_field
    @property
    def perimeter(self) -> float:
        w, h = self.width, self.height
        return 2 * (w + h)
    
rect = Rectangle(width=5.0, height=10.0)
rect_json = rect.model_dump_json()
print(rect_json)  # {"width":5.0,"height":10.0,"area":50.0,"perimeter":30.0}

# Another example
class Person(BaseModel):
    name: str
    birth_year: int

    @field_validator('birth_year')
    @classmethod
    def check_age(cls, v: int) -> int:
        """Accept `v` unless it is later than the current year.

        Raises:
            ValueError: if `v` is greater than the current year.
        """
        this_year = datetime.now().year
        if v <= this_year:
            return v
        raise ValueError('birth_year cannot be in the future')

    # Current year minus birth_year, recomputed on every access.
    @computed_field(description="The person's age based on the current year")
    @property
    def age(self) -> int:
        born = self.birth_year
        return datetime.now().year - born
    
    
person = Person(name="Alice", birth_year=1990)
print(person.model_dump_json())  # e.g. {"name":"Alice","birth_year":1990,"age":35} when run in 2025

# Use "next year" rather than a hard-coded year, so this example keeps
# demonstrating the rejection no matter when the notebook is re-run.
future_year = datetime.now().year + 1
try:
    person_invalid = Person(name="Bob", birth_year=future_year)
    print(person_invalid.model_dump_json())
except ValidationError as e:
    print(f"person_invalid instantiated failed with errors {e}")

{"width":5.0,"height":10.0,"area":50.0,"perimeter":30.0}
{"name":"Alice","birth_year":1990,"age":35}
person_invalid instantiated failed with errors 1 validation error for Person
birth_year
  Value error, birth_year cannot be in the future [type=value_error, input_value=2026, input_type=int]
    For further information visit https://errors.pydantic.dev/2.12/v/value_error


In [16]:
## Pydantic settings


# Settings Management that will be used instead of env file
# Seed the process environment in one call (stands in for an env file).
os.environ.update(
    {
        'AUTH_KEY': 'env_auth_key',
        'MY_API_KEY': 'env_api_key',
        'ENV2': 'https://envurl.com',
    }
)


class Settings(BaseSettings):
    auth_key: str
    # The alias maps the MY_API_KEY environment variable onto this field.
    api_key: str = Field(alias='MY_API_KEY')
    # Used when no value for env1 is supplied.
    env1: HttpUrl = "https://defaulturl.com"
    env2: HttpUrl

    # Optional configuration for reading values from a .env file:
    # model_config = SettingsConfigDict(
    #     env_file = ".env",
    #     env_file_encoding = 'utf-8',
    #     case_sensitive = False,
    #     extra="ignore"  # ignore unknown environment variables instead of raising error
    # )

# env1 is passed explicitly; the remaining fields are filled from the environment.
settings = Settings(env1="https://env1url.com")
settings_dump = settings.model_dump()
print(settings_dump)   # auth_key / api_key / env2 come from os.environ, env1 from the keyword argument
# settings = Settings() --- IGNORE ---
# print(settings.model_dump())   # {'auth_key': 'env_auth_key', 'api_key': 'env_api_key', 'env1': 'https://defaulturl.com', 'env2': 'https://envurl.com'} --- IGNORE ---

{'auth_key': 'env_auth_key', 'api_key': 'env_api_key', 'env1': HttpUrl('https://env1url.com/'), 'env2': HttpUrl('https://envurl.com/')}


In [17]:
# Settings Management that will be used instead of env file
# Seed the PRODUCTION_-prefixed variables in one call.
os.environ.update(
    {
        'PRODUCTION_AUTH_KEY': 'env_auth_key',
        'PRODUCTION_MY_API_KEY': 'env_api_key',
        'PRODUCTION_ENV2': 'https://envurl.com',
        'PRODUCTION_BLA': 'bla',
    }
)

class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        env_prefix = "PRODUCTION_",  # prefix for environment variables; handy for switching between PRODUCTION_, DEVELOPMENT_, TESTING_
        case_sensitive = False,
        extra="ignore"  # ignore unknown environment variables instead of raising error
    )

    # No alias: read from PRODUCTION_AUTH_KEY through env_prefix.
    auth_key: str
    # env_prefix is NOT applied to fields that declare an alias; the alias is used
    # as the environment variable name as-is. The aliases below therefore spell out
    # the full prefixed names so the PRODUCTION_* variables are the ones read.
    api_key: str = Field(..., alias='PRODUCTION_MY_API_KEY')
    # AliasChoices is tried in order: PRODUCTION_ENV1 first, then PRODUCTION_ENV2.
    env: HttpUrl = Field(validation_alias=AliasChoices('PRODUCTION_ENV1', 'PRODUCTION_ENV2'))

# NOTE(review): the recorded output shows `env` resolved from the environment, not from
# this `env1` keyword; with extra="ignore" an unmatched keyword is dropped silently.
settings = Settings(env1="https://env1url.com")
settings_dump = settings.model_dump()
print(settings_dump)

{'auth_key': 'env_auth_key', 'api_key': 'env_api_key', 'env': HttpUrl('https://envurl.com/')}
