In [5]:
from pydantic import BaseModel, EmailStr, ValidationError
from typing import Optional
from datetime import datetime

class User(BaseModel):
    name: str
    email: EmailStr
    age: int
    is_active: bool = True
    created_at: Optional[datetime] = None
    
#Test it
clean_data={
    "name":"Yash Kuletha",
    "email": "aliceinborderland@gmail.com",
    "age":"20"
}

user=User(**clean_data)
print(f"User created: {user.name}, Age: {user.age}")
print(f"Model output: {user.model_dump()}")


User created: Yash Kuletha, Age: 20
Model output: {'name': 'Yash Kuletha', 'email': 'aliceinborderland@gmail.com', 'age': 20, 'is_active': True, 'created_at': None}


In [2]:
#automatic type conversion in action
messy_data = {
    "name":"Daniel Patel",
    "email":"daniel@gmail.com",
    "age":"34",
    "is_active":"true"
}

user= User(**messy_data)
print(f"Age type: {type(user.age)}")
print(f"Is active type: {type(user.is_active)}")
print(f"User is active? {user.is_active} and his Age is: {user.age}")


Age type: <class 'int'>
Is active type: <class 'bool'>
User is active? True and his Age is: 34


In [3]:
#validation error in pydantic
invalid_data={
    "name":"",
    "email":"abcdsa",
    "age":20
}
try:
    invalid_user = User(**invalid_data)
except ValidationError as e:
    print(e)

1 validation error for User
email
  value is not a valid email address: An email address must have an @-sign. [type=value_error, input_value='abcdsa', input_type=str]


#### Dataclasses vs Pydantic and when to use both code run

So `dataclass` (can be imported using built in `dataclasses module`) is a light way to create classes that mainly store data

On the other hand Pydantic models add validation, serialization and enable framework integration

In [2]:
#Data class vs Pydantic
from dataclasses import dataclass

@dataclass
class ProductDataclass:
   name: str
   price: float
   in_stock: bool

# Fast, simple, but no validation
product = ProductDataclass("Laptop", 999.99, True)

# This also works, even though types are wrong:
broken_product = ProductDataclass(123, "expensive", "maybe")

In [5]:
from pydantic import BaseModel, Field

#FieldÂ function is used to customize and add metadata to fields within a Pydantic model.
class ProductPydantic(BaseModel):
   name: str = Field(min_length=1)
   price: float = Field(gt=0)  # Must be greater than 0
   in_stock: bool

# Automatic validation prevents bad data
try:
   product = ProductPydantic(name="", price=-10, in_stock="maybe")
except ValidationError as e:
   print(e)

# Valid data works perfectly
good_product = ProductPydantic(
   name="Laptop",
   price="999.99",  # String converted to float
   in_stock=True
)
print(f"Price of good product is: {good_product.price}")

3 validation errors for ProductPydantic
name
  String should have at least 1 character [type=string_too_short, input_value='', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/string_too_short
price
  Input should be greater than 0 [type=greater_than, input_value=-10, input_type=int]
    For further information visit https://errors.pydantic.dev/2.11/v/greater_than
in_stock
  Input should be a valid boolean, unable to interpret input [type=bool_parsing, input_value='maybe', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/bool_parsing
Price of good product is: 999.99


In [7]:
#wrapping dataclasses in python with Pydantic
from pydantic.dataclasses import dataclass

@dataclass
class User:
    name: str
    age: int

u = User(name="Alice", age="25")
print(u)
print(type(u.age))
print(f"Age of user is: {u.age} and his name is {u.name}")

User(name='Alice', age=25)
<class 'int'>
Age of user is: 25 and his name is Alice


### Building Data models with Pydantic

#### Field validation and constraints

Task - Create a product catalogue API where price data comes from multiple vendors with different formatting standards (some might send prices as strings, others as floats, etc.)

In [10]:
from pydantic import BaseModel, Field
from decimal import Decimal
from typing import Optional

class Product(BaseModel):
    name: str = Field(min_length=1, max_length=100)
    #Decimal is precise and avoids floating-point rounding errors.
    price: Decimal = Field(gt=0, le=10000)
    description: Optional[str] = Field(None, max_length=100)
    category: str = Field(pattern=r'^[A-Za-z\s]+$')
    stock_quantity: int = Field(ge=0)
    is_available: bool = True
    
valid_product= Product(
    name="wireless earbuds",
    price= "2000",
    description= "high quality sony earbuds",
    category="Electronics",
    stock_quantity=50
)

try:
    invalid_product= Product(
        name="",
        price=-50,
        category="Electronics21",
        stock_quantity=10,
        is_available=True
    )
except ValidationError as e:
    print(e)
print(valid_product.name)

3 validation errors for Product
name
  String should have at least 1 character [type=string_too_short, input_value='', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/string_too_short
price
  Input should be greater than 0 [type=greater_than, input_value=-50, input_type=int]
    For further information visit https://errors.pydantic.dev/2.11/v/greater_than
category
  String should match pattern '^[A-Za-z\s]+$' [type=string_pattern_mismatch, input_value='Electronics21', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/string_pattern_mismatch
wireless earbuds


In [11]:
#Strict mode vs Lax mode in Pydantic
from pydantic import StrictStr, StrictInt, StrictBool
class User(BaseModel):
    name: str
    email: EmailStr
    age: int
    is_active: bool = True
    created_at: Optional[datetime] = None

class StrictUser(BaseModel):
    name: StrictStr
    email: EmailStr = Field(..., strict=True)
    age: StrictInt
    is_active: StrictBool = True
    created_at: Optional[datetime] = Field(..., strict=True)
    
messy_data = {
    "name":"Daniel Patel",
    "email":"daniel@gmail.com",
    "age":"34",
    "is_active":"true"
}

user= User(**messy_data)
print(f"Age type: {type(user.age)}")
print(f"Is active type: {type(user.is_active)}")
print(f"User is active? {user.is_active} and his Age is: {user.age}")

try:
    strict_user= StrictUser(**messy_data)
    print(strict_user.age)
except ValidationError as e:
    print(e)


Age type: <class 'int'>
Is active type: <class 'bool'>
User is active? True and his Age is: 34
3 validation errors for StrictUser
age
  Input should be a valid integer [type=int_type, input_value='34', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/int_type
is_active
  Input should be a valid boolean [type=bool_type, input_value='true', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/bool_type
created_at
  Field required [type=missing, input_value={'name': 'Daniel Patel', ...4', 'is_active': 'true'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing


In [30]:
#Complex data and nested data validation
from typing import List
from datetime import datetime

class Address(BaseModel):
   street: str = Field(min_length=5)
   city: str = Field(min_length=2)
   postal_code: str = Field(pattern=r'^\d{5}(-\d{4})?$')
   country: str = "USA"

class Customer(BaseModel):
   name: str = Field(min_length=1)
   email: EmailStr
   shipping_address: Address
   billing_address: Optional[Address] = None

class OrderItem(BaseModel):
   product_id: int = Field(gt=0)
   quantity: int = Field(gt=0, le=100)
   unit_price: Decimal = Field(gt=0)

class Order(BaseModel):
   order_id: str = Field(pattern=r'^ORD-\d{6}$')
   customer: Customer
   items: List[OrderItem] = Field(min_items=1)
   order_date: datetime = Field(default_factory=datetime.now)

# Complex nested data validation
order_data = {
   "order_id": "ORD-123456",
   "customer": {
       "name": "John Doe",
       "email": "john@example.com",
       "shipping_address": {
           "street": "123 Main Street",
           "city": "Anytown",
           "postal_code": "12345"
       }
   },
   "items": [
       {"product_id": 1, "quantity": 2, "unit_price": "29.99"},
       {"product_id": 2, "quantity": 1, "unit_price": "149.99"}
   ],
   "order_date": "2002-10-20T10:00:00"
}

order = Order(**order_data)
print(f"Order validated with {len(order.items)} items")
print(order.order_date)

Order validated with 2 items
2002-10-20 10:00:00


In [39]:
#Optional fields and None Handling
from typing import Optional

class UserCreate(BaseModel):
   name: str = Field(min_length=1)
   email: EmailStr
   age: int = Field(ge=13, le=120)
   phone: Optional[str] = Field(None, pattern=r'^\+?1?\d{9,15}$')

class UserUpdate(BaseModel):
   name: Optional[str] = Field(None, min_length=1)
   email: Optional[EmailStr] = None
   age: Optional[int] = Field(None, ge=13, le=120)
   phone: Optional[str] = Field(None, pattern=r'^\+?1?\d{9,15}$')

# PATCH request with partial data
update_data = {"name": "Jane Smith", "age": 30}
user_update = UserUpdate(**update_data)

# Serialize only provided fields
patch_data = user_update.model_dump(exclude_none=True)
patch_data_json = user_update.model_dump_json(exclude_none=True)
print(f"Fields to update: {list(patch_data.keys())}")
print(f"Serialized dictionary data: {patch_data}")
print(f"Serialized json data: {patch_data_json}")

Fields to update: ['name', 'age']
Serialized dictionary data: {'name': 'Jane Smith', 'age': 30}
Serialized json data: {"name":"Jane Smith","age":30}


### Custom Validation and Real-World Integration

Sometimes, we can have a user registration form where password requirements vary based on subscription plans, or an API that receives address data from multiple countries with different postal code formats. These scenarios require custom validation logic that captures your specific business rules while integrating smoothly with web frameworks and configuration systems.

#### Field Validators and model validation

For custom business logic use = `@field_validators ` decorator as it transforms your validation functions into part of the model itself. 

**Task - a user registration system where different subscription tiers have different password requirements:**

In [None]:
#creating custom validation logic for single field - password based on other field using @field-validator
from pydantic import BaseModel, field_validator, Field, model_validator
import re

class UserRegistration(BaseModel):
    username: str = Field(min_length=3)
    email: EmailStr
    subscription_tier: str=Field(pattern=r'^(free|pro|enterprise)$')
    password: str
  
    @field_validator('password')
    @classmethod
    def validate_password_complexity(cls, password, info):
        #fetch the data of user's subscription tier and use free as default if its missing
        tier=info.data.get('subscription_tier')
        if len(password)<8:
            raise ValueError('Password must be atleast 8 characters long')
        
        if tier == 'enterprise' and not re.search(r'[A-Z]', password):
            raise ValueError('Enterprise accounts require uppercase letters')
        return password

try:    
    user = UserRegistration(
        username="JohnDoe",
        email="john@example.com",
        password="password123",
        subscription_tier="enterprise"
    )
except ValidationError as e:
    print(e)


1 validation error for UserRegistration
password
  Value error, Enterprise accounts require uppercase letters [type=value_error, input_value='password123', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/value_error


In [None]:
#creating custom validation logic for multiple fields using @model-validator
from datetime import datetime
from pydantic import model_validator

class EventRegistration(BaseModel):
   start_date: datetime
   end_date: datetime
   max_attendees: int = Field(gt=0)
   current_attendees: int = Field(ge=0)
  
   @model_validator(mode='after')
   def validate_event_constraints(self):
       if self.end_date <= self.start_date:
           raise ValueError('Event end date must be after start date')
          
       if self.current_attendees > self.max_attendees:
           raise ValueError('Current attendees cannot exceed maximum')
          
       return self

### Configuration management with environment variables

How to access the .env files safely using Pydantic so that our production applications are secure and deployment friendly?

Answer - Using `Pydantic's BaseSettings`

In [9]:
from pydantic_settings import BaseSettings
from pydantic import Field
from typing import List

class AppSetting(BaseSettings):
    database_url: str= Field(description='Database connection url')
    secret_key: str= Field(description='Secret key for JWT tokens')
    debug: bool= Field(default=False, alias='APP_DEBUG')
    allowed_hosts: List[str]= Field(default=["localhost"])
    
    class Config:
        env_file=".env"
        case_sensitive=False
        
#load settings automatically from environment and .env file
settings = AppSetting()
print(settings.database_url)
print(settings.secret_key)
print(settings.debug)
print(settings.allowed_hosts)

postgresql://user:password@localhost:5432/myapp
your-secret-key-here
False
['localhost', '127.0.0.1', 'yourdomain.com']
