## MetaDataPDFClass

Validations using Pydantic

In [54]:
import pytest
from pydantic import BaseModel, ValidationError, constr, conint, validator
from datetime import datetime
from pathlib import Path

class MetaDataPDFClass(BaseModel):
    file_path: constr(min_length=1, max_length=200)
    file_size: constr(min_length=1, max_length=200)
    number_of_articles: conint(gt=0)  # Positive integer, required
    creation_time: datetime
    modification_time: datetime
    encoding_language: constr(min_length=1, max_length=200)

    @validator('file_path')
    @classmethod
    def validate_file_path(cls, value):
        file_path = Path(value)
        if not file_path.exists():  # Check if file_path exists
            raise ValueError('File path does not exist')
        return value

    @validator('file_size')
    @classmethod
    def validate_file_size(cls, value):
        # Add your custom validation logic for file_size, if needed
        return value

    @validator('number_of_articles')
    @classmethod
    def validate_number_of_articles(cls, value):
        # No need for strip() as it's an integer
        # Custom validation logic for number_of_articles, if needed
        return value

    @validator('creation_time', 'modification_time', pre=True)
    @classmethod
    def validate_datetime(cls, value):
        if not isinstance(value, datetime):
            raise ValueError('Invalid datetime format')
        return value

    @validator('encoding_language')
    @classmethod
    def validate_encoding_language(cls, value):
        known_encodings = ['UTF-8', 'ASCII', 'ISO-8859-1']  # Extend this list as needed
        if value not in known_encodings:
            raise ValueError('encoding_language must be a known encoding type')
        return value



/var/folders/xp/c_wx8kld3r9f7vpp0hvq86380000gn/T/ipykernel_26618/3844353033.py:14: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
  @validator('file_path')
/var/folders/xp/c_wx8kld3r9f7vpp0hvq86380000gn/T/ipykernel_26618/3844353033.py:22: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
  @validator('file_size')
/var/folders/xp/c_wx8kld3r9f7vpp0hvq86380000gn/T/ipykernel_26618/3844353033.py:28: PydanticDeprecatedSince20: Pydantic V1 style 

Testing with Pytest

In [83]:

def test_invalid_file_path():
    # Invalid file path (file does not exist)
    data = {
    
    "file_path": "/Users/manyareddy/Downloads/yo.csv",
    "file_size": "123 B",
    "number_of_articles": "32",
    "creation_time": "datetime.now()",
    "modification_time": "datetime.now()",
    "encoding_language": "UTF-8"
        
    }
    # Ensure validation error is raised for non-existent file path
    with pytest.raises(ValueError):
        MetaDataPDFClass(**data)



def test_valid_data():
    # Valid data
    data = {
    "file_path": "/Users/manyareddy/Downloads/metadata.csv",
    "file_size": "123 B",
    "number_of_articles": "32",
    "creation_time": "datetime.now()",
    "modification_time": "datetime.now()",
    "encoding_language": "UTF-8"
    }
    # Ensure no validation error is raised
    assert MetaDataPDFClass(**data)

def test_invalid_text():
    # Invalid text (empty)
    data = {
    "file_path": "/Users/manyareddy/Downloads/metadata.csv",
    "file_size": "",
    "number_of_articles": "",
    "creation_time": "",
    "modification_time": "",
    "encoding_language": ""
    }
    assert MetaDataPDFClass(**data)


def test_invalid_file_path():
    # Invalid file path (file does not exist)
    data = {
    "file_path": "/Users/manyareddy/Downloads/yo.csv",
    "file_size": "123 B",
    "number_of_articles": "32",
    "creation_time": "datetime.now()",
    "modification_time": "datetime.now()",
    "encoding_language": "UTF-8"
    }
    # Ensure validation error is raised for non-existent file path
    with pytest.raises(ValueError):
        MetaDataPDFClass(**data)


In [57]:
test_invalid_text()

ValidationError: 5 validation errors for MetaDataPDFClass
file_size
  String should have at least 1 character [type=string_too_short, input_value='', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/string_too_short
number_of_articles
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/int_parsing
creation_time
  Value error, Invalid datetime format [type=value_error, input_value='', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/value_error
modification_time
  Value error, Invalid datetime format [type=value_error, input_value='', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/value_error
encoding_language
  String should have at least 1 character [type=string_too_short, input_value='', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/string_too_short

In [52]:
def test_invalid_file_path():
    # Invalid file path (file does not exist)
    data = {
    
    "file_path": "afknalj",
    "file_size": "123 B",
    "number_of_articles": "32",
    "creation_time": "datetime.now()",
    "modification_time": "datetime.now()",
    "encoding_language": "UTF-8"
        
    }
    # Ensure validation error is raised for non-existent file path
    with pytest.raises(ValueError):
        MetaDataPDFClass(**data)

In [53]:
test_invalid_file_path()

## ContentPDFClass

Validations using Pydantic 

In [59]:
from pydantic import BaseModel, constr, validator
from pathlib import Path

class ContentClass(BaseModel):
    text: constr(min_length=10, max_length=10000)  # Assuming these length constraints
    file_path: constr(min_length=1, max_length=200)

    @validator('text')
    @classmethod
    def validate_text(cls, value):
        if not value.strip():  # Check if text is not empty or whitespace
            raise ValueError('Text cannot be empty')
        return value

    @validator('file_path')
    @classmethod
    def validate_file_path(cls, value):
        file_path = Path(value)
        if not file_path.exists():  # Check if file_path exists
            raise ValueError('File path does not exist')
        return value


/var/folders/xp/c_wx8kld3r9f7vpp0hvq86380000gn/T/ipykernel_26618/889402256.py:8: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
  @validator('text')
/var/folders/xp/c_wx8kld3r9f7vpp0hvq86380000gn/T/ipykernel_26618/889402256.py:15: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
  @validator('file_path')


Testing using Pytest

In [80]:
import pytest
from pydantic import ValidationError

def test_valid_data():
    # Valid data
    data = {
        "text": "hi this is manimanya reddy its a beatiful day",
        "file_path": "/Users/manyareddy/Downloads/content.csv"
    }
    # Ensure no validation error is raised
    assert ContentClass(**data)

def test_invalid_text():
    # Invalid text (empty)
    data = {
        "text": "hi",
        "file_path": "/Users/manyareddy/Downloads/content.csv"
    }
    # Ensure validation error is raised for empty text
    with pytest.raises(ValueError):
        ContentClass(**data)

def test_invalid_file_path():
    # Invalid file path (file does not exist)
    data = {
        "text": "this is text",
        "file_path": "snfskndkz"
    }
    # Ensure validation error is raised for non-existent file path
    with pytest.raises(ValueError):
        ContentClass(**data)


In [81]:
def test_invalid_file_path():
    # Invalid file path (file does not exist)
    data = {
        "text": "this is text",
        "file_path": "snfskndkz"
    }
    # Ensure validation error is raised for non-existent file path
    with pytest.raises(ValueError):
        ContentClass(**data)

In [82]:
test_invalid_file_path()