In [6]:
import sys
sys.path.append("../")

In [11]:
import os
from typing import Any, Callable, Set, Union
from enum import Enum
from pydantic import (
    AliasChoices,
    AmqpDsn,
    BaseModel,
    Field,
    ImportString,
    PostgresDsn,
    RedisDsn,
    validator,
    ConfigDict,
    field_validator,
)

from pydantic_settings import BaseSettings, SettingsConfigDict
from desci_sense.shared_functions.web_extractors.metadata_extractors import MetadataExtractionType


In [12]:
def validate_env_var(env_var_name: str, value: Union[str,None]):
    """
    If `value` is not `None`, returns `value`.
    If `value` is `None`, checks if `env_var_name` is defined in the environment 
    and returns its value if so. If it isn't defined, raises an error.
    """
    if value is not None:
        return value
    else:
        env_value = os.getenv(env_var_name)
        if env_value is not None:
            return env_value
        else:
            raise ValueError(f"Environment variable '{env_var_name}' is not defined.")

In [16]:
class OpenrouterAPIConfig(BaseSettings):
    api_base: str = Field(default="https://openrouter.ai/api/v1",
                         description="Base URL for Openrouter API")
    api_key: str | None = Field(description="Openrouter API key",
                                default=None,)
    referer: str | None = Field(default=None,
                                description="Referer for tracking on Openrouter",)
    
    @field_validator('api_key')
    def load_api_key_from_env(cls, v):
        return validate_env_var("OPENROUTER_API_KEY", v)
    
    @field_validator('referer')
    def load_referer_from_env(cls, v):
        return validate_env_var("OPENROUTER_REFERRER", v)

In [18]:
class WandbConfig(BaseSettings):
    entity: str = Field(default="common-sense-makers", 
                        description="wandb entity to log runs to.")
    project: str | None = Field(description="wandb project to log runs to",
                                default=None,)
    
    @field_validator('project')
    def load_project_from_env(cls, v):
        return validate_env_var("WANDB_PROJECT", v)
    
    

In [19]:
WandbConfig(pro)

WandbConfig(entity='common-sense-makers', project='st-demo-sandbox')

In [17]:
config = OpenrouterAPIConfig()
config

OpenrouterAPIConfig(api_base='https://openrouter.ai/api/v1', api_key='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', referer='http://localhost:3000')

In [17]:

    

class ModelConfig(BaseSettings):
    name: str = Field(default="mistralai/mistral-7b-instruct", 
                            description="Name of the model to be intialized.")
    temperature: str = Field(default="0.6",
                             description="Temperature paramater to use when sampling model outputs.")

class MetadataExtractionConfig(BaseSettings):
    extraction_method: MetadataExtractionType = Field(default=MetadataExtractionType.NONE, 
                                                      description="Type of URL metadata extraction method to use.")
    max_summary_length: int = Field(default=500,
                                    description="Maximum length of summary to extract -  \
                                          anything beyond will be truncated. Set to -1 to take full length.")

class KeywordExtractionConfig(BaseSettings):
    kw_model_config: ModelConfig = Field(default_factory=ModelConfig,
                                      description="Model configuration for the \
                                        keyword extraction model")
    kw_metadata_config: MetadataExtractionConfig = Field(default_factory=MetadataExtractionConfig,
                                                         description="Metadata extraction config for \
                                                          keyword extraction model.")
    enabled: bool = Field(default=True, 
                          description="Whether keyword extraction is enabled or not.")
    ref_metadata_method: MetadataExtractionType = Field(default=MetadataExtractionType.NONE, description="Type of URL metadata extraction method to use.")
    max_keywords: int = Field(default=6,
                              description="Maximum number of keywords to extract. \
                                Set to -1 for unlimited keywords.")
    
    
class WandbConfig(BaseSettings):
    model_config = ConfigDict(
        populate_by_name=True
    )
    entity: str = Field(default="common-sense-makers", 
                        description="wandb entity to log runs to.")
    project: str = Field(alias="WANDB_PROJECT",
                         description="wandb project to log runs to")
    
    

    

class ParserConfig(BaseSettings):
    semantic_model_config: ModelConfig = Field(default_factory=ModelConfig,
                                      description="Model configuration for the \
                                        semantic parsing model")
    openrouter_api_config: OpenrouterAPIConfig = Field(default_factory=OpenrouterAPIConfig,
                                                       description="Settings for Openrouter API.")
    metadata_extract_config: MetadataExtractionConfig = Field(default_factory=MetadataExtractionConfig,
                                                         description="Metadata extraction config for \
                                                          semantic parsing model.")
    wandb_config: WandbConfig = Field(default_factory=WandbConfig,
                                      description="Wandb config for analytics tracking.")
    keyword_config: KeywordExtractionConfig = Field(default_factory=KeywordExtractionConfig,
                                                    description="Config for keyword extraction.")

In [18]:
WandbConfig(entity="s", project="d")

ValidationError: 1 validation error for WandbConfig
project
  Extra inputs are not permitted [type=extra_forbidden, input_value='d', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/extra_forbidden

In [83]:
pc = ParserConfig()
pc_json_str = pc.model_dump_json(by_alias=True)
print(pc.model_dump_json(indent=4, by_alias=True))


{
    "semantic_model_config": {
        "name": "mistralai/mistral-7b-instruct",
        "temperature": "0.6"
    },
    "openrouter_api_config": {
        "api_base": "https://openrouter.ai/api/v1",
        "OPENROUTER_API_KEY": "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",
        "OPENROUTER_REFERRER": "http://localhost:3000"
    },
    "metadata_extract_config": {
        "extraction_method": "none",
        "max_summary_length": 500
    },
    "wandb_config": {
        "entity": "common-sense-makers",
        "WANDB_PROJECT": "st-demo-sandbox"
    },
    "keyword_config": {
        "kw_model_config": {
            "name": "mistralai/mistral-7b-instruct",
            "temperature": "0.6"
        },
        "kw_metadata_config": {
            "extraction_method": "none",
            "max_summary_length": 500
        },
        "enabled": true,
        "ref_metadata_method": "none",
        "max_keywords": 6
    }
}


In [84]:
ParserConfig.model_validate_json(pc_json_str)

ParserConfig(semantic_model_config=ModelConfig(name='mistralai/mistral-7b-instruct', temperature='0.6'), openrouter_api_config=OpenrouterAPIConfig(api_base='https://openrouter.ai/api/v1', api_key='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', referer='http://localhost:3000'), metadata_extract_config=MetadataExtractionConfig(extraction_method=<MetadataExtractionType.NONE: 'none'>, max_summary_length=500), wandb_config=WandbConfig(entity='common-sense-makers', project='st-demo-sandbox'), keyword_config=KeywordExtractionConfig(kw_model_config=ModelConfig(name='mistralai/mistral-7b-instruct', temperature='0.6'), kw_metadata_config=MetadataExtractionConfig(extraction_method=<MetadataExtractionType.NONE: 'none'>, max_summary_length=500), enabled=True, ref_metadata_method=<MetadataExtractionType.NONE: 'none'>, max_keywords=6))

In [72]:
wandb_config = WandbConfig()
wandb_config

WandbConfig(entity='common-sense-makers', project='st-demo-sandbox')

In [26]:
from pydantic import BaseModel, ConfigDict, Field


class User(BaseSettings):
    model_config = ConfigDict(populate_by_name=True, extra=True)

    name: str = Field(alias='full_name')  
    age: int


user = User(full_name='John Doe', age=20)  
print(user)
user = User(name='John Doe', age=20)  
print(user)

name='John Doe' age=20


ValidationError: 1 validation error for User
name
  Extra inputs are not permitted [type=extra_forbidden, input_value='John Doe', input_type=str]
    For further information visit https://errors.pydantic.dev/2.6/v/extra_forbidden

In [30]:
class WandbConfig(BaseSettings):
    model_config = ConfigDict(
        populate_by_name=True, extra="ignore"
    )
    entity: str = Field(default="common-sense-makers", 
                        description="wandb entity to log runs to.")
    project: str = Field(alias="WANDB_PROJECT",
                         description="wandb project to log runs to")


print(WandbConfig(entity="s", WANDB_PROJECT="d"))
print(WandbConfig(entity="s", project="d"))

entity='s' project='d'
entity='s' project='d'


In [4]:
from pydantic.tools import parse_obj_as

In [33]:
from pydantic import BaseModel, ConfigDict, Field
import os

class User(BaseSettings):
    model_config = ConfigDict(populate_by_name=True)

    name: str = Field(alias='full_name')  
    age: int


user = User(full_name='John Doe', age=20)  
print(user)
user = User(name='John Doe', age=20)  
print(user)

name='John Doe' age=20
name='John Doe' age=20


In [None]:
os.environ["full_name"] = "foo"

In [5]:
import json
d = {
    "kw_config": {
        "enabled": True,
        "ref_metadata_method": "none"
    }
}
d_str = json.dumps(d)

In [6]:
ParserConfig.model_validate_json(d_str)

ValidationError: 1 validation error for ParserConfig
kw_config.ref_metadata_method
  Input should be 'pear' or 'banana' [type=enum, input_value='none', input_type=str]

In [58]:
from pydantic import BaseModel, Field
from typing import Optional

# Define Model A
class ModelConfig(BaseSettings, BaseModel):
    name: str = Field(default="mistralai/mistral-7b-instruct", 
                            description="Name of the model to be intialized.")
    temperature: str = Field(default="0.6",
                             description="Temperature paramater to use when sampling model outputs.")

# Define Model B which nests Model A
class KeywordExtractionConfig(BaseSettings, BaseModel):
    kw_model_config: ModelConfig = Field(default_factory=ModelConfig, description="Model configuration for the \
                                        keyword extraction model")  # Default factory for nested ModelA
    enabled: bool = Field(default=True, 
                          description="Whether keyword extraction is enabled or not.")
    ref_metadata_method: MetadataExtractionType = Field(default=MetadataExtractionType.NONE,
                                                        description="Type of URL metadata extraction method to use.")
    max_keywords: int = Field(default=6,
                              description="Maximum number of keywords to extract. \
                                Set to -1 for unlimited keywords.")

# Example Usage
model_b_instance = KeywordExtractionConfig()
# print(model_b_instance.json(indent=2))

TypeError: 'FieldInfo' object is not iterable

In [50]:
model_b_instance.model_dump_json()

'{"model_a":{"name":"mistralai/mistral-7b-instruct","temperature":"0.6"},"enabled":true,"ref_metadata_method":"none","max_keywords":6}'

In [None]:
model_b_instance

In [None]:
ParserConfig(kw_config=KeywordExtractionConfig)

In [3]:
config = init_config()
config

{'general': {'parser_type': 'base'},
 'model': {'model_name': 'mistralai/mistral-7b-instruct', 'temperature': 0.6},
 'prompt': {'template_path': 'desci_sense/prompting/templates/p4.txt'},
 'wandb': {'entity': 'common-sense-makers', 'project': 'st-demo'}}

In [7]:
type(config)

confection.Config

In [4]:
print(config.to_str())

[general]
parser_type = "base"

[model]
model_name = "mistralai/mistral-7b-instruct"
temperature = 0.6

[prompt]
template_path = "desci_sense/prompting/templates/p4.txt"

[wandb]
entity = "common-sense-makers"
project = "st-demo"


In [6]:
Config().from_disk("/home/rkl25/dev/common_sense/desci-sense/etc/configs/config_multi_st.cfg")

{'general': {'parser_type': 'base'},
 'model': {'model_name': 'fireworks/mixtral-8x7b-fw-chat',
  'temperature': 0.6000000000000001},
 'prompt': {'template_path': 'desci_sense/prompting/templates/p5_multi.txt'},
 'wandb': {'entity': 'common-sense-makers', 'project': 'st-demo-sandbox'}}

In [9]:
config["model"]["temperature"] = 0.7

In [10]:
config

{'general': {'parser_type': 'base'},
 'model': {'model_name': 'mistralai/mistral-7b-instruct', 'temperature': 0.7},
 'prompt': {'template_path': 'desci_sense/prompting/templates/p4.txt'},
 'wandb': {'entity': 'common-sense-makers', 'project': 'st-demo'}}