In [3]:
import keyword
import os
import sys
from typing import Any, Optional, Union, get_args, get_origin

import numpy as np
import pandas as pd

In [4]:
# Root folder of the project on the mounted Google Drive (Colab)
PROJECT_BASE_PATH = '/content/drive/MyDrive/Project_01'

# The project's own modules are kept in the 'src' sub-folder
SRC_PATH = os.path.join(PROJECT_BASE_PATH, 'src')

# Put 'src' at the front of the module search path, but only when it is not
# listed yet, so re-running this cell does not pile up duplicate entries
if sys.path.count(SRC_PATH) == 0:
    sys.path.insert(0, SRC_PATH)
    print("✅ Successfully added 'src' directory to Python path.")

# Paths is provided by the project's config module
from config import Paths

✅ Successfully added 'src' directory to Python path.


In [5]:
# Build the path configuration object (`cfg`) that the following cells read.
# Only the import is guarded: an ImportError raised while constructing Paths
# or creating directories is a different problem and must not be reported as
# "could not import Paths".
try:
    from config import Paths
except ImportError:
    print("❌ Error: Could not import Paths from config module.")
    # Re-raise so execution stops here. Swallowing the error would leave
    # `cfg` undefined and make the next cell fail with a misleading NameError.
    raise

cfg = Paths(PROJECT_BASE_PATH)
cfg.create_dirs()

print("\n✅ Project configuration (Paths) initialized successfully.")
print(f"Raw Data Path check: {cfg.TRAIN_RAW_FILE}")


✅ Project configuration (Paths) initialized successfully.
Raw Data Path check: /content/drive/MyDrive/Project_01/data/raw/application_train.csv


In [None]:
# Read the training-data schema table from the CSV stored in cfg.REPORT_DIR
schema_csv_path = os.path.join(cfg.REPORT_DIR, 'data_train_schema.csv')
schema_df_train = pd.read_csv(schema_csv_path)

In [7]:
# Pair every feature name with the dtype recorded for it in the schema table.
# When a feature name occurs more than once, the last row wins.
features_dtypes_dict = dict(
    zip(schema_df_train['feature_name'], schema_df_train['data_type'])
)

In [12]:
# Show the feature-name -> dtype mapping built in the previous cell as this cell's output
features_dtypes_dict

{'SK_ID_CURR': 'int64',
 'TARGET': 'int64',
 'NAME_CONTRACT_TYPE': 'object',
 'CODE_GENDER': 'object',
 'FLAG_OWN_CAR': 'object',
 'FLAG_OWN_REALTY': 'object',
 'CNT_CHILDREN': 'int64',
 'AMT_INCOME_TOTAL': 'float64',
 'AMT_CREDIT': 'float64',
 'AMT_ANNUITY': 'float64',
 'AMT_GOODS_PRICE': 'float64',
 'NAME_TYPE_SUITE': 'object',
 'NAME_INCOME_TYPE': 'object',
 'NAME_EDUCATION_TYPE': 'object',
 'NAME_FAMILY_STATUS': 'object',
 'NAME_HOUSING_TYPE': 'object',
 'REGION_POPULATION_RELATIVE': 'float64',
 'DAYS_BIRTH': 'int64',
 'DAYS_EMPLOYED': 'int64',
 'DAYS_REGISTRATION': 'float64',
 'DAYS_ID_PUBLISH': 'int64',
 'OWN_CAR_AGE': 'float64',
 'FLAG_MOBIL': 'int64',
 'FLAG_EMP_PHONE': 'int64',
 'FLAG_WORK_PHONE': 'int64',
 'FLAG_CONT_MOBILE': 'int64',
 'FLAG_PHONE': 'int64',
 'FLAG_EMAIL': 'int64',
 'OCCUPATION_TYPE': 'object',
 'CNT_FAM_MEMBERS': 'float64',
 'REGION_RATING_CLIENT': 'int64',
 'REGION_RATING_CLIENT_W_CITY': 'int64',
 'WEEKDAY_APPR_PROCESS_START': 'object',
 'HOUR_APPR_PROCESS_

In [None]:
# --- 1. Define Utility Functions (Map Dtypes) ---
def map_pandas_to_python_type(
    pandas_dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype, str],
) -> Any:
    """Map a pandas/NumPy dtype to the optional Python type used in the schema.

    The dtype is classified with pandas' own dtype predicates instead of
    substring matching on its name. Substring matching missed the pandas
    nullable dtypes ('Int64', 'Float64' start with a capital letter) and
    wrongly treated names such as 'interval[int64, right]' as integers.

    Args:
        pandas_dtype: A NumPy dtype, a pandas extension dtype, or a dtype
            name such as 'float64', 'object' or 'Int64'.

    Returns:
        Optional[bool], Optional[int], Optional[float] or Optional[str] for
        boolean, integer, floating-point and object/string dtypes
        respectively; Optional[Any] for every other dtype and for values
        that pandas cannot interpret as a dtype.
    """
    try:
        resolved = pd.api.types.pandas_dtype(pandas_dtype)
    except (TypeError, ValueError):
        # Not a dtype pandas understands: stay permissive rather than fail.
        return Optional[Any]

    dtype_checks = pd.api.types
    if dtype_checks.is_bool_dtype(resolved):
        return Optional[bool]
    if dtype_checks.is_integer_dtype(resolved):
        return Optional[int]
    if dtype_checks.is_float_dtype(resolved):
        return Optional[float]
    if dtype_checks.is_object_dtype(resolved) or dtype_checks.is_string_dtype(resolved):
        return Optional[str]
    return Optional[Any]

def _coerce_schema_dtype(raw_dtype: Any) -> Any:
    """Turn a dtype name from the schema table into a dtype object when possible."""
    try:
        return pd.api.types.pandas_dtype(raw_dtype)
    except (TypeError, ValueError):
        # Unknown or missing dtype names are passed through untouched; the
        # mapper falls back to Optional[Any] for anything it does not recognise.
        return raw_dtype


def _annotation_source(annotation: Any) -> str:
    """Render a typing annotation as source text usable in the generated module."""
    if annotation is Any:
        return "Any"
    if isinstance(annotation, type):
        # str(int) would give "<class 'int'>", which is not valid source code.
        return annotation.__name__
    if get_origin(annotation) is Union:
        members = [arg for arg in get_args(annotation) if arg is not type(None)]
        if len(members) == 1:
            return f"Optional[{_annotation_source(members[0])}]"
    # Last resort for annotations not covered above.
    return str(annotation).replace("typing.", "")


def generate_schemas_file(schema_df: pd.DataFrame, output_path: str):
    """Write a Pydantic schema module derived from a feature schema table.

    The generated module defines ``LoanApplicationRawInput`` with one field
    per row of ``schema_df`` (the 'TARGET' row is skipped) and a fixed
    ``PredictionResponse`` model. 'SK_ID_CURR' becomes a required ``int``
    field; every other feature becomes an optional field defaulting to None.

    Only source text is produced. No Pydantic model is built at runtime, so
    this function does not need pydantic installed in the notebook.

    Args:
        schema_df: Table with a 'feature_name' column and a 'data_type'
            column holding dtype names such as 'float64'.
        output_path: Path of the Python file to create or overwrite.

    Raises:
        KeyError: If 'feature_name' or 'data_type' is missing from schema_df.
        ValueError: If a feature name is not a valid Python identifier and
            therefore cannot be written as a class attribute.
    """
    id_description = "Unique ID of the application (MANDATORY)."

    # Keyed by feature name so a repeated name keeps only its last definition.
    field_sources = {}
    for raw_name, raw_dtype in zip(schema_df['feature_name'], schema_df['data_type']):
        feature_name = str(raw_name)

        if feature_name == 'TARGET':
            # The label is not part of the inference input.
            continue

        if not feature_name.isidentifier() or keyword.iskeyword(feature_name):
            raise ValueError(
                f"Feature name {feature_name!r} is not a valid Python identifier "
                "and cannot be used as a Pydantic field name."
            )

        if feature_name == 'SK_ID_CURR':
            # repr() quotes and escapes the description for the generated source.
            field_sources[feature_name] = (
                f"    {feature_name}: int = Field(..., description={id_description!r})\n"
            )
            continue

        annotation = map_pandas_to_python_type(_coerce_schema_dtype(raw_dtype))
        field_sources[feature_name] = (
            f"    {feature_name}: {_annotation_source(annotation)} = None\n"
        )

    # A class statement needs a body even when the schema has no usable rows.
    fields_block = "".join(field_sources.values()) or "    pass\n"

    class_definition = (
        "from pydantic import BaseModel, Field\n"
        # Any is required because unmapped dtypes are rendered as Optional[Any].
        "from typing import Any, List, Optional\n\n"
        "class LoanApplicationRawInput(BaseModel):\n"
        + fields_block
        # Response model, added for completeness.
        + "\nclass PredictionResponse(BaseModel):\n"
        "    SK_ID_CURR: int\n"
        "    probability_of_default: float\n"
    )

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(class_definition)

    print(f"✅ Successfully generated schemas.py at {output_path}")

In [None]:
# Generate schemas.py inside cfg.SRC_DIR from the training schema table
schemas_output_path = os.path.join(cfg.SRC_DIR, 'schemas.py')
generate_schemas_file(schema_df_train, schemas_output_path)