# Data Validation

## Example 1: `FillCategoricalMissingValue` Transform

In [None]:
# Install our Python SDK (the %pip magic installs into the running kernel's environment).
# NOTE(review): the version is unpinned; consider pinning it for reproducible runs.
%pip install sigtech 

In [None]:
# Import our Python SDK and other libraries we'll need
import json
import logging
import os
import tempfile
from pathlib import Path
from uuid import uuid4

import sigtech.api as sig

In [None]:
# Define your API key as a string by replacing <YOUR_API_KEY> below with the key you have
# generated, e.g. api_key = 'sig_A1B2C3D4E5f6g7h8i9'.
# Remember to delete it before sharing your notebook with others.
api_key = '<YOUR_API_KEY>'
if api_key != '<YOUR_API_KEY>':
    # A key pasted above takes precedence over anything already in the environment.
    os.environ['SIGTECH_API_KEY'] = api_key
else:
    # Placeholder left unedited: keep a key that is already exported in the environment
    # rather than overwriting it with the placeholder text.
    os.environ.setdefault('SIGTECH_API_KEY', api_key)

In [None]:
# Configure logger (`logging` is imported in the imports cell at the top of the notebook).
# DEBUG is the most verbose level; switch to logging.INFO for quieter output.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

#### 3. Create a session
After installing our Python SDK, defining your API key, importing any additional Python libraries or functions you require, and setting any default parameters, initialize your session.

In [None]:
# Start the SDK session.
# NOTE(review): presumably picks up SIGTECH_API_KEY from the environment set above — confirm in the SDK docs.
sig.init()

## 1. Data validation with auto-generated rules

#### 1.1 Get an existing data validation project, or create one if it does not exist

In [None]:
# A random UUID suffix gives every run of this notebook its own project name.
project_name = f"ice_bond_references_{uuid4()}"

In [None]:
# Get the validation project with this name (per the heading above, it is created if it does not exist).
ice_bond_references = sig.data.Validation(name=project_name)

#### 1.2 Add files

In [None]:
# Path to the sample CSV; replace <SIGTECH_ROOT_DIR> with the root of your local SDK checkout.
bond_references_csv = os.path.join(
    "<SIGTECH_ROOT_DIR>", "examples", "validation", "data", "bond_references_data.csv"
)

# Upload the file to the validation project and show the service's response.
upload_response = ice_bond_references.upload_file(path=bond_references_csv)
print(upload_response)

#### 1.3 Run validation

Auto-generated validation rules are used on the first run. These can be further refined, as we will see later. Setting up validation rules and parameters is an iterative process that depends on the nature of the dataset.

In [None]:
# First validation run: no rules have been configured yet, so the auto-generated rules apply.
validation_results = ice_bond_references.validate()
print(validation_results)

#### 1.4 View current project config

In [None]:
# Fetch the project's current configuration and print it for inspection.
config = ice_bond_references.get_config()
print(config)

## 2. Edit project config to include `FillCategoricalMissingValue` Transform

#### 2.1 Introduction to the `FillCategoricalMissingValue` Transform

In [None]:
# List the names exposed by the SDK's transforms namespace.
dir(sig.data.transforms)

In [None]:
# IPython help syntax: a trailing `?` displays the signature and docstring of the transform.
sig.data.transforms.FillCategoricalMissingValue?

#### 2.2 Set `NoNullRule` rule and `FillCategoricalMissingValue` transform

In [None]:
# List the names exposed by the SDK's rules namespace.
dir(sig.data.rules)

In [None]:
# IPython help syntax: a trailing `?` displays the signature and docstring of the rule.
sig.data.rules.NoNullRule?

In [None]:
# Example of constructing a NoNullRule for the "CreditRating" column.
# NOTE(review): `r` is not referenced again in the visible cells; the update_config
# call that follows constructs its own NoNullRule instance.
r = sig.data.rules.NoNullRule(columns=["CreditRating"])

In [None]:
# Transform applied to the "CreditRating" column, with "Issuer" as the category column.
# NOTE(review): presumably fills missing ratings per issuer; see the `?` help for the
# exact meaning of `threshold`.
fill_credit_rating = sig.data.transforms.FillCategoricalMissingValue(
    column="CreditRating",
    category_column="Issuer",
    threshold=50.0,
)

# Rule checked against the "CreditRating" column.
credit_rating_no_nulls = sig.data.rules.NoNullRule(columns=["CreditRating"])

# Store the transform and the rule in the project's configuration.
ice_bond_references.update_config(
    transforms=[fill_credit_rating],
    rules=[credit_rating_no_nulls],
)

#### 2.3 View new config

In [None]:
# Fetch the configuration again (rebinding `config`) so the effect of update_config can be inspected.
config = ice_bond_references.get_config()
print(config)

## 3. Run data validation with the new config

In [None]:
# Validate again now that the config has been updated.
# Note: this rebinds `validation_results`, replacing the result of the first run.
validation_results = ice_bond_references.validate()
print(validation_results)

#### Clean up

In [None]:
# Clean up: delete the validation project used in this example.
ice_bond_references.delete()