# Decision Engine
This project simulates a professional Decision Engine (like Oscilar or Provenir). It uses a two-layered architecture: Policy Rules (Hard constraints) and Predictive Scoring (Decision Tree model).

## Import libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
import json

## Data upload and cleaning

In [41]:
training_data_path = "/Users/linaruiz/Documents/PolicyRiskProjects/credit_risk_training_data.csv"
data = pd.read_csv(training_data_path)

In [29]:
data.head(2)

Unnamed: 0,user_id,age,income,credit_score,existing_loans,employment_status,residence_type,loan_approved
0,1,56,1117,636,3,unemployed,mortgage,0
1,2,69,3196,375,1,employed,mortgage,1


QA Step: Ensures JSON schema and data types are correct.

In [32]:
# Missing values
# Option A: Drop rows with any missing values
data = data.dropna()#(subset=['credit_score', 'income'])

# Option B: Fill missing values with the median
#df['income'] = df['income'].fillna(df['income'].median())

In [30]:
required_fields = {
            "user_id": int, 
            "age": int, 
            "income": (int, float), 
            "credit_score": int, 
            "existing_loans": int, 
            "employment_status": str, 
            "residence_type": str
        }

In [14]:
# 1. Check for missing fields
missing = [field for field in required_fields if field not in data]
missing

[]

In [16]:
# 2. Check for data types
for field, expected_type in required_fields.items():
    print(expected_type)
    if not isinstance(data[field], expected_type):
        f"INVALID_TYPE: {field} (Expected {expected_type})"

<class 'int'>
<class 'int'>
(<class 'int'>, <class 'float'>)
<class 'int'>
<class 'int'>
<class 'str'>
<class 'str'>


In [27]:
# 3. Logical bounds (Sanity Checks)
if (data['age'] < 0) | (data['age'] > 120):
    "DATA_ANOMALY: Age out of bounds"
if data['income'] < 0:
    "DATA_ANOMALY: Negative income"
else:
    "VALID"

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

## Creating the decision tree model

In [52]:
def train_model(df):
        # Mapping categories to numbers (Common in decision engines)
        df['emp_map'] = df['employment_status'].map({'employed': 1, 'unemployed': 0, 'self-employed': 2})
        df['res_map'] = df['residence_type'].map({'owned': 2, 'rented': 1, 'mortgage': 0})
        
        X = df[['age', 'income', 'credit_score', 'existing_loans', 'emp_map', 'res_map']]
        y = df['loan_approved']
        
        clf = DecisionTreeClassifier(max_depth=5)
        return clf.fit(X, y)

In [53]:
model = train_model(data)
model

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,5
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [88]:
y

0       0
1       1
2       0
3       1
4       0
       ..
1995    1
1996    1
1997    0
1998    1
1999    0
Name: loan_approved, Length: 2000, dtype: int64

In [None]:
# estimate precision, recall, f1score

## JSON case

### Data validation

In [None]:
# This is an input example
payload = '{"user_id": 5, "age": 30, "income": 4000, "credit_score": 650, "existing_loans": 0, "employment_status": "employed", "residence_type": "owned"}'
payload1 = '{"user_idi": 5, "age": 30, "income": 4000, "credit_score": 650, "existing_loans": 0, "employment_status": "employed", "residence_type": "owned"}'
payload2 = '{"user_id": 5, "age": 30, "income": -4000, "credit_score": 650, "existing_loans": 0, "employment_status": "employed", "residence_type": "owned"}'

In [97]:
# This is an input example
data_json = json.loads(payload)
data_json1 = json.loads(payload1)
data_json2 = json.loads(payload2)
data_json

{'user_id': 5,
 'age': 30,
 'income': 4000,
 'credit_score': 650,
 'existing_loans': 0,
 'employment_status': 'employed',
 'residence_type': 'owned'}

In [81]:
def validate_input(data):
        """QA Step: Ensures JSON schema and data types are correct."""
        # 1. Check for missing fields
        missing = [field for field in required_fields if field not in data]
        if missing:
            return False, f"MISSING_FIELDS: {missing}"
        
        # 2. Check for data types
        for field, expected_type in required_fields.items():
            if not isinstance(data[field], expected_type):
                return False, f"INVALID_TYPE: {field} (Expected {expected_type})"
        
        # 3. Logical bounds (Sanity Checks)
        if data['age'] < 0 or data['age'] > 120:
            return False, "DATA_ANOMALY: Age out of bounds"
        if data['income'] < 0:
            return False, "DATA_ANOMALY: Negative income"
            
        return True, "VALID"

In [98]:
print(validate_input(data_json))
print(validate_input(data_json1))
print(validate_input(data_json2))

(True, 'VALID')
(False, "MISSING_FIELDS: ['user_id']")
(False, 'DATA_ANOMALY: Negative income')


### Policy Check

In [86]:
def evaluate_policy(data):
        """Phase 1: Hard Rules (Policy Layer)"""
        if data['age'] < 21:
            return False, "REJECT_POLICY: MIN_AGE_REQUIREMENT"
        if data['income'] < 1200:
            return False, "REJECT_POLICY: INSUFFICIENT_INCOME"
        if data['credit_score'] < 400:
            return False, "REJECT_POLICY: CRITICAL_LOW_SCORE"
        return True, "SUCCESS"

In [87]:
evaluate_policy(data_json)

(True, 'SUCCESS')

### Evaluate Score

In [55]:
def predict_score(data):
        """Phase 2: Predictive Logic (Decision Tree Layer)"""
        emp_map = {'employed': 1, 'unemployed': 0, 'self-employed': 2}
        res_map = {'owned': 2, 'rented': 1, 'mortgage': 0}
        
        features = [[
            data['age'], data['income'], data['credit_score'],
            data['existing_loans'], emp_map.get(data['employment_status'], 1),
            res_map.get(data['residence_type'], 1)
        ]]
        
        prediction = model.predict(features)[0]
        return "APPROVED" if prediction == 1 else "REJECTED_MODEL"

In [99]:
print(predict_score(data_json))
print(predict_score(data_json2)) # ??? it has a critical low score

APPROVED
APPROVED




## Class

In [60]:
class CreditDecisionEngine:
    
    def __init__(self, training_data_path):
        # Load data and train model (assumes model is trained as per previous step)
        self.required_fields = {
            "user_id": int, 
            "age": int, 
            "income": (int, float), 
            "credit_score": int, 
            "existing_loans": int, 
            "employment_status": str, 
            "residence_type": str
        }
        # 1. Load and Train the 'Brain'
        df = pd.read_csv(training_data_path)
        self.model = self._train_model(df)
        print("Model trained and loaded into Engine.")

    
    def _train_model(self, df):
        # Mapping categories to numbers (Common in decision engines)
        df['emp_map'] = df['employment_status'].map({'employed': 1, 'unemployed': 0, 'self-employed': 2})
        df['res_map'] = df['residence_type'].map({'owned': 2, 'rented': 1, 'mortgage': 0})
        
        X = df[['age', 'income', 'credit_score', 'existing_loans', 'emp_map', 'res_map']]
        y = df['loan_approved']
        
        clf = DecisionTreeClassifier(max_depth=5)
        return clf.fit(X, y)
    
    def validate_input(self, data):
        """QA Step: Ensures JSON schema and data types are correct."""
        # 1. Check for missing fields
        missing = [field for field in self.required_fields if field not in data]
        if missing:
            return False, f"MISSING_FIELDS: {missing}"
        
        # 2. Check for data types
        for field, expected_type in self.required_fields.items():
            if not isinstance(data[field], expected_type):
                return False, f"INVALID_TYPE: {field} (Expected {expected_type})"
        
        # 3. Logical bounds (Sanity Checks)
        if data['age'] < 0 or data['age'] > 120:
            return False, "DATA_ANOMALY: Age out of bounds"
        if data['income'] < 0:
            return False, "DATA_ANOMALY: Negative income"
            
        return True, "VALID"
    
    def evaluate_policy(self, data):
        """Phase 1: Hard Rules (Policy Layer)"""
        if data['age'] < 21:
            return False, "REJECT_POLICY: MIN_AGE_REQUIREMENT"
        if data['income'] < 1200:
            return False, "REJECT_POLICY: INSUFFICIENT_INCOME"
        if data['credit_score'] < 400:
            return False, "REJECT_POLICY: CRITICAL_LOW_SCORE"
        return True, "SUCCESS"
    
    def predict_score(self, data):
        """Phase 2: Predictive Logic (Decision Tree Layer)"""
        emp_map = {'employed': 1, 'unemployed': 0, 'self-employed': 2}
        res_map = {'owned': 2, 'rented': 1, 'mortgage': 0}
        
        features = [[
            data['age'], data['income'], data['credit_score'],
            data['existing_loans'], emp_map.get(data['employment_status'], 1),
            res_map.get(data['residence_type'], 1)
        ]]
        
        prediction = self.model.predict(features)[0]
        return "APPROVED" if prediction == 1 else "REJECTED_MODEL"

    def process_request(self, payload_json):
        """Main Orchestrator."""
        try:
            data = json.loads(payload_json)
        except json.JSONDecodeError:
            return {"status": "ERROR", "reason": "MALFORMED_JSON"}

        # Step 1: Validation
        is_valid, val_msg = self.validate_input(data)
        if not is_valid:
            return {"status": "ERROR", "reason": val_msg, "layer": "VALIDATION"}

        # Step 1: Policy Check
        passed, msg = self.evaluate_policy(data)
        if not passed:
            return {"user_id": data['user_id'], "decision": "DECLINED", "reason": msg, "layer": "POLICY"}
            
        # Step 2: Scoring Check
        result = self.predict_score(data)
        return {"user_id": data['user_id'], "decision": result, "layer": "SCORING"}

In [106]:
engine = CreditDecisionEngine(training_data_path) # instancing
res = engine.process_request(payload)
print(res)
print(f"{data_json['user_id']:<5} | {res['decision']:<10} | {res.get('layer', 'N/A')}")


Model trained and loaded into Engine.
{'user_id': 5, 'decision': 'APPROVED', 'layer': 'SCORING'}
5     | APPROVED   | SCORING




In [105]:
payload2 = '{"user_id": 5, "age": 30, "income": 0, "credit_score": 650, "existing_loans": 0, "employment_status": "employed", "residence_type": "owned"}'
res = engine.process_request(payload2)
print(res)
print(f"{data_json2['user_id']:<5} | {res['decision']:<10} | {res.get('layer', 'N/A')}")

{'user_id': 5, 'decision': 'DECLINED', 'reason': 'REJECT_POLICY: INSUFFICIENT_INCOME', 'layer': 'POLICY'}
5     | DECLINED   | POLICY


In [75]:
def run_qa_suite(engine):
    tests = [
        {
            "name": "Happy Path (Strong Profile)",
            "payload": '{"user_id": 1, "age": 30, "income": 5000, "credit_score": 750, "existing_loans": 0, "employment_status": "employed", "residence_type": "owned"}',
            "expected": "APPROVED"
        },
        {
            "name": "Edge Case (Exactly 21 years old)",
            "payload": '{"user_id": 2, "age": 21, "income": 2000, "credit_score": 600, "existing_loans": 1, "employment_status": "employed", "residence_type": "rented"}',
            "expected": "APPROVED"
        },
        {
            "name": "Policy Rejection (Underage)",
            "payload": '{"user_id": 3, "age": 18, "income": 5000, "credit_score": 700, "existing_loans": 0, "employment_status": "employed", "residence_type": "owned"}',
            "expected": "DECLINED"
        }#,
        #{
        #    "name": "Validation Error (String instead of Int)",
        #    "payload": '{"user_id": 4, "age": "thirty", "income": 5000, "credit_score": 750, "existing_loans": 0, "employment_status": "employed", "residence_type": "owned"}',
        #    "expected": "ERROR"
        #},
        #{
        #    "name": "Validation Error (Missing Field)",
        #    "payload": '{"user_id": 5, "age": 35}', 
        #    "expected": "ERROR"
        #},
        #{
        #    "name": "Data Anomaly (Negative Income)",
        #    "payload": '{"user_id": 6, "age": 40, "income": -100, "credit_score": 750, "existing_loans": 0, "employment_status": "employed", "residence_type": "owned"}',
        #    "expected": "ERROR"
        #}
    ]

    print(f"{'TEST NAME':<35} | {'RESULT':<10} | {'REASON/LAYER'}")
    print("-" * 70)
    for t in tests:
        res = engine.process_request(t['payload'])
        print(res)
        print(f"{t['name']:<35} | {res['decision']:<10} | {res.get('layer', 'N/A')}")



In [None]:
# Usage:
engine = CreditDecisionEngine(training_data_path)
run_qa_suite(engine)

Model trained and loaded into Engine.
TEST NAME                           | RESULT     | REASON/LAYER
----------------------------------------------------------------------
{'user_id': 1, 'decision': 'APPROVED', 'layer': 'SCORING'}
Happy Path (Strong Profile)         | APPROVED   | SCORING
{'user_id': 2, 'decision': 'APPROVED', 'layer': 'SCORING'}
Edge Case (Exactly 21 years old)    | APPROVED   | SCORING
{'user_id': 3, 'decision': 'DECLINED', 'reason': 'REJECT_POLICY: MIN_AGE_REQUIREMENT', 'layer': 'POLICY'}
Policy Rejection (Underage)         | DECLINED   | POLICY


