In [None]:
# %pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pureml-evaluate
# %pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pureml
# %pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pureml-policy
# %pip install lightgbm

In [2]:
# Import all the required packages
import pureml
from pureml.decorators import load_data,transformer,dataset,model
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import warnings
import random

# Silence library warnings so the cell outputs stay readable.
warnings.simplefilter("ignore")

# One seed shared by every stochastic step in this notebook.
rand_seed = 1234

# NumPy's global generator backs np.random.normal and np.random.choice,
# which the transformer functions below call.
np.random.seed(rand_seed)

# The stdlib generator backs random.shuffle in add_new_column; without this
# line the generated 'race' column differs on every run.
random.seed(rand_seed)

In [None]:
@load_data()
def load_dataset():
    """Read the credit-card default CSV from the working directory.

    ``header=1`` makes pandas take the column names from the second line of
    the file; the line above it is not used.

    Returns:
        pandas.DataFrame: the parsed file contents.
    """
    csv_path = 'default of credit card clients.csv'
    return pd.read_csv(csv_path, header=1)

In [None]:
@transformer()
def remove_columns(df):
    """Return a new frame equal to ``df`` without its ``ID`` column.

    Raises:
        KeyError: if ``df`` has no ``ID`` column.
    """
    unwanted = ['ID']
    return df.drop(columns=unwanted)

In [None]:
@transformer()
def rename_columns(df):
    """Return a copy of ``df`` with three columns renamed.

    ``PAY_0`` becomes ``PAY_1``, ``default payment next month`` becomes
    ``default`` and ``SEX`` becomes ``sex``. Columns that are absent are
    simply skipped; all other columns keep their names.
    """
    new_names = {
        "PAY_0": "PAY_1",
        "default payment next month": "default",
        "SEX": "sex",
    }
    return df.rename(columns=new_names)

In [None]:
@transformer()
def dataset_imbalances(df):
    """Split the frame into model features, target and sensitive attribute.

    The ``sex``, ``EDUCATION`` and ``MARRIAGE`` columns are treated as
    categorical. ``default`` becomes the target, ``sex`` the sensitive
    attribute, and every remaining column is one-hot encoded into the
    feature matrix. A synthetic ``Interest`` feature is appended, drawn from
    a normal distribution whose mean is ``2 * default`` and whose standard
    deviation is the numeric ``sex`` code of the same row.

    The input frame is not modified.

    Args:
        df: DataFrame containing at least the columns ``sex``, ``EDUCATION``,
            ``MARRIAGE`` and ``default``.

    Returns:
        dict: ``'X'`` (feature DataFrame including ``Interest``), ``'Y'``
        (the ``default`` column) and ``'A_str'`` (``sex`` mapped with
        ``{1: "male", 2: "female"}``). All three share ``df``'s index.
    """
    categorical_features = ["sex", "EDUCATION", "MARRIAGE"]

    # astype with a mapping returns a new frame, so the caller's DataFrame
    # keeps its original dtypes.
    df = df.astype({col_name: "category" for col_name in categorical_features})

    Y, A = df.loc[:, "default"], df.loc[:, "sex"]
    X = pd.get_dummies(df.drop(columns=["default", "sex"]))

    A_str = A.map({1: "male", 2: "female"})

    # Draws from NumPy's global generator, so results depend on np.random.seed.
    interest_values = np.random.normal(loc=2 * Y, scale=A)

    # Attach the raw array positionally. Wrapping it in a DataFrame with a
    # fresh 0..n-1 index and concatenating on axis=1 would align on labels
    # and produce NaN-padded extra rows whenever df's index is not 0..n-1.
    X = X.assign(Interest=interest_values)

    return {'X': X, 'Y': Y, 'A_str': A_str}


In [None]:
@transformer()
def resample_training_data(X_train, Y_train, A_train):
    """Rebalance the training split by sub-sampling the negative class.

    All rows whose label is 1 are kept. From the rows whose label is 0, as
    many index labels as there are positives are drawn with
    ``np.random.choice`` (default settings, i.e. with replacement) and merged
    with the positive labels through ``Index.union``. The three inputs are
    then re-indexed with the merged labels.

    Args:
        X_train: feature frame indexed like ``Y_train``.
        Y_train: Series of 0/1 labels.
        A_train: sensitive-attribute Series indexed like ``Y_train``.

    Returns:
        dict: the re-indexed ``"X_train"``, ``"Y_train"`` and ``"A_train"``.
    """
    is_negative = (Y_train == 0).to_numpy()
    is_positive = (Y_train == 1).to_numpy()
    minority_labels = Y_train.index[is_positive]
    majority_labels = Y_train.index[is_negative]

    # Draws from NumPy's global generator; seed it for repeatable output.
    sampled_majority = np.random.choice(a=majority_labels, size=len(minority_labels))
    keep = minority_labels.union(sampled_majority)

    return {
        "X_train": X_train.loc[keep, :],
        "Y_train": Y_train.loc[keep],
        "A_train": A_train.loc[keep],
    }



In [None]:

@transformer()
def add_new_column(sensitive_features):
    """Append a randomly assigned ``race`` column to the sensitive features.

    The three labels 'Indian', 'African' and 'American' are repeated
    cyclically until there is one per row, shuffled with the stdlib
    ``random.shuffle`` (global generator), and placed next to the input.
    The input's index is discarded, so the result is indexed 0..n-1.

    Args:
        sensitive_features: pandas object with one row per sample.

    Returns:
        pandas.DataFrame: the input columns followed by ``race``.
    """
    values = ['Indian', 'African', 'American']
    row_count = sensitive_features.shape[0]

    # Cycle through the labels so each appears (almost) equally often.
    race_labels = [values[position % len(values)] for position in range(row_count)]
    random.shuffle(race_labels)

    race_frame = pd.DataFrame(np.array(race_labels), columns=['race'])
    positional_features = sensitive_features.reset_index(drop=True)
    return pd.concat([positional_features, race_frame], axis=1)

In [None]:
@dataset(label='Credit Loan Dataset',upload=True)
def create_dataset():
    """Assemble the train/test splits returned for the credit-loan dataset.

    Steps, in order: load the CSV, drop and rename columns, derive the
    features / target / sensitive attribute, make a stratified split with
    35% of the rows held out for testing, rebalance the training part, and
    add a ``race`` column to the test-side sensitive features.

    Returns:
        dict: ``x_train`` and ``x_test`` feature frames, ``y_train`` and
        ``y_test`` converted with ``.to_numpy()``, and
        ``sensitive_features`` (the output of ``add_new_column``).
    """
    raw = load_dataset()
    trimmed = remove_columns(raw)
    renamed = rename_columns(trimmed)

    parts = dataset_imbalances(renamed)
    X, Y, A_str = parts['X'], parts['Y'], parts['A_str']

    # Stratify on the target so both splits keep the same default rate.
    X_train, X_test, y_train, y_test, A_train, A_test = train_test_split(
        X, Y, A_str, test_size=0.35, stratify=Y
    )

    # Only the training split is rebalanced; the test split stays as drawn.
    balanced = resample_training_data(X_train, y_train, A_train)
    X_train = balanced['X_train']
    y_train = balanced['Y_train']

    A_test = add_new_column(sensitive_features=A_test)

    return {
        "x_train": X_train,
        "y_train": y_train.to_numpy(),
        "x_test": X_test,
        "y_test": y_test.to_numpy(),
        "sensitive_features": A_test,
    }


# Run the whole preparation pipeline once. create_dataset's body returns a
# dict with x_train / y_train / x_test / y_test / sensitive_features.
# NOTE(review): the @dataset decorator may wrap that return value — confirm.
data_created = create_dataset()

In [3]:
@model(label='Credit_Underwriting')
def create_model():
    """Fit a scaling + LightGBM pipeline on the stored training split.

    The training data is fetched by label from ``pureml.dataset`` rather than
    taken from a notebook variable, so version ``v1`` of
    'Credit Loan Dataset' must already exist. The LightGBM parameters are
    passed to ``pureml.log`` before the classifier is built.

    Returns:
        sklearn.pipeline.Pipeline: the fitted pipeline (StandardScaler
        followed by LGBMClassifier).
    """
    training = pureml.dataset.fetch('Credit Loan Dataset:v1')
    features = training['x_train']
    target = training['y_train']

    lgb_params = dict(
        objective="binary",
        metric="auc",
        learning_rate=0.412,
        num_leaves=10,
        max_depth=3,
        random_state=rand_seed,  # notebook-wide seed from the config cell
        n_jobs=1,
    )
    pureml.log(params=lgb_params)

    scaling_step = ("preprocessing", StandardScaler())
    classifier_step = ("classifier", lgb.LGBMClassifier(**lgb_params))
    estimator = Pipeline(steps=[scaling_step, classifier_step])

    estimator.fit(features, target)
    return estimator

# Train the model. create_model's body returns the fitted Pipeline.
# NOTE(review): the @model decorator may wrap that return value — confirm.
model_lgb = create_model()

[LightGBM] [Info] Number of positive: 4313, number of negative: 4313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000776 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3523
[LightGBM] [Info] Number of data points in the train set: 8626, number of used features: 29
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


No metrics are found in config


No figures are found in config


In [4]:
# Associate 'predict.py' with version v1 of the 'Credit_Underwriting' model.
# NOTE(review): presumably predict.py must sit next to this notebook and the
# v1 model must already be registered — confirm against the PureML docs.
pureml.predict.add(label='Credit_Underwriting:v1',paths={'predict':'predict.py'})

In [6]:
from pureml_policy import policy_eval
#policy = faircredit

# Evaluate the model/dataset pair against the named framework.
# The model label is spelled with an underscore here because that is the
# label used when the model was created (@model(label='Credit_Underwriting'))
# and when predict.py was attached to it; a label with a space would point
# at a different model than the one this notebook trains.
results = policy_eval.eval(
    framework_name="EU AI Act for High Risk",
    label_model='Credit_Underwriting:v1',
    label_dataset='Credit Loan Dataset:v1',
)

Line 59. grade.py: precision
Line 60. grade.py: precision
Line  80. policy_base. risk_analysis: {'category': 'performance', 'risk': 'precision', 'value': 0.8011192423590185}
Line 59. grade.py: recall
Line 60. grade.py: recall
Line  80. policy_base. risk_analysis: {'category': 'performance', 'risk': 'recall', 'value': 0.5168008886420439}
Line 59. grade.py: accuracy
Line 60. grade.py: accuracy
Line  80. policy_base. risk_analysis: {'category': 'performance', 'risk': 'accuracy', 'value': 0.7902857142857143}
Line 59. grade.py: disparate_impact
'disparate_impact'
argument of type 'NoneType' is not iterable
Line 294. sensitive_features:           sex      race
0        male    Indian
1        male    Indian
2      female    Indian
3      female  American
4      female   African
...       ...       ...
10495  female  American
10496    male  American
10497  female  American
10498  female  American
10499    male  American

[10500 rows x 2 columns]
Line 295. type of sensitive_features: <class 'p

Line 59. grade.py: disparate_impact
'disparate_impact'
argument of type 'NoneType' is not iterable
Line 294. sensitive_features:           sex      race
0        male    Indian
1        male    Indian
2      female    Indian
3      female  American
4      female   African
...       ...       ...
10495  female  American
10496    male  American
10497  female  American
10498  female  American
10499    male  American

[10500 rows x 2 columns]
Line 295. type of sensitive_features: <class 'pandas.core.frame.DataFrame'>
Line 296. type of references: <class 'numpy.ndarray'>
Line 297. type of predictions: <class 'numpy.ndarray'>
{'disparate_impact': {'value': {'female_African': 1.0, 'female_American': 0.9573426315854092, 'female_Indian': 0.935974777657522, 'male_African': 0.8437989317333723, 'male_American': 0.8698544039717611, 'male_Indian': 0.8325545580888705}}}
Line  80. policy_base. risk_analysis: {'category': 'fairness', 'risk': 'disparate_impact', 'value': 0.8325545580888705}
Line 119. gr

Line 59. grade.py: disparate_impact
'disparate_impact'
argument of type 'NoneType' is not iterable
Line 294. sensitive_features:           sex      race
0        male    Indian
1        male    Indian
2      female    Indian
3      female  American
4      female   African
...       ...       ...
10495  female  American
10496    male  American
10497  female  American
10498  female  American
10499    male  American

[10500 rows x 2 columns]
Line 295. type of sensitive_features: <class 'pandas.core.frame.DataFrame'>
Line 296. type of references: <class 'numpy.ndarray'>
Line 297. type of predictions: <class 'numpy.ndarray'>
{'disparate_impact': {'value': {'female_African': 1.0, 'female_American': 0.9573426315854092, 'female_Indian': 0.935974777657522, 'male_African': 0.8437989317333723, 'male_American': 0.8698544039717611, 'male_Indian': 0.8325545580888705}}}
Line  80. policy_base. risk_analysis: {'category': 'fairness', 'risk': 'disparate_impact', 'value': 0.935974777657522}
Line 119. gra

Line 59. grade.py: disparate_impact
'disparate_impact'
argument of type 'NoneType' is not iterable
Line 294. sensitive_features:           sex      race
0        male    Indian
1        male    Indian
2      female    Indian
3      female  American
4      female   African
...       ...       ...
10495  female  American
10496    male  American
10497  female  American
10498  female  American
10499    male  American

[10500 rows x 2 columns]
Line 295. type of sensitive_features: <class 'pandas.core.frame.DataFrame'>
Line 296. type of references: <class 'numpy.ndarray'>
Line 297. type of predictions: <class 'numpy.ndarray'>
{'disparate_impact': {'value': {'female_African': 1.0, 'female_American': 0.9573426315854092, 'female_Indian': 0.935974777657522, 'male_African': 0.8437989317333723, 'male_American': 0.8698544039717611, 'male_Indian': 0.8325545580888705}}}
Line  80. policy_base. risk_analysis: {'category': 'fairness', 'risk': 'disparate_impact', 'value': 0.9573426315854092}
Line 119. gr

Line 59. grade.py: disparate_impact
'disparate_impact'
argument of type 'NoneType' is not iterable
Line 294. sensitive_features:           sex      race
0        male    Indian
1        male    Indian
2      female    Indian
3      female  American
4      female   African
...       ...       ...
10495  female  American
10496    male  American
10497  female  American
10498  female  American
10499    male  American

[10500 rows x 2 columns]
Line 295. type of sensitive_features: <class 'pandas.core.frame.DataFrame'>
Line 296. type of references: <class 'numpy.ndarray'>
Line 297. type of predictions: <class 'numpy.ndarray'>
{'disparate_impact': {'value': {'female_African': 1.0, 'female_American': 0.9573426315854092, 'female_Indian': 0.935974777657522, 'male_African': 0.8437989317333723, 'male_American': 0.8698544039717611, 'male_Indian': 0.8325545580888705}}}
Line  80. policy_base. risk_analysis: {'category': 'fairness', 'risk': 'disparate_impact', 'value': 1.0}
Line 119. grade.py: {'opera

Line 59. grade.py: disparate_impact
'disparate_impact'
argument of type 'NoneType' is not iterable
Line 294. sensitive_features:           sex      race
0        male    Indian
1        male    Indian
2      female    Indian
3      female  American
4      female   African
...       ...       ...
10495  female  American
10496    male  American
10497  female  American
10498  female  American
10499    male  American

[10500 rows x 2 columns]
Line 295. type of sensitive_features: <class 'pandas.core.frame.DataFrame'>
Line 296. type of references: <class 'numpy.ndarray'>
Line 297. type of predictions: <class 'numpy.ndarray'>
{'disparate_impact': {'value': {'female_African': 1.0, 'female_American': 0.9573426315854092, 'female_Indian': 0.935974777657522, 'male_African': 0.8437989317333723, 'male_American': 0.8698544039717611, 'male_Indian': 0.8325545580888705}}}
Line  80. policy_base. risk_analysis: {'category': 'fairness', 'risk': 'disparate_impact', 'value': 0.8437989317333723}
Line 119. gr

Line 59. grade.py: disparate_impact
'disparate_impact'
argument of type 'NoneType' is not iterable
Line 294. sensitive_features:           sex      race
0        male    Indian
1        male    Indian
2      female    Indian
3      female  American
4      female   African
...       ...       ...
10495  female  American
10496    male  American
10497  female  American
10498  female  American
10499    male  American

[10500 rows x 2 columns]
Line 295. type of sensitive_features: <class 'pandas.core.frame.DataFrame'>
Line 296. type of references: <class 'numpy.ndarray'>
Line 297. type of predictions: <class 'numpy.ndarray'>
{'disparate_impact': {'value': {'female_African': 1.0, 'female_American': 0.9573426315854092, 'female_Indian': 0.935974777657522, 'male_African': 0.8437989317333723, 'male_American': 0.8698544039717611, 'male_Indian': 0.8325545580888705}}}
Line  80. policy_base. risk_analysis: {'category': 'fairness', 'risk': 'disparate_impact', 'value': 0.8698544039717611}
Line 119. gr

In [None]:
# Show the object returned by policy_eval.eval as the cell output.
results