In [1]:
%pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pureml-evaluate
%pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pureml
%pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple pureml-policy


Looking in indexes: https://test.pypi.org/simple/, https://pypi.org/simple
Collecting pureml-evaluate
  Downloading https://test-files.pythonhosted.org/packages/0c/02/c687ea854386eb8a8382d76101bc48f2537fcc377975f0271442d2a9a0b6/pureml_evaluate-0.1.6-py3-none-any.whl (259 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.8/259.8 KB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting PyPDF2<4.0.0,>=3.0.1
  Using cached pypdf2-3.0.1-py3-none-any.whl (232 kB)
Collecting pydantic==1.9.1
  Using cached pydantic-1.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.0 MB)
Collecting scikit-learn<2.0.0,>=1.2.2
  Using cached scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
Collecting seaborn<0.13.0,>=0.12.2
  Using cached seaborn-0.12.2-py3-none-any.whl (293 kB)
Collecting fairlearn<0.9.0,>=0.8.0
  Using cached fairlearn-0.8.0-py3-none-any.whl (235 kB)
Collecting reportlab<5.0.0,>=4.0.4
  Using cached

In [None]:
# Before running the cells below, run these two commands in a terminal (not in this notebook):
#   pureml auth login   -- log in to your PureML account
#   pureml init         -- creates a puremlconfig.yaml file in the current directory
# Then restart the kernel so that the changes take effect.

In [1]:
import pureml

# Bare expression so the notebook displays the installed pureml version.
pureml.__version__

'0.4.5'

In [2]:
import pureml_evaluate 

# Bare expression so the notebook displays the installed pureml_evaluate version.
pureml_evaluate.__version__

'0.1.6'

In [3]:
import pureml_policy

# Bare expression so the notebook displays the installed pureml_policy version.
pureml_policy.__version__

'0.2.2'

In [4]:
# Import all the required files
import pureml
from pureml.decorators import load_data,transformer,dataset,model
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import warnings

# Suppress every Python warning for the rest of the session.
warnings.simplefilter("ignore")
# One seed for the whole notebook: it seeds NumPy's global RNG here and is
# also passed to LightGBM as `random_state` when the model is built.
rand_seed = 1234
np.random.seed(rand_seed)

In [5]:
@load_data()
def load_dataset():
    """Read the credit-card default spreadsheet into a DataFrame.

    The workbook path is relative, so the file must be in the current
    working directory. ``header=1`` tells pandas to take the column names
    from the second row of the sheet.

    Returns:
        pandas.DataFrame: the sheet contents as parsed by ``pd.read_excel``.
    """
    workbook_path = 'default of credit card clients.xls'
    return pd.read_excel(io=workbook_path, header=1)

In [6]:
@transformer()
def remove_columns(df):
    """Return a new frame equal to ``df`` without its ``ID`` column."""
    return df.drop(columns=['ID'])

In [7]:
@transformer()
def rename_columns(df):
    """Return a new frame with two columns renamed.

    ``PAY_0`` becomes ``PAY_1`` and ``default payment next month`` becomes
    ``default``; every other column keeps its name.
    """
    new_names = {
        "PAY_0": "PAY_1",
        "default payment next month": "default",
    }
    return df.rename(columns=new_names)

In [8]:
@transformer()
def dataset_imbalances(df):
    """Split the frame into features, target and sensitive attribute.

    Steps:
      1. Cast ``SEX``, ``EDUCATION`` and ``MARRIAGE`` to ``category`` dtype.
      2. Take ``default`` as the target ``Y`` and ``SEX`` as the sensitive
         attribute.
      3. One-hot encode the remaining columns with ``pd.get_dummies``.
      4. Append a synthetic ``Interest`` feature drawn from a normal
         distribution whose mean is ``2 * Y`` and whose standard deviation
         is the row's ``SEX`` value, so it depends on both the target and
         the sensitive attribute by construction.

    The draw uses NumPy's global RNG, so results depend on the seed set
    earlier in the notebook.

    Args:
        df: DataFrame containing at least the columns ``default``, ``SEX``,
            ``EDUCATION`` and ``MARRIAGE``. It is not modified.

    Returns:
        dict with keys ``'X'`` (feature DataFrame including ``Interest``),
        ``'Y'`` (target Series) and ``'A_str'`` (the ``SEX`` Series).
        Despite its name, ``'A_str'`` holds the categorical ``SEX`` column
        as-is; no mapping to string labels is applied.
    """
    categorical_features = ["SEX", "EDUCATION", "MARRIAGE"]

    # astype returns a new frame, so the caller's DataFrame is left untouched.
    df = df.astype({col_name: "category" for col_name in categorical_features})

    Y, A_str = df.loc[:, "default"], df.loc[:, "SEX"]
    X = pd.get_dummies(df.drop(columns=["default", "SEX"]))

    # One draw per row: mean 2 * Y, standard deviation taken from SEX.
    interest_values = np.random.normal(loc=2 * Y, scale=A_str)

    # Attach the draws positionally. Going through a separately indexed
    # DataFrame and pd.concat(axis=1) would align on index labels and
    # misplace values (introducing NaNs) whenever df lacks a default RangeIndex.
    X = X.assign(Interest=np.asarray(interest_values))

    return {'X': X, 'Y': Y, 'A_str': A_str}


In [9]:
@transformer()
def resample_training_data(X_train, Y_train, A_train):
    """Rebalance the training split around the positive class.

    All rows labelled 1 are kept, and as many row labels as there are
    positives are drawn from the rows labelled 0 using NumPy's global RNG.
    ``np.random.choice`` samples with replacement by default, so the same
    negative label can be drawn more than once; how such repeats are
    carried through is determined by ``Index.union``.

    Args:
        X_train: training features, indexed like ``Y_train``.
        Y_train: training target containing the values 0 and 1.
        A_train: sensitive attribute, indexed like ``Y_train``.

    Returns:
        dict with keys ``"X_train"``, ``"Y_train"`` and ``"A_train"``, each
        restricted to the selected row labels.
    """
    is_negative = Y_train == 0
    is_positive = Y_train == 1
    negative_ids = Y_train[is_negative].index
    positive_ids = Y_train[is_positive].index

    # Draw one negative label per positive row, then merge both label sets.
    sampled_negative_ids = np.random.choice(a=negative_ids, size=len(positive_ids))
    balanced_ids = positive_ids.union(sampled_negative_ids)

    resampled = {
        "X_train": X_train.loc[balanced_ids, :],
        "Y_train": Y_train.loc[balanced_ids],
        "A_train": A_train.loc[balanced_ids],
    }
    return resampled

In [11]:
from pureml.decorators import dataset

@dataset(label='credit_dataset_test_pypi3', upload=True)
def create_dataset():
    """Build the train/test splits for the credit-default example.

    Pipeline: load the spreadsheet, drop ``ID``, rename columns, derive
    features/target/sensitive attribute, split 65/35 stratified on the
    target, then rebalance the training part only.

    The split has no explicit ``random_state``, so it relies on NumPy's
    global seed set earlier in the notebook.

    Returns:
        dict with keys ``"x_train"``, ``"y_train"`` (resampled training
        data), ``"x_test"`` (DataFrame), ``"y_test"`` and
        ``"sensitive_features"`` (both NumPy arrays for the test rows).
    """
    raw_df = load_dataset()
    renamed_df = rename_columns(remove_columns(raw_df))

    prepared = dataset_imbalances(renamed_df)
    X, Y, A_str = prepared['X'], prepared['Y'], prepared['A_str']

    X_train, X_test, y_train, y_test, A_train, A_test = train_test_split(
        X, Y, A_str, test_size=0.35, stratify=Y
    )

    # Only the training rows are rebalanced; the test rows keep their
    # original class distribution.
    balanced = resample_training_data(X_train, y_train, A_train)

    return {
        "x_train": balanced['X_train'],
        "y_train": balanced['Y_train'],
        "x_test": X_test,
        "y_test": y_test.to_numpy(),
        "sensitive_features": A_test.to_numpy(),
    }


create_dataset()

ReadME file does not exist.
Creating an Empty ReadME file


{'x_train':        LIMIT_BAL  AGE  PAY_1  PAY_2  PAY_3  PAY_4  PAY_5  PAY_6  BILL_AMT1  \
 0          20000   24      2      2     -1     -1     -2     -2       3913   
 1         120000   26     -1      2      0      0      0      2       2682   
 3          50000   37      0      0      0      0      0      0      46990   
 9          20000   35     -2     -2     -2     -2     -1     -1          0   
 10        200000   34      0      0      2      0      0     -1      11073   
 ...          ...  ...    ...    ...    ...    ...    ...    ...        ...   
 29983      20000   44     -2     -2     -2     -2     -2     -2       1822   
 29991     210000   34      3      2      2      2      2      2       2500   
 29994      80000   34      2      2      2      2      2      2      72557   
 29997      30000   37      4      3      2     -1      0      0       3565   
 29999      50000   46      0      0      0      0      0      0      47929   
 
        BILL_AMT2  ...  EDUCATION_2  ED

In [12]:
# Fetch version v1 of the registered dataset and keep the held-out
# features and labels at hand.
data = pureml.dataset.fetch('credit_dataset_test_pypi3:v1')
x_test, y_test = data['x_test'], data['y_test']


In [13]:
from pureml.decorators import model
import pureml

@model(label='credit_example_test_pypi3')
def create_model():
    """Train the credit-default classifier on the registered dataset.

    Fetches version v1 of ``credit_dataset_test_pypi3`` and fits a
    two-step pipeline on its training split: standard scaling followed by
    a LightGBM classifier seeded with the notebook-wide ``rand_seed``.

    Returns:
        sklearn.pipeline.Pipeline: the fitted pipeline.
    """
    training = pureml.dataset.fetch('credit_dataset_test_pypi3:v1')
    features, target = training['x_train'], training['y_train']

    lgb_params = dict(
        metric="auc",
        learning_rate=0.03,
        num_leaves=10,
        max_depth=3,
        random_state=rand_seed,
        n_jobs=1,
    )
    # Parameter logging through pureml.log(params=lgb_params) is currently disabled.

    scaler = StandardScaler()
    classifier = lgb.LGBMClassifier(**lgb_params)
    estimator = Pipeline(
        steps=[
            ("preprocessing", scaler),
            ("classifier", classifier),
        ]
    )

    estimator.fit(features, target)
    return estimator


create_model()

[LightGBM] [Info] Number of positive: 4313, number of negative: 4313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002068 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3514
[LightGBM] [Info] Number of data points in the train set: 8626, number of used features: 29
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


ReadME file does not exist.
Creating an Empty ReadME file


No metrics are found in config
No params are found in config
No figures are found in config


In [14]:
import pureml


# Fetch version v1 of the registered model; as the last expression of the
# cell, whatever fetch returns is displayed.
pureml.model.fetch(label='credit_example_test_pypi3:v1')

In [15]:
# Point the 'predict' path of model version v1 at predict.py.
# NOTE(review): predict.py is not created by any cell shown here, so it
# presumably has to exist next to the notebook already — confirm.
pureml.predict.add(
    label='credit_example_test_pypi3:v1',
    paths={'predict': 'predict.py'},
)

In [16]:
import pureml

# Fetch the predict entry stored for model version v1; as the last
# expression of the cell, whatever fetch returns is displayed.
pureml.predict.fetch(label='credit_example_test_pypi3:v1')

In [17]:
from pureml_policy import policy_eval
# Evaluate model v1 against dataset v1 under the "nyc144" framework and
# keep the returned report for inspection.
results = policy_eval.eval(
    framework_name="nyc144",
    label_model='credit_example_test_pypi3:v1',
    label_dataset='credit_dataset_test_pypi3:v1',
)

'balanced_accuracy'
argument of type 'NoneType' is not iterable
'balanced_acc_error'
argument of type 'NoneType' is not iterable
'disparate_impact'
argument of type 'NoneType' is not iterable
'demographic_parity_difference'
argument of type 'NoneType' is not iterable
'balanced_accuracy'
argument of type 'NoneType' is not iterable
'balanced_acc_error'
argument of type 'NoneType' is not iterable
'disparate_impact'
argument of type 'NoneType' is not iterable
'demographic_parity_difference'
argument of type 'NoneType' is not iterable
'balanced_accuracy'
argument of type 'NoneType' is not iterable
'balanced_acc_error'
argument of type 'NoneType' is not iterable
'disparate_impact'
argument of type 'NoneType' is not iterable
'demographic_parity_difference'
argument of type 'NoneType' is not iterable


In [18]:
# Display the report returned by policy_eval.eval.
results

{'model': 'credit_example_test_pypi3',
 'model_version': 'v1',
 'dataset': 'credit_dataset_test_pypi3',
 'dataset_version': 'v1',
 'result': [{'complete': {'complete': {'operational': {'accuracy': 'pass',
      'precision': 'pass',
      'recall': 'fail',
      'f1': 'fail'},
     'fairness': {'balanced_accuracy': 'fail',
      'balanced_acc_error': 'fail',
      'disparate_impact': 'pass',
      'demographic_parity_difference': 'fail'},
     'operational_scores': {'accuracy': '0.7839047619047619',
      'precision': '0.8037021093413689',
      'recall': '0.5073369565217392',
      'f1': '0.6220223221722473'},
     'fairness_scores': {'balanced_accuracy': '0.7909913261638971',
      'balanced_acc_error': '0.009246206981808714',
      'disparate_impact': '1.0',
      'demographic_parity_difference': '0.06686397746375322'},
     'operational_thresholds': {'accuracy': '0.7',
      'precision': '0.8',
      'recall': '0.8',
      'f1': '0.7'},
     'fairness_thresholds': {'balanced_accurac