In [58]:
! pip install https://github.com/adebayoj/fairml/archive/master.zip

Collecting https://github.com/adebayoj/fairml/archive/master.zip
  Downloading https://github.com/adebayoj/fairml/archive/master.zip
Building wheels for collected packages: fairml
  Building wheel for fairml (setup.py): started
  Building wheel for fairml (setup.py): finished with status 'done'
  Created wheel for fairml: filename=fairml-0.1.1.5-cp37-none-any.whl size=8612 sha256=4f98a69f51ec0335fdb3024a843f0085b44ebfed3732357e9729e2d18e0988e6
  Stored in directory: C:\Users\SRINIV~1.HIG\AppData\Local\Temp\pip-ephem-wheel-cache-vv6gvxac\wheels\f2\31\c0\4f89bb5d32942468c61f33ae1f59386c516e2318d033956a5c
Successfully built fairml


In [59]:
import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from fairml import audit_model

def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2


warnings.filterwarnings("ignore")
np.random.seed(40)

# Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
#  Assumes wine-quality.csv is located in the same folder as the notebook
data_file = "mlFlowData.csv"
data = pd.read_csv(data_file)

# Split the data into training and test sets. (0.75, 0.25) split.
train, test = train_test_split(data)

# The predicted column is "quality" which is a scalar from [3, 9]
train_x = train.drop(["Saleprice"], axis=1)
test_x = test.drop(["Saleprice"], axis=1)
train_y = train[["Saleprice"]]
test_y = test[["Saleprice"]]

In [60]:
# FairML needs all the input features to have a minimum of two unique values to work 
# (As it looks at variation of the predictors across the inputs).
# Lets remove any columns which have less than 2 unique values in the dataframe before building the ML models

allcolslist = train_x.columns.tolist()
colswithlessthan2uniques = []
for var in allcolslist:
    if train_x[var].nunique() < 2:
        colswithlessthan2uniques.append(var)
        

fintrain_x = train_x.drop(colswithlessthan2uniques,axis=1)
fintest_x = test_x.drop(colswithlessthan2uniques,axis=1)
n_estimators=10 
max_features=int(0.6*len(fintrain_x.columns))

regressor = RandomForestRegressor(n_estimators=n_estimators, max_features=max_features)
regressor.fit(fintrain_x, train_y)

# Evaluate Metrics
predicted_qualities = regressor.predict(fintest_x)
(rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

# Print out metrics
print(f"RF regression model, n_estimators = {n_estimators}, max_features = {max_features}")
print("  RMSE: %s" % rmse)
print("  MAE: %s" % mae)
print("  R2: %s" % r2)

RF regression model, n_estimators = 10, max_features = 12
  RMSE: 0.13379054856528272
  MAE: 0.10659082457297332
  R2: 0.8494744884746003


In [61]:
importances,_ = audit_model(regressor.predict, fintest_x)
print(importances)

Feature: MSSubClass,	 Importance: -0.144903482218984
Feature: LotArea,	 Importance: 0.2827473712727839
Feature: OverallQual,	 Importance: 0.31047065644207433
Feature: OverallCond,	 Importance: 0.30483048129813894
Feature: YearBuilt,	 Importance: 0.306081035665447
Feature: YearRemodAdd,	 Importance: -0.10020309907843257
Feature: MasVnrArea,	 Importance: 0.1848159168646333
Feature: BsmtFinSF1,	 Importance: 0.20757600428956952
Feature: 1stFlrSF,	 Importance: 0.2859541838933218
Feature: 2ndFlrSF,	 Importance: 0.15060986706030002
Feature: GrLivArea,	 Importance: 0.30965681873571343
Feature: BsmtFullBath,	 Importance: -0.14725320918051696
Feature: FullBath,	 Importance: 0.3033563240953315
Feature: HalfBath,	 Importance: 0.1615415587686928
Feature: Functional,	 Importance: 0.29420914103423634
Feature: Fireplaces,	 Importance: 0.1911832515374317
Feature: GarageCars,	 Importance: 0.28748261167452094
Feature: WoodDeckSF,	 Importance: 0.18370756262669344
Feature: ScreenPorch,	 Importance: 0.03600