In [None]:
import sys
import os

# Make the directory two levels above the current working directory importable.
# os.path.abspath("") resolves to the current working directory.
# Presumably this lets the `safemodel` imports in later cells resolve - confirm against the repo layout.
notebook_dir = os.path.abspath("")
ROOT_DIR = os.path.dirname(os.path.dirname(notebook_dir))
sys.path.append(ROOT_DIR)

## A Quick Start Guide to implementing Safer Random Forests

### Let's start by making some data with one disclosive case
- We'll do this by adding an example to the iris data and give it a new class to make things really obvious.
- The same risks exist for more complex data sets but _everyone knows iris_

In [None]:
import numpy as np
from sklearn import datasets

# Load the iris measurements (X) and their class labels (y).
iris = datasets.load_iris()
X = iris.data
y = iris.target


# Print the min and max values of each feature to help hand-craft the disclosive point.
for feature in range(X.shape[1]):
    print(f"feature {feature} min {np.min(X[:,feature])}, max {np.max(X[:,feature])}")

# Now add a single disclosive point with features [7, 2, 4.5, 1] and label 4.
# The label is deliberately one that the stock iris targets do not use, so the
# extra row forms a class of its own (a single-member, and hence disclosive, case).
X = np.vstack([X, (7, 2.0, 4.5, 1)])
y = np.append(y, 4)

## Some basic Libraries for visualization

In [None]:
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

## Defining a new class SafeRandomForestClassifier
- Don't forget to import the SafeModel classes.

In [None]:
from safemodel.safemodel import SafeModel
from safemodel.classifiers import SafeRandomForestClassifier

In [None]:
# Fit a 100-tree safe random forest on the augmented iris data.
safeRFModel = SafeRandomForestClassifier(n_estimators=100)  # (criterion="entropy")
safeRFModel.fit(X, y)

print(f"Training set accuracy in this safe case is {safeRFModel.score(X,y)}")

# Draw each of the 100 fitted trees on its own panel of a 10 x 10 grid.
# ax.flat walks the grid row by row, so panel k (row k // 10, column k % 10)
# shows estimator k.
fig, ax = plt.subplots(10, 10, figsize=(15, 15))
for tree_index, panel in enumerate(ax.flat):
    _ = plot_tree(
        safeRFModel.estimators_[tree_index],
        filled=True,
        ax=panel,
        fontsize=1,
    )

## Using the save and reporting functionality

In [None]:
# Save the fitted model, run the preliminary check, then request release of
# the same file. One variable holds the filename so the two calls cannot drift.
# NOTE(review): presumably request_release writes the JSON checkfile shown in
# the next cell - confirm against the safemodel documentation.
model_filename = "testSaveRF.pkl"
safeRFModel.save(name=model_filename)
safeRFModel.preliminary_check()
safeRFModel.request_release(model_filename)

## The checkfile reports any warnings and recommendations in JSON format

In [None]:
# Shell escape: print a banner, then dump every file in the working directory
# whose name matches the glob *_check*ile.json.
!echo "contents of checkfile are"; cat *_check*ile.json

## Putting it all together
- Don't forget to import the SafeModel classes.

In [None]:
from safemodel.safemodel import SafeModel
from safemodel.classifiers import SafeRandomForestClassifier

# End-to-end recap: build, fit, save, check, and request release in one cell.
# One variable holds the filename so save() and request_release() stay in step.
model_filename = "testSaveRF.pkl"

safeRFModel = SafeRandomForestClassifier(n_estimators=100)  # (criterion="entropy")
safeRFModel.fit(X, y)

safeRFModel.save(name=model_filename)
safeRFModel.preliminary_check()
safeRFModel.request_release(model_filename)

## Examine the checkfile contents


In [None]:
# Shell escape: print a banner, then dump every file in the working directory
# whose name matches the glob *_check*ile.json.
!echo "contents of checkfile are"; cat *_check*ile.json