In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest

rng = np.random.RandomState(42)

# Training set: 100 Gaussian points (std 0.3) placed once around (2, 2) and
# once, shifted, around (-2, -2) -> 200 rows in total.
train_base = rng.standard_normal(size=(100, 2)) * 0.3
X_train = np.concatenate([train_base + 2, train_base - 2], axis=0)

# Regular novel observations: 20 fresh points built the same way -> 40 rows.
test_base = rng.standard_normal(size=(20, 2)) * 0.3
X_test = np.concatenate([test_base + 2, test_base - 2], axis=0)

# Abnormal novel observations: 20 points drawn uniformly from the square
# [-4, 4) x [-4, 4).
X_outliers = rng.uniform(-4, 4, (20, 2))

In [8]:
# Inspect the regular novel observations: 20 points near (2, 2) followed by
# the same 20 points shifted to lie near (-2, -2).
X_test

array([[ 2.10733621,  2.16823536],
       [ 2.32491537,  2.31614062],
       [ 1.58669919,  1.71865249],
       [ 2.15451058,  2.15413579],
       [ 2.15451431,  3.15581945],
       [ 2.17126715,  2.34066969],
       [ 2.28620053,  2.19541738],
       [ 1.90541923,  2.22769077],
       [ 1.76815244,  1.92895442],
       [ 1.85439094,  2.02456224],
       [ 2.69439757,  1.43982044],
       [ 2.20587806,  1.51618524],
       [ 1.85842044,  2.32668518],
       [ 2.01928401,  1.67667657],
       [ 1.78540889,  2.20387932],
       [ 1.78089001,  2.06493758],
       [ 2.01367155,  1.8045199 ],
       [ 2.64318323,  2.19017571],
       [ 1.39245722,  2.05593629],
       [ 1.80146406,  2.25573   ],
       [-1.89266379, -1.83176464],
       [-1.67508463, -1.68385938],
       [-2.41330081, -2.28134751],
       [-1.84548942, -1.84586421],
       [-1.84548569, -0.84418055],
       [-1.82873285, -1.65933031],
       [-1.71379947, -1.80458262],
       [-2.09458077, -1.77230923],
       [-2.23184756,

In [6]:
# Fit the model.
# Seed the forest with an integer rather than the shared `rng` instance:
# a RandomState object is advanced by every fit, so re-running this cell
# (or running cells out of order) would silently build a different forest.
# An integer seed makes the cell idempotent.
clf = IsolationForest(max_samples=100, random_state=42)
clf.fit(X_train)

# predict() labels each row +1 (inlier) or -1 (outlier).
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)
y_pred_outliers = clf.predict(X_outliers)

In [9]:
# Predicted labels for the 40 regular novel observations;
# IsolationForest.predict returns +1 for inliers and -1 for outliers.
y_pred_test

array([ 1,  1,  1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,
       -1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1,  1,
        1,  1,  1, -1, -1,  1])

In [None]:
# Plot the forest's decision function over a 50 x 50 grid covering
# [-5, 5] x [-5, 5] as filled contours, then overlay the training points,
# the regular novel points and the abnormal novel points.
grid_axis = np.linspace(-5, 5, 50)
xx, yy = np.meshgrid(grid_axis, grid_axis)
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = clf.decision_function(grid_points).reshape(xx.shape)

fig, ax = plt.subplots()
ax.set_title("IsolationForest")
ax.contourf(xx, yy, Z, cmap=plt.cm.Blues_r)

# All three point sets share the same marker size and black outline.
marker_style = dict(s=20, edgecolor='k')
train_pts = ax.scatter(X_train[:, 0], X_train[:, 1], c='white', **marker_style)
test_pts = ax.scatter(X_test[:, 0], X_test[:, 1], c='green', **marker_style)
outlier_pts = ax.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red',
                         **marker_style)

ax.axis('tight')
ax.set_xlim((-5, 5))
ax.set_ylim((-5, 5))
ax.legend(
    [train_pts, test_pts, outlier_pts],
    ["training observations",
     "new regular observations", "new abnormal observations"],
    loc="upper left",
)
plt.show()