In [1]:
import numpy as np  
import pandas as pd
from sklearn.ensemble import IsolationForest  

# --- Synthetic data configuration --------------------------------------------
n_samples = 10000          # total number of points (inliers + outliers)
outliers_fraction = 0.25   # share of the points that are generated as outliers

# Derive the inlier count from the outlier count so the two always add up to
# n_samples.  Truncating two float products independently is fragile because a
# product can land just below an integer (e.g. int(0.29 * 100) == 28).
n_outliers = int(outliers_fraction * n_samples)
n_inliers = n_samples - n_outliers

# Fixed seed so every run draws the same points.
rng = np.random.RandomState(123)

# One tight 2-D Gaussian blob (std 0.3) with half of the inlier count.
X = 0.3 * rng.randn(n_inliers // 2, 2)

# Two inlier clusters: the blob shifted by +2 and by -2 on both axes.
X_train = np.r_[X + 2, X - 2]

# Outliers are drawn uniformly from the square [-6, 6) x [-6, 6).
outliers = rng.uniform(low=-6, high=6, size=(n_outliers, 2))

In [10]:
# Sanity check: report how many points belong to each class.
for label, count in (
    ('The number of inliers is: ', n_inliers),
    ('The number of outliers is: ', n_outliers),
):
    print(label, count)

The number of inliers is:  7500
The number of outliers is:  2500


In [11]:
# Inspect the base blob of scaled normal draws, before it is shifted into the
# two inlier clusters.
X

array([[-0.32568918,  0.29920363],
       [ 0.08489355, -0.45188841],
       [-0.17358008,  0.49543096],
       ...,
       [-0.41040596,  0.65606918],
       [ 0.13142959,  0.03857035],
       [ 0.28427729, -0.05581057]])

In [2]:
# Inspect the inlier set: the base blob shifted by +2, then the same blob
# shifted by -2.
X_train

array([[ 1.67431082,  2.29920363],
       [ 2.08489355,  1.54811159],
       [ 1.82641992,  2.49543096],
       ...,
       [-2.41040596, -1.34393082],
       [-1.86857041, -1.96142965],
       [-1.71572271, -2.05581057]])

In [3]:
# Inspect the uniformly drawn outlier points.
outliers

array([[-3.64191785,  3.03951457],
       [ 3.21656468, -3.81299574],
       [-5.92952536,  0.89616577],
       ...,
       [-4.33496129,  3.65191256],
       [ 1.26733042, -1.55240204],
       [-3.73895569, -5.78858711]])

In [4]:
# Assemble the full training set: both inlier clusters followed by the outliers.
# It is rebuilt from X rather than extended in place (X_train = np.r_[X_train, ...]),
# so re-running this cell cannot append the outliers a second time.
X_train = np.r_[X + 2, X - 2, outliers]
X_train

array([[ 1.67431082,  2.29920363],
       [ 2.08489355,  1.54811159],
       [ 1.82641992,  2.49543096],
       ...,
       [-4.33496129,  3.65191256],
       [ 1.26733042, -1.55240204],
       [-3.73895569, -5.78858711]])

In [5]:
# Isolation forest configured with the outlier share used to build the data.
clf = IsolationForest(
    contamination=outliers_fraction,  # proportion of points to flag as outliers
    random_state=2018,                # fixed seed for repeatable trees
    n_jobs=-1,                        # -1: use all available processors
)

In [6]:
# Fit the isolation forest on the combined data and label every training point
# in the same call; the labels are 1 or -1 (see the output below).
y_pred_train = clf.fit_predict(X_train)
y_pred_train

array([ 1,  1,  1, ..., -1, -1, -1])

In [7]:
# Translate the numeric labels into readable names in one vectorized step
# (instead of a Python-level loop): 1 -> 'normal', anything else -> 'abnormal'.
pred = np.where(np.asarray(y_pred_train) == 1, 'normal', 'abnormal')
pred

array(['normal', 'normal', 'normal', ..., 'abnormal', 'abnormal',
       'abnormal'], dtype='<U8')

In [8]:
# Per-point score from the fitted forest; in the sample shown below, negative
# scores go with the -1 ('abnormal') label and positive scores with 1 ('normal').
scores_pred = clf.decision_function(X_train)

# Gather score, numeric label and readable label side by side, one row per point.
dict_ = {'anomaly_score': scores_pred, 'y_pred': y_pred_train, 'result': pred}
scores = pd.DataFrame(dict_)

# Seed the preview so the same five rows are shown on every run
# (same seed value as the forest's random_state).
print(scores.sample(5, random_state=2018))

      anomaly_score  y_pred    result
355        0.060014       1    normal
9005      -0.111431      -1  abnormal
1525       0.071199       1    normal
8646      -0.013615      -1  abnormal
9360      -0.051253      -1  abnormal
