# Anomaly Detection (PyCaret 3.x)


Using an unlabeled Credit Card Fraud Detection dataset.

In [1]:
import pycaret
pycaret.__version__

'3.3.2'

In [18]:
import pandas as pd
from pycaret.anomaly import *
df = pd.read_csv('./anomaly/fraudTrain.csv')

In [4]:
df = df.sample(n=6000, random_state=42)
print(df.shape)


(6000, 23)


In [5]:
s = setup(data=df, session_id=42, normalize=True, use_gpu=True, verbose=False)

In [7]:
iforest = create_model('iforest')
iforest

IForest(behaviour='new', bootstrap=False, contamination=0.05,
    max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=-1,
    random_state=42, verbose=0)

In [8]:
models()

Unnamed: 0_level_0,Name,Reference
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
abod,Angle-base Outlier Detection,pyod.models.abod.ABOD
cluster,Clustering-Based Local Outlier,pycaret.internal.patches.pyod.CBLOFForceToDouble
cof,Connectivity-Based Local Outlier,pyod.models.cof.COF
iforest,Isolation Forest,pyod.models.iforest.IForest
histogram,Histogram-based Outlier Detection,pyod.models.hbos.HBOS
knn,K-Nearest Neighbors Detector,pyod.models.knn.KNN
lof,Local Outlier Factor,pyod.models.lof.LOF
svm,One-class SVM detector,pyod.models.ocsvm.OCSVM
pca,Principal Component Analysis,pyod.models.pca.PCA
mcd,Minimum Covariance Determinant,pyod.models.mcd.MCD


In [9]:
iforest_anomalies = assign_model(iforest)
iforest_anomalies

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,city_pop,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud,Anomaly,Anomaly_Score
1045211,1045211,2020-03-09 15:09:26,577588686219,fraud_Towne LLC,misc_pos,194.509995,James,Strickland,M,25454 Leonard Lake,...,972,Public relations account executive,1997-10-23,fff87d4340ef756a592eac652493cf6b,1362841766,40.420452,-78.865013,0,0,-0.006690
547406,547406,2019-08-22 15:49:01,30376238035123,fraud_Friesen Ltd,health_fitness,52.320000,Cynthia,Davis,F,7177 Steven Forges,...,217,Retail merchandiser,1928-10-01,d0ad335af432f35578eea01d639b3621,1345650541,42.758862,-123.636337,0,0,-0.009099
110142,110142,2019-03-04 01:34:16,4658490815480264,fraud_Mohr Inc,shopping_pos,6.530000,Tara,Richards,F,4879 Cristina Station,...,184,Systems developer,1945-11-04,87f26e3ea33f4ff4c7a8bad2c7f48686,1330824856,40.475159,-78.898193,0,0,-0.010508
1285953,1285953,2020-06-16 20:04:38,3514897282719543,fraud_Gaylord-Powlowski,home,7.330000,Steven,Faulkner,M,841 Cheryl Centers Suite 115,...,10717,Cytogeneticist,1952-10-13,9c34015321c0fa2ae6fd20f9359d1d3e,1371413078,43.767506,-76.542381,0,0,-0.005768
271705,271705,2019-05-14 05:54:48,6011381817520024,"fraud_Christiansen, Goyette and Schamberger",gas_transport,64.290001,Kristen,Allen,F,8619 Lisa Manors Apt. 871,...,635,Product/process development scientist,1973-07-13,198437c05676f485e9be04449c664475,1336974888,41.040394,-104.092323,0,0,-0.005744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88364,88364,2019-02-21 20:34:12,6011652924285713,"fraud_Lang, Towne and Schuppe",kids_pets,3.190000,Kathryn,Smith,F,19838 Tonya Prairie Apt. 947,...,1847,Tax inspector,1988-10-26,69879eecd1aa5aefbeb575ca7cb6603e,1329856452,37.742783,-92.707993,0,0,-0.010230
878900,878900,2019-12-22 02:46:56,3533800906065217,fraud_Kilback and Sons,entertainment,103.669998,Amber,Thornton,F,840 Werner Lock Apt. 852,...,493,"Surveyor, minerals",1940-09-13,7f81854106e5cf9348de5af2f2aad09d,1356144416,40.934547,-98.138779,0,1,0.001486
894819,894819,2019-12-25 12:52:05,6511349151405438,fraud_Nolan-Williamson,kids_pets,5.650000,Robert,Nguyen,M,74835 Garner Point,...,450,Interpreter,1946-08-24,adb14016ba4282b38180a8611f14e11f,1356439925,38.445164,-114.631462,0,0,-0.008035
892781,892781,2019-12-24 19:23:43,30364087349027,"fraud_Ruecker, Beer and Collier",shopping_net,16.290001,Samuel,Sandoval,M,0005 Morrison Land,...,7163,Fitness centre manager,1982-02-05,c1f6f22032c65cd27a87d62ffa56bf5a,1356377023,36.695004,-95.742844,0,0,-0.008669


In [10]:
plot_model(iforest, plot = 'tsne')

In [11]:
evaluate_model(iforest)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelinâ€¦

In [12]:
df2 = pd.read_csv('./anomaly/fraudTest.csv')
df2 = df2.sample(n=2000, random_state=42)
iforest_pred = predict_model(iforest, data=df2)
iforest_pred

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time_2020-03-09 15:09:26,trans_date_trans_time_2019-08-22 15:49:01,trans_date_trans_time_2019-03-04 01:34:16,trans_date_trans_time_2020-06-16 20:04:38,trans_date_trans_time_2019-05-14 05:54:48,trans_date_trans_time_2019-06-28 10:46:05,trans_date_trans_time_2020-01-23 09:09:59,trans_date_trans_time_2020-01-30 11:23:44,trans_date_trans_time_2019-03-11 10:35:53,...,trans_num_7f81854106e5cf9348de5af2f2aad09d,trans_num_adb14016ba4282b38180a8611f14e11f,trans_num_c1f6f22032c65cd27a87d62ffa56bf5a,trans_num_1764c1b633225d929ab8666795e3d096,unix_time,merch_lat,merch_long,is_fraud,Anomaly,Anomaly_Score
119106,-1.441642,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,...,-0.012911,-0.012911,-0.012911,-0.012911,2.022850,0.548693,1.356760,-0.074367,0,-0.002754
179292,-1.280488,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,...,-0.012911,-0.012911,-0.012911,-0.012911,2.166442,-0.643578,-0.392627,-0.074367,0,-0.008669
540729,-0.312703,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,...,-0.012911,-0.012911,-0.012911,-0.012911,3.024982,0.283838,-0.195577,-0.074367,0,-0.007377
374360,-0.758173,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,...,-0.012911,-0.012911,-0.012911,-0.012911,2.726174,-0.334403,-0.460555,-0.074367,0,-0.008669
314574,-0.918256,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,...,-0.012911,-0.012911,-0.012911,-0.012911,2.548033,0.486368,-0.326410,-0.074367,0,-0.005739
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
413784,-0.652611,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,...,-0.012911,-0.012911,-0.012911,-0.012911,2.836007,1.533095,-1.031554,-0.074367,0,-0.005751
401511,-0.685473,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,...,-0.012911,-0.012911,-0.012911,-0.012911,2.803675,-0.039339,0.723573,-0.074367,0,-0.002299
493596,-0.438906,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,...,-0.012911,-0.012911,-0.012911,-0.012911,2.955603,1.302528,0.394590,-0.074367,0,-0.004416
47480,-1.633428,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,-0.012911,...,-0.012911,-0.012911,-0.012911,-0.012911,1.847859,0.110212,0.880120,-0.074367,0,-0.011416


In [17]:
print(iforest_pred['Anomaly'].tolist())

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [13]:
save_model(iforest, 'iforest_pipeline')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['Unnamed: 0', 'cc_num', 'amt',
                                              'zip', 'lat', 'long', 'city_pop',
                                              'unix_time', 'merch_lat',
                                              'merch_long', 'is_fraud'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=['trans_date_trans_time',
                                              'merchant', 'category', 'first',
                                              'last', 'gender'...
                                                                     'category',
                                                                     'first',
                                                                     'last',
                                                          

In [14]:
iforest

IForest(behaviour='new', bootstrap=False, contamination=0.05,
    max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=-1,
    random_state=42, verbose=0)