In [6]:
import numpy as np
import pandas as pd
import seaborn as sns


from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import f1_score, accuracy_score, matthews_corrcoef, precision_score, roc_auc_score, recall_score, average_precision_score

from matplotlib import pyplot as plt

In [4]:
# Load the train/test label files. header=None makes pandas treat the first
# row as data, and ravel() flattens each loaded table into a 1-D array.
y_train, y_test = (
    pd.read_csv(label_path, header=None).to_numpy().ravel()
    for label_path in (
        '../fraud_detection/active_datasets/y_train.csv',
        '../fraud_detection/active_datasets/y_test.csv',
    )
)

In [5]:
# Load the train/test feature matrices as plain NumPy arrays. header=None
# makes pandas treat the first row of each CSV as data, not column names.
X_train, X_test = (
    pd.read_csv(feature_path, header=None).to_numpy()
    for feature_path in (
        '../fraud_detection/active_datasets/fff_train.csv',
        '../fraud_detection/active_datasets/fff_test.csv',
    )
)

In [7]:
def score_model(model, X_test, y_test, map = None, y_score = None):
    """Score a fitted model on held-out data and return a dict of metrics.

    Parameters
    ----------
    model
        Any object exposing ``predict(X)``.
    X_test
        Features, passed unchanged to ``model.predict``.
    y_test
        Ground-truth labels, handed to every metric as ``y_true``.
    map : dict, optional
        Translation from raw predictions to the label space of ``y_test``
        (e.g. ``{1: 0, -1: 1}``). Skipped when ``None`` or empty. The name
        shadows the builtin but is kept so keyword callers keep working.
    y_score : array-like, optional
        Continuous scores for the positive class (higher = more likely
        positive). When supplied they are used for ROC-AUC and AUPR. When
        omitted, the hard predictions are used instead, which reduces both
        curves to a single operating point and usually understates how well
        the model ranks samples.

    Returns
    -------
    dict
        Keys: 'accuracy', 'precision', 'recall', 'f1', 'MCC', 'ROC-AUC',
        'AUPR'.
    """
    model_preds = model.predict(X_test)
    if map:
        model_preds = [map[i] for i in model_preds]

    # Metrics that compare hard labels against the ground truth.
    label_metrics = {
        'accuracy': accuracy_score,
        'precision': precision_score,
        'recall': recall_score,
        'f1': f1_score,
        'MCC': matthews_corrcoef,
    }
    scores = {
        name: metric(y_true=y_test, y_pred=model_preds)
        for name, metric in label_metrics.items()
    }

    # Ranking metrics want a non-thresholded score; fall back to the hard
    # labels only when the caller did not provide one (original behaviour).
    ranking_scores = model_preds if y_score is None else y_score
    scores['ROC-AUC'] = roc_auc_score(y_true=y_test, y_score=ranking_scores)
    scores['AUPR'] = average_precision_score(y_true=y_test, y_score=ranking_scores)
    return scores

In [8]:
# Expected outlier share = mean of the training labels.
# NOTE(review): assumes y_train is 0/1 with 1 = fraud, and that the resulting
# rate lies in (0, 0.5] as LocalOutlierFactor requires - confirm.
contamination_rate = float(np.mean(y_train))

# novelty=True enables predict()/decision_function() on unseen data.
LOF = LocalOutlierFactor(n_neighbors=1500, novelty=True, contamination=contamination_rate)
# LOF is unsupervised: fit() ignores any y argument, so only X is passed.
LOF.fit(X_train)

# LOF predicts +1 for inliers and -1 for outliers; remap so outlier -> 1.
LOF_scores = score_model(LOF, X_test, y_test, map={1:0, -1:1})

# ROC-AUC / AUPR need a continuous ranking score, not thresholded labels.
# decision_function is larger for inliers, so negate it to get a score that
# grows with "outlier-ness" (the positive class here).
lof_outlier_score = -LOF.decision_function(X_test)
LOF_scores['ROC-AUC'] = roc_auc_score(y_true=y_test, y_score=lof_outlier_score)
LOF_scores['AUPR'] = average_precision_score(y_true=y_test, y_score=lof_outlier_score)

: 