In [301]:
import numpy as np
from eilof_model import EILOF

import kagglehub
import os

# Download latest version
path = kagglehub.dataset_download("mlg-ulb/creditcardfraud")
import pandas as pd

# Assuming a CSV file is in the dataset directory
csv_file = os.path.join(path, "creditcard.csv")  # Replace with the actual file name
data = pd.read_csv(csv_file)

# Display the first few rows of the dataset
data.head()
import pandas as pd

# Step 1: Identify outliers and non-outliers
outliers = data[data['Class'] == 1]
non_outliers = data[data['Class'] == 0]

# Step 2: Calculate the number of non-outliers required
n_outliers = len(outliers)  # Total number of outliers
required_total = n_outliers / 0.05  # Total rows needed for 5% outliers
required_non_outliers = int(required_total - n_outliers)

# Step 3: Randomly sample the required number of non-outliers
non_outliers_sampled = non_outliers.sample(n=required_non_outliers, random_state=42)

# Step 4: Combine outliers and sampled non-outliers
subset = pd.concat([outliers, non_outliers_sampled])

# Step 5: Maintain time-series order
subset = subset.sort_index()

# Step 6: Verify the outlier ratio
actual_outlier_ratio = len(subset[subset['Class'] == 1]) / len(subset)
print(f"Outlier Ratio in Subset: {actual_outlier_ratio:.2%}")
# Display the subset
subset.head()

from sklearn.preprocessing import StandardScaler
data = subset.drop(columns=['Class'])
label = subset["Class"]

# Initialize selected and unselected datasets
selected_data = data[:1000].reset_index(drop=True).values
selected_labels = label[:1000].reset_index(drop=True).values
unselected_data = data[1000:2280].reset_index(drop=True).values
unselected_labels = label[1000:2280].reset_index(drop=True).values
scaler = StandardScaler()
selected_data = scaler.fit_transform(selected_data)
scaler = StandardScaler()
unselected_data = scaler.fit_transform(unselected_data)
data = selected_data
new_point = unselected_data[:640]



# Step 1: Test Initialization
model = EILOF(k=100)
print("Model initialized with k =", model.k)

# Step 2: Test Fitting the Model
model.fit(data)
# print("Model fitted. Local Reachability Densities (LRDs):")
print(model.lrd)

lof_scores = model.update(new_point)
print("After updating with a new point:")
print("Updated LOF Scores:", lof_scores)
lof_label = model.predict_labels(threshold=95)
print("Predicted Labels:", lof_label)
lof_label_ref = model.predict_reference_labels(threshold=95)

Outlier Ratio in Subset: 5.00%
Model initialized with k = 100
[0.28323774 0.30239357 0.2276842  0.29840228 0.28379979 0.21288414
 0.28770341 0.2558804  0.21168744 0.27664269 0.2913425  0.26699909
 0.22108926 0.26355875 0.31346718 0.30884537 0.27136127 0.28759602
 0.30369913 0.24357104 0.20759869 0.28318504 0.27069818 0.17003506
 0.17078703 0.22342076 0.29210153 0.29933461 0.30639379 0.24267463
 0.26626321 0.28452082 0.29657269 0.30169604 0.19275161 0.27462667
 0.27784744 0.25337465 0.11132921 0.30246364 0.28825591 0.25776811
 0.28496904 0.24578906 0.28522394 0.251107   0.29487198 0.27981133
 0.27532613 0.2650753  0.27747407 0.27540173 0.28418009 0.27700957
 0.27919838 0.29590357 0.27341654 0.30617109 0.24621497 0.2553917
 0.28086359 0.1955265  0.29666095 0.28160626 0.16962595 0.24014127
 0.2758568  0.12506368 0.31403413 0.29466474 0.2417623  0.27584761
 0.30953743 0.29349746 0.2976008  0.29896569 0.2740052  0.2423334
 0.26980921 0.24457808 0.29685662 0.31793295 0.29564014 0.27629525
 0

In [302]:
import importlib
import eilof_model
importlib.reload(eilof_model)  # Reload the module to ensure the latest changes are loaded

<module 'eilof_model' from '/Users/lucchen/Desktop/Research/Rui Hu Research/code/eilof_model.py'>

In [311]:
len(lof_label)

1640

In [310]:
np.where(lof_label == 1)

(array([  38,  187,  229,  271,  296,  303,  338,  359,  367,  368,  408,
         429,  441,  523,  524,  525,  530,  532,  533,  534,  537,  539,
         548,  549,  550,  553,  593,  607,  614,  616,  619,  622,  625,
         667,  689,  709,  716,  758,  792,  814,  929,  973,  975,  976,
        1061, 1090, 1131, 1145, 1146, 1165, 1180, 1220, 1376, 1408, 1417,
        1424, 1430, 1433, 1439, 1440, 1448, 1453, 1454, 1455, 1457, 1464,
        1466, 1467, 1473, 1474, 1476, 1490, 1538, 1547, 1551, 1570, 1598,
        1605, 1612, 1618, 1631, 1639]),)

In [305]:
model.reachability_dist[950,:][:100]

array([ 3.81784284,  3.83697034,  5.50669073,  3.86879517,  4.45658341,
        5.93126974,  4.63088738,  4.74671854,  7.09340564,  5.09033494,
        4.33774325,  4.74072875,  6.33925448,  4.97477502,  4.06552046,
        4.23822354,  4.15029217,  4.75015531,  3.9883058 ,  5.23198265,
        5.51201891,  4.7570251 ,  4.20118478,  6.46905633,  6.82430391,
        5.3570753 ,  4.10004201,  4.20934922,  3.83391443,  5.5946725 ,
        4.6964351 ,  3.91864394,  4.72303188,  4.21782798,  6.36903628,
        4.47218371,  4.87699927,  5.24948783, 10.27521262,  3.90519981,
        4.28953987,  5.06529856,  4.02736101,  5.28406291,  4.34394416,
        4.9498604 ,  4.27836323,  4.37852981,  4.4391965 ,  5.55138493,
        4.30334008,  4.59781351,  4.76203527,  3.91433191,  4.66970893,
        4.4290676 ,  4.46475769,  4.27424701,  5.0044454 ,  4.92557395,
        4.49029848,  6.22450461,  3.93193661,  4.66063281,  6.88602653,
        5.4470938 ,  3.97530309,  8.6962159 ,  4.06217648,  4.83

In [306]:
model.lrd[:100]

array([0.51422958, 0.54170277, 0.39604118, 0.55183793, 0.49486841,
       0.3055532 , 0.53985157, 0.41045333, 0.33230398, 0.32584787,
       0.55051528, 0.42833131, 0.299409  , 0.4878497 , 0.55624744,
       0.5239078 , 0.42049253, 0.46374274, 0.59217108, 0.3645607 ,
       0.55047515, 0.40888352, 0.37545118, 0.42580109, 0.30083663,
       0.27389164, 0.56113204, 0.48674517, 0.57609704, 0.50564818,
       0.54921866, 0.55611345, 0.48282379, 0.47019015, 0.2330555 ,
       0.45276866, 0.63675979, 0.43553993, 0.11132921, 0.70755574,
       0.51660117, 0.41768972, 0.49426889, 0.32158   , 0.44535357,
       0.33494861, 0.56997886, 0.46019826, 0.50791096, 0.2650753 ,
       0.5027742 , 0.52522797, 0.50200798, 0.43512282, 0.46795054,
       0.45370218, 0.49081056, 0.5243414 , 0.42481446, 0.4333999 ,
       0.45071249, 0.1955265 , 0.63974958, 0.41419193, 0.29302368,
       0.40010568, 0.4291889 , 0.12506368, 0.47930441, 0.44670247,
       0.34517443, 0.31058942, 0.58117835, 0.63234615, 0.44810

In [307]:
model.lof_scores[-100:]

array([1.400425  , 1.13966714, 1.24397304, 1.5041029 , 1.29364264,
       1.40296478, 1.08773364, 2.34863216, 1.13576879, 1.14608337,
       1.24150706, 4.67435497, 1.2138693 , 1.7093196 , 1.17397419,
       1.79376143, 1.42622079, 1.16523481, 1.25649832, 2.07124047,
       1.21727789, 1.41004517, 1.18892615, 2.02230649, 1.11328487,
       1.42459016, 1.21430622, 1.16034723, 1.36737277, 1.43415458,
       2.40228612, 1.22463961, 1.15366018, 1.15195153, 1.47706015,
       1.35930437, 1.11323473, 1.2251417 , 1.16436816, 1.14211077,
       1.16223469, 1.19041084, 1.52195622, 1.52561907, 1.34014728,
       1.41608042, 1.46619076, 2.08734249, 1.27997216, 1.27838434,
       1.22355524, 1.2774883 , 1.23385889, 1.43895994, 1.11412185,
       1.44265194, 1.11288123, 1.19131615, 2.7070071 , 1.15695077,
       1.19699242, 1.16182905, 1.28085324, 1.12416447, 1.2607771 ,
       3.08015073, 1.2354842 , 1.21407425, 1.117251  , 1.24446932,
       1.26377199, 1.14804105, 2.23719947, 1.11382972, 1.36097