In [None]:
from ipynb.fs.full.utils import *

In [None]:
import numpy as np
import optuna
import sklearn
from sklearn.neighbors import LocalOutlierFactor
from PyNomaly import loop
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
class LoOP():
    """Class-overlap estimator built on Local Outlier Probabilities (LoOP).

    For every sample, a copy of the sample is added once to each class and
    its LoOP value inside that class is measured. ``1 - LoOP`` is rescaled
    per class to the range of that class's histogram density, and the
    minimum over classes is the sample's overlap score.
    """

    @staticmethod
    def fit_predict(X, y, epsilon, k=None, Plotting=None):
        """Return the samples of ``X`` whose overlap score is ``>= epsilon``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Samples. NOTE(review): the per-class histogram pools every value
            of a class and the optional plot scatters X against one score per
            row, so in practice a single feature column is expected.
        y : array-like, shape (n_samples,)
            Class label of each sample.
        epsilon : float
            Threshold applied to the final (density-scaled) score.
        k : int, optional
            ``n_neighbors`` for LoOP. Defaults to ``round(sqrt(n_samples))``.
        Plotting : sequence of two array-likes, optional
            ``[x, density]`` of a reference curve. When given (and truthy),
            the estimated scores and this reference are plotted.

        Returns
        -------
        numpy.ndarray
            The selected rows, mapped back to the original scale of ``X``.
            When nothing is selected, an empty selection of the scaled array.

        Raises
        ------
        ValueError
            If a class has a zero inter-quartile range, so no histogram bin
            width can be derived for it.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()

        if k is None:  # default neighbourhood size: sqrt of the sample count
            k = round(np.sqrt(len(X)))

        scaler = MinMaxScaler(feature_range=(0, 1))  # LoOP runs on [0, 1]-scaled data
        X_scalled = scaler.fit_transform(X)

        y_labels = np.sort(np.unique(y))
        n_labels = len(y_labels)

        # Smallest and largest histogram density of each class, computed on
        # the unscaled data with a Freedman-Diaconis style bin width.
        density_bounds = {}
        for label in y_labels:
            members = X[y == label]
            q3, q1 = np.percentile(members, [75, 25])
            bin_width = 2 * (q3 - q1) / (len(members) ** (1 / 3))
            if not bin_width > 0:
                raise ValueError(
                    f"class {label!r} has a zero inter-quartile range; "
                    "cannot derive a histogram bin width"
                )
            # ndarray.min()/max() yield scalars; builtin min()/max() on a 2-D
            # array would hand 1-element arrays to np.arange.
            edges = np.arange(members.min(), members.max() + bin_width, step=bin_width)
            density = np.histogram(members, bins=edges, density=True)[0]
            density_bounds[label] = (density.min(), density.max())

        # The label vector of the augmented data set never changes: the
        # original labels followed by one entry per class, in y_labels order.
        augmented_y = np.concatenate([y, y_labels])

        inlier_scores = {label: [] for label in y_labels}
        for point in X_scalled:
            # Stack row-wise so the feature axis survives (np.append without
            # an axis would flatten the matrix).
            augmented_X = np.vstack([X_scalled] + [point] * n_labels)
            probabilities = loop.LocalOutlierProbability(
                augmented_X,
                extent=2,
                n_neighbors=k,
                cluster_labels=list(augmented_y),
                progress_bar=False,
            ).fit().local_outlier_probabilities

            # The last n_labels entries belong to the appended copies, in the
            # same order as y_labels, so tail[i] is the score for y_labels[i].
            tail = probabilities[-n_labels:]
            for index, label in enumerate(y_labels):
                inlier_scores[label].append(1 - tail[index])

        # Map each class's (1 - LoOP) values onto that class's density range.
        scaled_scores = {}
        for label, scores in inlier_scores.items():
            min_val, max_val = density_bounds[label]
            scaler_values = MinMaxScaler(feature_range=(min_val, max_val))
            column = np.asarray(scores, dtype=float).reshape(-1, 1)
            scaled_scores[label] = scaler_values.fit_transform(column).flatten()

        final_score = np.min(list(scaled_scores.values()), axis=0)
        indexes = np.where(final_score >= epsilon)[0]

        if Plotting:
            plt.scatter(scaler.inverse_transform(X_scalled), final_score)
            plt.scatter(Plotting[0], Plotting[1], alpha=0.4, c="red")
            plt.xlabel('Coordinates')
            plt.ylabel('1-Percentage')
            plt.title("LoOP percentages per point")
            plt.legend(["Estimated Density", "True Density"], fontsize=8)
            plt.show()

        overlap_loop = X_scalled[indexes]
        if len(overlap_loop) != 0:
            overlap_loop = scaler.inverse_transform(overlap_loop)
        return overlap_loop

In [None]:
def figure_loop_estimate():
    """Draw the true overlap of two 1-D normal samples, then the LoOP estimate.

    Two samples of 200 points are drawn from N(0, 1) and N(2, 1) with a fixed
    seed. The first figure shows each sample under its own density and, in
    red, the points where both densities exceed ``epsilon``, plotted at the
    smaller of the two densities. ``LoOP.fit_predict`` is then called with
    those reference points so it can plot its estimate against them.
    """
    np.random.seed(0)
    epsilon = 0
    size = 200
    loc_a, loc_b = 0, 2
    sd_a, sd_b = 1, 1

    sample_a = np.random.normal(loc_a, sd_a, size)
    sample_b = np.random.normal(loc_b, sd_b, size)
    X = np.concatenate([sample_a, sample_b]).reshape(-1, 1)
    y = np.concatenate([np.ones(len(sample_a)), -np.ones(len(sample_b))])

    # Each sample plotted against the density it was drawn from.
    plt.scatter(sample_a, stats.norm.pdf(x=sample_a, loc=loc_a, scale=sd_a), alpha=0.3)
    plt.scatter(sample_b, stats.norm.pdf(x=sample_b, loc=loc_b, scale=sd_b), alpha=0.3)

    # Both densities evaluated at every point; a point is in the overlap when
    # both exceed epsilon.
    pdf_a = stats.norm.pdf(x=X, loc=loc_a, scale=sd_a)
    pdf_b = stats.norm.pdf(x=X, loc=loc_b, scale=sd_b)
    in_overlap = (pdf_a > epsilon) & (pdf_b > epsilon)
    overlap_x = X[in_overlap]
    overlap_density = np.minimum(pdf_a[in_overlap], pdf_b[in_overlap])

    plt.scatter(overlap_x, overlap_density, alpha=1, c="red")
    plt.legend(["x1", "x2", "overlapping points"], fontsize=8)
    plt.xlabel("Coordinates")
    plt.ylabel("Density")
    plt.title("Overlapping points of two normaly distributed function")
    plt.show()

    # The true overlap is passed along as the reference curve for the second figure.
    LoOP.fit_predict(X, y, epsilon, k=None, Plotting=[overlap_x, overlap_density])