In [10]:
from ipynb.fs.full.utils import *

In [11]:
from sklearn.neighbors import NearestNeighbors
from sklearn.svm import SVC
from sklearn import preprocessing as pre
import math
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA

In [22]:
class Radius_Neighbours():
    """Estimate class-overlap points from per-point radius neighbourhoods.

    Every sample gets its own search radius derived from its SVC decision
    value (small |value| -> large radius, capped by a threshold).  The
    neighbours found inside that radius are then scored per class and a
    sample is reported as "overlapping" when every class scores at least
    ``epsilon``.

    The class holds no state.  All methods are static so they can be called
    on the class itself (``Radius_Neighbours.fit(X, y)``) or on an instance.
    """

    def __init__(self):
        pass

    @staticmethod
    def fit(X, y, Plotting=None):
        """Compute the radius neighbourhood of every sample.

        Parameters
        ----------
        X : array-like, indexable by row
            Samples; rescaled to [0, 1] per feature before any distance work.
        y : array-like
            Class label of each row of ``X``.
        Plotting : optional
            When truthy, the radius assigned to each sample is plotted via
            ``plotting_fit_Nearest_Neighbours``.

        Returns
        -------
        (distances, indices) : tuple of lists
            One entry per sample: the distances to, and row indices of, the
            neighbours within that sample's radius, sorted by distance
            (``sort_results=True``).
        """
        scaler = MinMaxScaler(feature_range=(0, 1))
        X_scalled = scaler.fit_transform(X)

        # NOTE(review): compute_average_distance comes from utils; it is
        # assumed to return (mean, min, max) pairwise distance -- confirm.
        mean_dist, min_dist, max_dist = compute_average_distance(X_scalled)
        nbrs = NearestNeighbors(n_neighbors=len(X), radius=max_dist, algorithm='auto').fit(X_scalled)

        # Average amount of points per class.
        factor = len(X) / len(np.unique(y))
        # Radius cap: average between the min distance and the mean distance.
        threshold = (min_dist + mean_dist) / 2

        distance_decision_boundary = Radius_Neighbours.get_decision_boundary(X_scalled, y)
        radius_values = np.array([
            Radius_Neighbours.interpolation_function(xi, factor, threshold)
            for xi in distance_decision_boundary
        ])

        if Plotting:
            plotting_fit_Nearest_Neighbours(X_scalled, radius_values, scaler)

        distances = []
        indices = []
        for idx, value in enumerate(radius_values):
            # One query per sample because each sample has its own radius.
            distance, indice = nbrs.radius_neighbors(X_scalled[idx].reshape(1, -1), radius=value, sort_results=True)
            distances.append(distance[0])
            indices.append(indice[0])

        return distances, indices

    @staticmethod
    def predict(X, distances, indices, y, epsilon, Plotting=None):
        """Select the samples whose neighbourhood contains every class densely enough.

        For each sample, its neighbours are grouped by class and each class
        gets the score
        ``(1 - class_distance_sum / total_distance_sum) * (class_count / total_count)``.
        A class whose only neighbour distances are all zero scores 0.  The
        sample is kept when no class scores below ``epsilon``.  Samples whose
        neighbourhood does not contain every class present in ``y`` are skipped.

        Parameters
        ----------
        X : array-like
            The same samples that were given to ``fit`` (``indices`` refers
            to its rows).
        distances, indices : list of arrays
            Output of ``fit``.
        y : array-like
            Numeric class label of each row of ``X``.
        epsilon : float
            Minimum per-class score for a sample to count as overlapping.
        Plotting : optional sequence
            When truthy, the estimated scores are plotted together with
            ``Plotting[0]`` (coordinates) and ``Plotting[1]`` (reference density).

        Returns
        -------
        numpy.ndarray
            The selected samples in the original (unscaled) coordinates;
            an empty array when nothing was selected.
        """
        scaler = MinMaxScaler(feature_range=(0, 1))
        X_scalled = scaler.fit_transform(X)

        labels = np.asarray(y)
        # Loop-invariant: number of distinct classes in the whole data set.
        amount_classes = len(np.unique(labels))

        final_points = set()

        # For plotting
        plot_estimate_radius_density = []
        plot_estimate_radius_density_not = []
        plt_coor_radius_density = []
        plt_coor_not_radius_density = []

        # For all points.
        for index, arr in enumerate(indices):

            # Two columns: [class label, distance to the query point].
            a = np.column_stack((labels[arr], distances[index]))
            a = a[a[:, 0].argsort()]
            # Split the distance column into one array per class present.
            a = np.split(a[:, 1], np.unique(a[:, 0], return_index=True)[1][1:])

            max_distance_per_class = [max(x) for x in a]
            length_per_class = [len(x) for x in a]
            sum_reach_dist_per_class = [sum(x) for x in a]

            if len(sum_reach_dist_per_class) != amount_classes:  # If not all classes are present.
                continue  # Don't take it into account.

            all_Overlapping_radius_density = True
            to_plot_radius_density = []

            for ind in range(amount_classes):
                if max_distance_per_class[ind] == 0 and sum_reach_dist_per_class[ind] == 0:
                    # The only point from the class is the point itself
                    # (or exact duplicates of it): density is 0 for that class.
                    radius_density = 0
                else:
                    amount_of_points_estimate = amount_of_points(length_per_class[ind], sum(length_per_class))
                    radius_density = 1 - radius_density_estimate(sum_reach_dist_per_class[ind], sum(sum_reach_dist_per_class))
                    radius_density = (radius_density * amount_of_points_estimate)

                to_plot_radius_density.append(radius_density)

                if radius_density < epsilon:
                    all_Overlapping_radius_density = False

            # arr[0] is the closest neighbour; with sorted results from fit this
            # is the query sample itself (or an exact duplicate of it).
            if all_Overlapping_radius_density:
                final_points.add(arr[0])
                plt_coor_radius_density.append(scaler.inverse_transform([X_scalled[arr[0]]]))
                plot_estimate_radius_density.append(min(to_plot_radius_density))
            else:
                plt_coor_not_radius_density.append(scaler.inverse_transform([X_scalled[arr[0]]]))
                plot_estimate_radius_density_not.append(min(to_plot_radius_density))

        if Plotting:
            plt.scatter(plt_coor_radius_density, plot_estimate_radius_density, c='C0')
            #plt.scatter(plt_coor_not_radius_density, plot_estimate_radius_density_not, c='C0')
            plt.scatter(Plotting[0], Plotting[1], alpha=0.4, c="red")
            plt.legend(["Estimated Density", "True Density"], fontsize=8)
            plt.xlabel('Coordinates')
            plt.ylabel('Custom Density')
            plt.title("Density Values")
            plt.show()

        # Return the final points in the interval
        overlap_nn = X_scalled[list(final_points)[:]]
        if len(overlap_nn) != 0:
            overlap_nn = scaler.inverse_transform(overlap_nn)
        return overlap_nn

    @staticmethod
    def fit_predict(X, y, epsilon, Plotting=None):
        """Run ``fit`` followed by ``predict`` on the same data and return the overlap points."""
        distances, indices = Radius_Neighbours.fit(X, y, Plotting)
        return Radius_Neighbours.predict(X, distances, indices, y, epsilon, Plotting)

    @staticmethod
    def interpolation_function(value, factor, threshold):
        """Map a decision value to a search radius.

        Returns ``min(threshold, 1 / sqrt(factor * |value|))``: the larger the
        factor (or the further from the boundary), the smaller the radius.
        A value of exactly 0 yields ``threshold``.
        """
        # value == 0 gives 1/0 = inf, which the cap below turns into
        # `threshold`; silence the divide warning for that expected case.
        with np.errstate(divide="ignore"):
            uncapped = 1 / np.sqrt(factor * np.abs(value))
        return min(threshold, uncapped)

    @staticmethod
    def find_nearest(array, value):
        """Return the element of ``array`` closest to ``value`` (first one on ties)."""
        array = np.asarray(array)
        idx = (np.abs(array - value)).argmin()
        return array[idx]

    @staticmethod
    def get_decision_boundary(X, y):
        """Fit an SVC on ``(X, y)`` and return its decision values for ``X``.

        NOTE(review): ``fit`` treats each returned entry as a scalar, which
        holds for two classes; with more classes SVC returns one row of
        several values per sample -- confirm before using multiclass data.
        """
        #OneVsRestClassifier()
        ovr = SVC(random_state=0).fit(X, y)  # To find a solution for nbrs
        distance_to_decision_boundary = ovr.decision_function(X)

        return distance_to_decision_boundary

In [18]:
def radius_density_estimate(distance_class, total_distance):
    """Return the share of the total neighbour distance contributed by one class."""
    share = distance_class / total_distance
    return share

def amount_of_points(amount_point_class, total_points):
    """Return the fraction of all neighbours that belong to one class."""
    fraction = amount_point_class / total_points
    return fraction

In [19]:
def plotting_fit_Nearest_Neighbours(X, radius_values, scaler, extrema_order=35):
    """Plot the search radius assigned to every sample (1-D or 2-D data only).

    Parameters
    ----------
    X : numpy.ndarray
        Scaled samples, one row per sample; mapped back to original
        coordinates with ``scaler.inverse_transform`` for display.
    radius_values : numpy.ndarray
        Radius of each row of ``X``.
    scaler : fitted scaler
        Object providing ``inverse_transform`` for ``X``.
    extrema_order : int, default 35
        ``order`` passed to ``scipy.signal.argrelextrema`` when marking local
        radius maxima in the 1-D plot.

    Data with any other number of features produces no plot.
    """
    n_features = len(X[0])

    # Plotting 1D
    if n_features == 1:
        from scipy.signal import argrelextrema

        plt.scatter(scaler.inverse_transform(X), radius_values)
        plt.xlabel('Coordinates')
        plt.ylabel('Radius')
        plt.title("Radius Values")

        peak_idx = argrelextrema(radius_values, np.greater, order=extrema_order)[0]
        # inverse_transform rejects empty input, so only draw lines when
        # at least one local maximum was found.
        if peak_idx.size:
            for decision_line in scaler.inverse_transform(X[peak_idx]).ravel():
                plt.axvline(decision_line, color='red', linestyle="--", label="epsilon")  # vertical line
        plt.legend(["Data points", "Decision Boundary"], fontsize=8)
        plt.show()

    # Plotting 2D
    elif n_features == 2:
        sns.set(style="darkgrid")
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')

        x_plot = scaler.inverse_transform(X)
        x = x_plot[:, 0]
        y = x_plot[:, 1]
        z = radius_values
        c = y

        ax.set_xlabel("X1")
        ax.set_ylabel("X2")
        ax.set_zlabel("Radius")
        # The 4th positional argument of Axes3D.scatter is `zdir`, so the
        # colour values must be passed by keyword.
        ax.scatter(x, y, z, c=c)
        plt.show()

In [20]:
def plot_density_estimate():
    """Plot the estimated neighbourhood density against the true density.

    Draws two 1-D normal samples (N(0, 1) and N(2, 1), 1000 points each, fixed
    seed), then runs ``Radius_Neighbours.fit`` and ``predict`` with plotting
    enabled, using the pointwise minimum of the two true pdfs as reference.
    """
    np.random.seed(0)
    epsilon = 0
    size = 1000
    mean1, scale1 = 0, 1
    mean2, scale2 = 2, 1

    x1 = np.random.normal(mean1, scale1, size)
    x2 = np.random.normal(mean2, scale2, size)
    X = np.concatenate([x1, x2]).reshape(-1, 1)
    y = np.concatenate([np.ones(len(x1)), -np.ones(len(x2))])

    f1_distribution = stats.norm.pdf(x=X, loc=mean1, scale=scale1)
    f2_distribution = stats.norm.pdf(x=X, loc=mean2, scale=scale2)
    class_ov = (f1_distribution > epsilon) & (f2_distribution > epsilon)

    # Reference curve handed to both plotting steps: coordinates and the
    # smaller of the two true densities at each of them.
    X_coor = X[class_ov]
    true_density = np.minimum(f1_distribution[class_ov], f2_distribution[class_ov])
    reference = [X_coor, true_density]

    distances, indices = Radius_Neighbours.fit(X, y, Plotting=reference)
    Radius_Neighbours.predict(X, distances, indices, y, epsilon, Plotting=reference)

In [None]:
def good_case_RNN_plot():
    """Score and plot the radius-neighbour overlap estimate on an easy case.

    Two 1-D normal samples (N(0, 1) and N(1, 1), 1000 points each, fixed seed)
    are generated.  The true overlap interval is the range of points where
    both true pdfs exceed ``epsilon``; the estimated interval is the range of
    points returned by ``Radius_Neighbours.fit_predict``.  Either interval is
    ``(0, 0)`` when no point qualifies.  Prints the IOU and DSC scores and
    draws the comparison via ``graph``.
    """
    np.random.seed(0)
    epsilon = 0.22
    size = 1000
    mean1 = 0
    mean2 = 1
    scale1 = 1
    scale2 = 1

    x1 = np.random.normal(mean1, scale1, size)
    x2 = np.random.normal(mean2, scale2, size)
    X = np.concatenate([x1, x2]).reshape(-1, 1)
    y = np.concatenate([np.ones(len(x1)), -np.ones(len(x2))])

    f1_distribution = stats.norm.pdf(x=X, loc=mean1, scale=scale1)
    f2_distribution = stats.norm.pdf(x=X, loc=mean2, scale=scale2)
    class_ov = (f1_distribution > epsilon) & (f2_distribution > epsilon)
    overlap = X[class_ov]

    if len(overlap) == 0:  # If no true overlap
        true_interval = (0, 0)
    else:  # If overlap
        true_interval = (overlap.min(), overlap.max())

    # Estimated Overlap by the Nearest Neighbour
    overlap_nn = Radius_Neighbours.fit_predict(X, y, epsilon)

    if len(overlap_nn) == 0:  # If no estimated overlap
        estimated_interval = (0, 0)
    else:  # If estimated overlap
        estimated_interval = (overlap_nn.min(), overlap_nn.max())

    # Calculate Score
    iou = IOU(estimated_interval, true_interval)
    dsc = DSC(estimated_interval, true_interval)

    print("Score: IOU", iou, " DSC", dsc)

    graph(x1, x2, [mean1, scale1, mean2, scale2], estimated_interval, true_interval, epsilon)

In [None]:
def bad_case_NN_plot():
    """Score the radius-neighbour overlap estimate on a harder case.

    Two 1-D normal samples (N(0, 1) and N(1, 2), 1000 points each, fixed seed)
    are generated and their true densities plotted.  The true overlap interval
    is the range of points where both true pdfs exceed ``epsilon`` (``(0, 0)``
    when there is none).  Every sample strictly inside that interval is
    labelled 1, all others 0, and those labels are compared with the points
    returned by ``Radius_Neighbours.fit_predict`` through
    ``iou_acc_multiple_dim``.  Prints the resulting IOU.
    """
    np.random.seed(0)
    epsilon = 0.10
    size = 1000
    mean1 = 0
    mean2 = 1
    scale1 = 1
    scale2 = 2

    x1 = np.random.normal(mean1, scale1, size)
    x2 = np.random.normal(mean2, scale2, size)
    X = np.concatenate([x1, x2]).reshape(-1, 1)
    y = np.concatenate([np.ones(len(x1)), -np.ones(len(x2))])

    # Get true distribution and overlap
    f1_distribution = stats.norm.pdf(x=x1, loc=mean1, scale=scale1)
    f2_distribution = stats.norm.pdf(x=x2, loc=mean2, scale=scale2)
    # Plot the distribution of two classes, overlap region and overlap points
    plt.scatter(x1, f1_distribution, alpha=0.3)
    plt.scatter(x2, f2_distribution, alpha=0.3)
    plt.legend(["x1", "x2"])
    plt.xlabel("Coordinates")
    plt.ylabel("Density")
    plt.title("Overlapping points of two normaly distributed function")
    plt.show()

    # Both densities evaluated on the full sample, used for the true overlap.
    f1_distribution = stats.norm.pdf(x=X, loc=mean1, scale=scale1)
    f2_distribution = stats.norm.pdf(x=X, loc=mean2, scale=scale2)
    class_ov = (f1_distribution > epsilon) & (f2_distribution > epsilon)
    overlap = X[class_ov]

    if len(overlap) == 0:  # If no true overlap
        true_interval = (0, 0)
    else:  # If overlap
        true_interval = (overlap.min(), overlap.max())

    # Estimated Overlap by the Nearest Neighbour
    true_density = np.minimum(f1_distribution[class_ov], f2_distribution[class_ov])
    overlap_nn = Radius_Neighbours.fit_predict(X, y, epsilon, Plotting=[overlap, true_density])

    # Label by the interval rather than overlap.min()/max() directly, so an
    # empty overlap yields all-zero labels instead of raising.
    lower, upper = true_interval
    y_true = [1 if lower < value < upper else 0 for value in X.ravel()]

    iou_nn, acc_nn, y_pred = iou_acc_multiple_dim(X, overlap_nn, y_true)
    print(iou_nn)