# Basic Initialization on Dataset

More fun artificial datasets to try are available at https://github.com/deric/clustering-benchmark/tree/master/src/main/resources/datasets/artificial

In [77]:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import animation
from IPython.display import HTML

from sklearn.datasets import make_blobs
from sklearn.neighbors import LocalOutlierFactor

class LOFAnimation:
    """Animate Local Outlier Factor (LOF) outliers as ``n_neighbors`` grows.

    Each frame fits a ``LocalOutlierFactor`` with a different ``n_neighbors``
    on the same 2-D dataset, circles every point whose LOF exceeds
    ``min_lof`` (circle area scales with the LOF value) and labels it with
    its LOF rounded to two decimals.

    Args:
        data: Array-like of points; columns 0 and 1 are used as x and y.
        max_n: Exclusive upper bound for ``n_neighbors``; frames run over
            ``range(1, max_n)``.
        min_lof: A point is drawn as an outlier when its LOF is strictly
            greater than this value.
    """

    def __init__(self, data, max_n=50, min_lof=1.5):
        # asarray is a no-op for ndarrays and lets plain nested lists work
        # with the column slicing and boolean-mask indexing used below.
        self.data = np.asarray(data)
        self.n_range = range(1, max_n)
        self.min_lof = min_lof

        # The data bounds never change, so compute them once; they anchor
        # the caption text in the lower-left corner of the data.
        self.min_x = np.min(self.data[:, 0])
        self.max_x = np.max(self.data[:, 0])
        self.min_y = np.min(self.data[:, 1])
        self.max_y = np.max(self.data[:, 1])

        # Artists are created by setup_plot(); initialise them here so that
        # setup_plot() can tell a first call from a repeated one.
        self.scatter = None
        self.outliers = None
        self.annotations = []

        self.fig, self.ax = plt.subplots(figsize=(8, 8))
        self.anim = animation.FuncAnimation(
            self.fig, self.animate,
            init_func=self.setup_plot,
            frames=self.n_range,
            interval=500,
            blit=True
        )

    def setup_plot(self):
        """Create (or reset) the artists and return them for blitting.

        Used as the animation's ``init_func``. It may run more than once
        (for example when the animation is rendered a second time), so it
        reuses the existing scatter artists and removes annotations left
        over from a previous run instead of stacking new ones on the axes.

        Returns:
            list: The data scatter and the (empty) outlier scatter.
        """
        self._clear_annotations()
        if self.scatter is None:
            self.scatter = self.ax.scatter(
                self.data[:, 0], self.data[:, 1], s=10, facecolors='k')
            self.outliers = self.ax.scatter(
                [], [], facecolors='none', edgecolors='r')
        else:
            # Start again from a frame with no outliers marked.
            self.outliers.set_offsets(np.empty((0, 2)))
            self.outliers.set_sizes(np.empty(0))
        return [self.scatter, self.outliers]

    def _clear_annotations(self):
        """Remove every annotation this object added to the axes."""
        for ann in self.annotations:
            ann.remove()
        self.annotations = []

    def animate(self, n):
        """Draw the frame for ``n_neighbors = n``.

        Args:
            n: Number of neighbours passed to ``LocalOutlierFactor``.

        Returns:
            list: All artists touched in this frame, including the newly
            created annotations, so they are redrawn when blitting.
        """
        clf = LocalOutlierFactor(n_neighbors=n)
        clf.fit_predict(self.data)
        # scikit-learn stores the *negated* LOF; flip the sign once.
        lof = -clf.negative_outlier_factor_
        out_idx = lof > self.min_lof
        # Index the instance's own data, not a module-level variable.
        outliers = self.data[out_idx]
        outlier_lof = lof[out_idx]
        self.outliers.set_offsets(outliers)
        self.outliers.set_sizes(170*outlier_lof)

        # Replace the previous frame's text with this frame's.
        self._clear_annotations()
        caption_drop = (self.max_y - self.min_y)/25
        self.annotations.extend([
            self.ax.annotate(
                f'n_neighbors = {n}',
                (self.min_x, self.min_y)),
            self.ax.annotate(
                f'(min LOF to consider outlier = {self.min_lof})',
                (self.min_x, self.min_y - caption_drop)),
        ])
        self.annotations.extend(
            self.ax.annotate(round(value, 2), point)
            for point, value in zip(outliers, outlier_lof)
        )
        return [self.scatter, self.outliers, *self.annotations]

    def save(self, *args, **kwargs):
        """Save the animation; arguments are forwarded to ``anim.save``."""
        return self.anim.save(*args, **kwargs)

    def show(self, *args, **kwargs):
        """Return an IPython ``HTML`` object wrapping the HTML5 video.

        Arguments are forwarded to ``anim.to_html5_video``.
        """
        return HTML(self.anim.to_html5_video(*args, **kwargs))

In [78]:
# Generate a 100-point blob dataset; the second return value (labels) is
# discarded and random_state=0 keeps the points reproducible.
data, _ = make_blobs(n_samples=100, random_state=0)
# Animate n_neighbors over range(1, 25), marking points whose LOF exceeds 1.3.
anim = LOFAnimation(data, max_n=25, min_lof=1.3)
# Close the current pyplot figure — presumably so the notebook does not also
# render a static copy of it next to the video (confirm).
plt.close()
# Last expression of the cell: its value (an HTML video object) is what the
# notebook displays.
anim.show()