In [None]:
# ==================== Global–Local Empirical Cumulative Outlier Detection (GLECOD) ==============================
# Authored by Arijit Bhattacharya


class GLECOD:
    """
    Global-Local Empirical Cumulative Outlier Detection.

    Blends a global, rank-based (ECOD-style) tail score with a local score
    given by the mean distance to the k nearest neighbours. Both parts and
    the final blend are scaled so that larger values mean "more outlying".

    GLECOD variants (selected through ``mode``, case-insensitive):
        AFWOW - All Features, Without Weights
        AFWW  - All Features, With Weights
        SFWOW - Selected Features, Without Weights
        SFWW  - Selected Features, With Weights

    Parameters
    ----------
    k : int
        Number of neighbours for the local component. During ``fit`` it is
        clamped to ``n_samples - 1``.
    alpha : float
        Blend factor: ``alpha * global + (1 - alpha) * local``.
    mode : str
        One of the variants above. Only the substrings "SF" and "WW" are
        inspected.
    weights : pandas.Series or mapping, optional
        Per-feature weights, looked up by column label. Only used when the
        mode contains "WW".
    selected_features : list, optional
        Column labels to keep. Only used when the mode contains "SF".
    random_state : int
        Passed to ``np.random.seed``.

    Attributes
    ----------
    scores_ : numpy.ndarray or None
        Min-max normalised outlier scores from the last ``fit`` call;
        ``None`` until ``fit`` has run.
    """
    def __init__(self, k=20, alpha=0.5, mode="AFWOW", weights=None, selected_features=None, random_state=42):
        self.k = k
        self.alpha = alpha
        self.mode = mode.upper()
        self.weights = weights
        self.selected_features = selected_features
        self.scores_ = None
        # NOTE: this seeds NumPy's *global* RNG. fit() draws no random
        # numbers itself; the call is kept so existing callers that rely on
        # the seeding side effect keep working.
        np.random.seed(random_state)

    def fit(self, X):
        """
        Compute outlier scores for every row of ``X`` and store them in
        ``scores_``.

        Parameters
        ----------
        X : array-like or pandas.DataFrame
            Input samples, one row per sample. Anything ``pd.DataFrame``
            accepts.

        Returns
        -------
        GLECOD
            ``self``, so calls can be chained.
        """
        X = pd.DataFrame(X)

        # === 1. Feature subset ===
        if "SF" in self.mode and self.selected_features is not None:
            X_used = X[self.selected_features]
        else:
            X_used = X

        n, m = X_used.shape

        # Nothing to score: avoid min()/max() on empty arrays further down.
        if n == 0:
            self.scores_ = np.zeros(0)
            return self

        # === 2. Apply weights ===
        use_weights = "WW" in self.mode
        if use_weights and self.weights is not None:
            # pd.Series(...) is a no-op wrapper for a Series and also lets
            # callers pass a plain mapping keyed by column label.
            w = pd.Series(self.weights).loc[X_used.columns].to_numpy(dtype=float)
        else:
            w = np.ones(m)

        # With unit weights this is simply the (float) feature matrix, so
        # the same array serves both the weighted and unweighted variants.
        Xw_np = X_used.to_numpy(dtype=float) * w

        # === 3. ECOD component ===
        # Rank every column at once (argsort of argsort = 0-based rank).
        # A stable sort makes the ordering of tied values deterministic.
        order = np.argsort(Xw_np, axis=0, kind="stable")
        ranks = np.argsort(order, axis=0, kind="stable")
        # max(..., 1) prevents 0/0 when there is a single sample.
        F = ranks / max(n - 1, 1)
        # Two-sided tail probability: small when the value is extreme.
        T = np.minimum(F, 1 - F)

        # In weighted modes, reweight ECOD contribution per feature
        if use_weights:
            S_ECOD = 1 - np.sum((T * w) / np.sum(w), axis=1)
        else:
            S_ECOD = 1 - np.mean(T, axis=1)

        # === 4. Local kNN component ===
        # A point has at most n - 1 neighbours besides itself; asking the
        # tree for more either raises or yields infinite distances,
        # depending on the KDTree implementation in use.
        k_eff = min(self.k, n - 1)
        if k_eff >= 1:
            tree = KDTree(Xw_np)
            # One batched query instead of one call per row. Column 0 is
            # the zero distance of each point to itself and is dropped.
            dist, _ = tree.query(Xw_np, k=k_eff + 1)
            D = np.asarray(dist)[:, 1:].mean(axis=1)
        else:
            # Single sample (or non-positive k): no neighbours to measure.
            D = np.zeros(n)
        # The small epsilon keeps the division defined when all D are equal.
        S_kNN = (D - D.min()) / (D.max() - D.min() + 1e-12)

        # === 5. Combine ===
        combined = self.alpha * S_ECOD + (1 - self.alpha) * S_kNN

        # Normalize for comparability
        self.scores_ = (combined - combined.min()) / (combined.max() - combined.min() + 1e-12)
        return self

    def decision_function(self):
        """
        Return the scores computed by the last ``fit`` call.

        Returns
        -------
        numpy.ndarray or None
            ``scores_``; ``None`` if ``fit`` has not been called yet.
        """
        return self.scores_