In [None]:
import numpy as np 
import pandas as pd
from scipy.stats import zscore
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor 

In [None]:
# In tool.ipynb

class outlier_manager:
    """Row-wise outlier filters that share one calling convention.

    Every public method flags rows of ``target_matrix`` as outliers and
    returns a 3-tuple ``(df_kept, matrix_kept, outlier_mask)``:

    * ``df_kept``      -- ``df_full`` with the flagged rows removed,
    * ``matrix_kept``  -- ``target_matrix`` (as an array) with the same rows removed,
    * ``outlier_mask`` -- boolean array, ``True`` where a row was flagged.

    ``df_full`` must support boolean-mask row selection (``df_full[mask]``,
    e.g. a pandas DataFrame or a NumPy array) and must have one row per row
    of ``target_matrix``. ``target_matrix`` may be any 2-D array-like; it is
    coerced with ``np.asanyarray`` so ndarrays pass through unchanged while
    DataFrames and nested lists are accepted too.
    """

    @staticmethod
    def _drop_flagged(df_full, matrix, outlier_mask):
        """Return ``df_full`` and ``matrix`` restricted to the rows not set in ``outlier_mask``."""
        keep = ~outlier_mask
        return df_full[keep], matrix[keep, :]

    @staticmethod
    def calculate_z_score(df_full, target_matrix, threshold = 4):
        """Flag rows where any column's absolute z-score exceeds ``threshold``.

        Z-scores are computed per column (``axis=0``). A NaN z-score compares
        as ``False`` against the threshold, so it never flags a row.

        Args:
            df_full: Row-aligned companion of ``target_matrix`` to filter.
            target_matrix: 2-D array-like of numeric values.
            threshold: Absolute z-score above which a value counts as an outlier.

        Returns:
            ``(df_kept, matrix_kept, z_outliers)`` as described on the class.
        """
        print(f"Calculating Z-Score with threshold {threshold}")
        # Coerce once so the `[mask, :]` indexing in the helper works for
        # DataFrames and nested lists, not only for ndarrays.
        matrix = np.asanyarray(target_matrix)

        z_scores = np.abs(zscore(matrix, axis=0))
        z_outliers = (z_scores > threshold).any(axis=1)

        df_kept, matrix_kept = outlier_manager._drop_flagged(df_full, matrix, z_outliers)

        print(f"Number of outliers detected: {np.sum(z_outliers)}")
        return df_kept, matrix_kept, z_outliers

    @staticmethod
    def calculate_isolation(df_full, target_matrix, contamination = 0.1, random_state = None):
        """Flag rows that an Isolation Forest predicts as outliers (label ``-1``).

        Args:
            df_full: Row-aligned companion of ``target_matrix`` to filter.
            target_matrix: 2-D array-like of numeric values.
            contamination: Forwarded to ``IsolationForest(contamination=...)``.
            random_state: Forwarded to ``IsolationForest(random_state=...)``.
                The default ``None`` keeps the previous unseeded behaviour;
                pass an int to make the flagged rows reproducible.

        Returns:
            ``(df_kept, matrix_kept, iso_outliers)`` as described on the class.
        """
        print(f"Calculating Isolation Forest with contamination {contamination}")
        matrix = np.asanyarray(target_matrix)

        iso_forest = IsolationForest(contamination = contamination, random_state = random_state)
        iso_outliers = iso_forest.fit_predict(matrix) == -1

        df_kept, matrix_kept = outlier_manager._drop_flagged(df_full, matrix, iso_outliers)

        print(f"Number of outliers detected: {np.sum(iso_outliers)}")
        return df_kept, matrix_kept, iso_outliers

    @staticmethod
    def calculate_lof(df_full, target_matrix , n_neighbors = 20, contamination = 'auto'):
        """Flag rows that Local Outlier Factor predicts as outliers (label ``-1``).

        Args:
            df_full: Row-aligned companion of ``target_matrix`` to filter.
            target_matrix: 2-D array-like of numeric values.
            n_neighbors: Forwarded to ``LocalOutlierFactor(n_neighbors=...)``.
            contamination: Forwarded to ``LocalOutlierFactor(contamination=...)``.

        Returns:
            ``(df_kept, matrix_kept, lof_outliers)`` as described on the class.
        """
        print(f"Calculating Local Outlier Factor with n_neighbors {n_neighbors} and contamination {contamination}")
        matrix = np.asanyarray(target_matrix)

        lof = LocalOutlierFactor(n_neighbors = n_neighbors, contamination = contamination)
        lof_outliers = lof.fit_predict(matrix) == -1

        df_kept, matrix_kept = outlier_manager._drop_flagged(df_full, matrix, lof_outliers)

        print(f"Number of outliers detected: {np.sum(lof_outliers)}")
        return df_kept, matrix_kept, lof_outliers


In [None]:
class static_outlier():
    """Fixed-preset wrappers around ``outlier_manager``.

    Each method runs one ``outlier_manager`` detector twice with two
    hard-coded settings and returns both result triples flattened into a
    single 6-tuple: ``(df_0, matrix_0, mask_0, df_1, matrix_1, mask_1)``.
    """

    @staticmethod
    def calculate_z(df_target, target_matrix):
        """Run the z-score filter with thresholds 4 and 10, in that order."""
        flattened = []
        for z_threshold in (4, 10):
            flattened.extend(
                outlier_manager.calculate_z_score(
                    df_full=df_target,
                    target_matrix=target_matrix,
                    threshold=z_threshold,
                )
            )
        return tuple(flattened)

    @staticmethod
    def calculate_iso(df_target, target_matrix):
        """Run the Isolation Forest filter with contamination 0.1 and 0.2, in that order."""
        flattened = []
        for contamination_rate in (0.1, 0.2):
            flattened.extend(
                outlier_manager.calculate_isolation(
                    df_full=df_target,
                    target_matrix=target_matrix,
                    contamination=contamination_rate,
                )
            )
        return tuple(flattened)

    @staticmethod
    def calculate_lof(df_target, target_matrix):
        """Run the LOF filter with n_neighbors 20 and 40 (contamination 'auto'), in that order."""
        flattened = []
        for neighbor_count in (20, 40):
            flattened.extend(
                outlier_manager.calculate_lof(
                    df_full=df_target,
                    target_matrix=target_matrix,
                    n_neighbors=neighbor_count,
                    contamination='auto',
                )
            )
        return tuple(flattened)
