In [None]:
import os
import sys
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import average_precision_score

# Extend the module search path before the imports below
# (presumably they live in the parent directory -- confirm).
sys.path.append('..')

from MyEnsemble import MyIF, MyEIF, MyNNIF, MyDIF
from MyInterpreter import MyInterpreter

import my_datasets

GFI function

In [None]:
def GFI(
    model_class,
    interpretation,
    random_state,
    iter
):
    """Compute global feature importances (GFI) per dataset and save them as CSV.

    For every name in ``my_datasets.datasets_names`` the function fits ``iter``
    models, asks ``MyInterpreter`` for the GFI vector of each fitted model and
    writes three files into
    ``<model_class.__name__>/<interpretation>/<dataset_name>/``:

    * ``<dataset_name>.csv``      -- one row per run, one column per GFI entry
    * ``<dataset_name>_mean.csv`` -- column-wise mean over the runs
    * ``<dataset_name>_sem.csv``  -- column-wise standard error of the mean

    Parameters
    ----------
    model_class : type
        Called as ``model_class(contamination=..., random_state=...)``; the
        resulting object must expose ``fit(X)``. Its ``__name__`` is used as
        the top-level output folder.
    interpretation : str
        Passed as first argument to ``MyInterpreter`` and used as the
        second-level output folder name.
    random_state : int
        Base seed; run ``i`` uses ``random_state + i``.
    iter : int
        Number of runs per dataset. (The name shadows the ``iter`` builtin
        inside this function; it is kept so keyword callers keep working.)

    Returns
    -------
    None
        Results are only written to disk.
    """
    folder = model_class.__name__
    os.makedirs(folder, exist_ok=True)

    for dataset_name in my_datasets.datasets_names:

        print(dataset_name)

        data_dict = my_datasets.load(dataset_name)
        X = data_dict['X']
        y = data_dict['y']

        # One flattened GFI vector per independently seeded run.
        runs = []
        for i in range(iter):
            # The mean of y is used as the contamination level of the model.
            model = model_class(contamination=np.mean(y), random_state=random_state + i)
            model.fit(X)
            interpreter = MyInterpreter(interpretation, model)
            runs.append(interpreter.GFI().reshape(-1))

        GFIs = pd.DataFrame(np.array(runs))

        # os.path.join keeps the layout portable; the former hand-written
        # backslashes were invalid escape sequences inside the f-strings and
        # produced nested folders on Windows only.
        out_dir = os.path.join(folder, interpretation, dataset_name)
        os.makedirs(out_dir, exist_ok=True)

        GFIs.to_csv(os.path.join(out_dir, f'{dataset_name}.csv'))
        GFIs.mean().to_csv(os.path.join(out_dir, f'{dataset_name}_mean.csv'))
        GFIs.sem().to_csv(os.path.join(out_dir, f'{dataset_name}_sem.csv'))

AUC_FS functions

In [None]:
def AUC_FS(model, ranking, X, y):
    """Score a feature ranking by refitting ``model`` on shrinking feature sets.

    The columns of ``X`` are reordered by ``ranking``. Two copies are then
    shrunk one column per step: ``X_least`` loses its last column, ``X_most``
    loses its first column. At every step the model is fitted on each copy and
    the average precision of ``model.decision_scores_`` against ``y`` is
    recorded.

    Parameters
    ----------
    model : object
        Must provide ``fit(X)`` and, after fitting, ``decision_scores_``.
    ranking : numpy.ndarray
        1-D integer array of column indices of ``X``. The callers in this file
        pass ``np.argsort(-GFIs_mean)``, i.e. most important feature first.
    X : numpy.ndarray
        2-D data matrix, indexed as ``X[:, ranking]``.
    y : array-like
        Labels forwarded to ``average_precision_score`` as ``y_true``.

    Returns
    -------
    float
        ``sum(AP on X_least) - sum(AP on X_most)`` over all completed steps.

    Notes
    -----
    A step whose fit or scoring raises is skipped with a ``RuntimeWarning``
    and contributes to neither sum. The feature sets still shrink afterwards,
    so one failing step cannot make every later step repeat the same fit.
    """
    n_features = ranking.shape[0]
    AP_least = []
    AP_most = []

    X_least = X[:, ranking].copy()
    X_most = X[:, ranking].copy()

    for step in range(n_features):

        try:
            model.fit(X_least)
            ap_least = average_precision_score(y, model.decision_scores_)
            model.fit(X_most)
            ap_most = average_precision_score(y, model.decision_scores_)
        except Exception as exc:
            # Best effort: report the failure instead of silently hiding it.
            warnings.warn(
                f'AUC_FS: step {step} skipped '
                f'({X_least.shape[1]} features left): {exc!r}',
                RuntimeWarning,
                stacklevel=2,
            )
        else:
            # Append both values together so the two lists stay aligned.
            AP_least.append(ap_least)
            AP_most.append(ap_most)

        # Always advance, whether or not this step succeeded.
        X_least = X_least[:, :-1]
        X_most = X_most[:, 1:]

    return np.sum(AP_least) - np.sum(AP_most)

def weighted_AUC_FS(model, ranking, GFIs_mean, X, y):
    """Importance-weighted variant of ``AUC_FS``.

    ``GFIs_mean`` is divided by its Euclidean norm and squared element-wise,
    which gives one non-negative weight per feature (the weights sum to 1).
    The columns of ``X`` and the weights are reordered by ``ranking``. Two
    copies are then shrunk one feature per step: the "least" copy loses its
    last column, the "most" copy loses its first column. At every step the
    average precision obtained on each copy is multiplied by the sum of the
    weights of the features still present in that copy.

    Parameters
    ----------
    model : object
        Must provide ``fit(X)`` and, after fitting, ``decision_scores_``.
    ranking : numpy.ndarray
        1-D integer array of column indices of ``X``. The caller in this file
        passes ``np.argsort(-GFIs_mean)``, i.e. most important feature first.
    GFIs_mean : numpy.ndarray
        1-D array with one importance value per column of ``X``.
    X : numpy.ndarray
        2-D data matrix, indexed as ``X[:, ranking]``.
    y : array-like
        Labels forwarded to ``average_precision_score`` as ``y_true``.

    Returns
    -------
    float
        ``sum(weighted AP, least) - sum(weighted AP, most)`` over all
        completed steps.

    Notes
    -----
    A step whose fit or scoring raises is skipped with a ``RuntimeWarning``
    and contributes to neither sum; the feature sets and weights still shrink
    afterwards.
    """
    n_features = ranking.shape[0]
    AP_least = []
    AP_most = []

    GFIs_norm = np.linalg.norm(GFIs_mean)
    GFIs_mean_perc = (GFIs_mean / GFIs_norm) ** 2

    X_least = X[:, ranking].copy()
    X_most = X[:, ranking].copy()
    GFIs_mean_perc_least = GFIs_mean_perc[ranking].copy()
    GFIs_mean_perc_most = GFIs_mean_perc[ranking].copy()

    for step in range(n_features):

        try:
            model.fit(X_least)
            ap_least = np.sum(GFIs_mean_perc_least) * average_precision_score(y, model.decision_scores_)
            model.fit(X_most)
            ap_most = np.sum(GFIs_mean_perc_most) * average_precision_score(y, model.decision_scores_)
        except Exception as exc:
            # Best effort: report the failure instead of silently hiding it.
            warnings.warn(
                f'weighted_AUC_FS: step {step} skipped '
                f'({X_least.shape[1]} features left): {exc!r}',
                RuntimeWarning,
                stacklevel=2,
            )
        else:
            # Append both values together so the two lists stay aligned.
            AP_least.append(ap_least)
            AP_most.append(ap_most)

        # Always advance, whether or not this step succeeded.
        X_least = X_least[:, :-1]
        X_most = X_most[:, 1:]
        GFIs_mean_perc_least = GFIs_mean_perc_least[:-1]
        # X_most drops its FIRST column, so its weights must drop their first
        # entry too (previously the last entry was dropped, which paired the
        # remaining features with the wrong weights).
        GFIs_mean_perc_most = GFIs_mean_perc_most[1:]

    return np.sum(AP_least) - np.sum(AP_most)

def AUC_FS_(
    model_class,
    interpretation
):
    """Run ``AUC_FS`` on every dataset and save each score as CSV.

    For every name in ``my_datasets.datasets_names`` the function reads
    ``<model_class.__name__>/<interpretation>/<dataset_name>/<dataset_name>_mean.csv``,
    ranks the features by descending value, evaluates ``AUC_FS`` with a model
    built as ``model_class(contamination=mean(y), random_state=0)`` and writes
    the single resulting number to ``<dataset_name>_auc_fs.csv`` in the same
    folder.

    Parameters
    ----------
    model_class : type
        Model constructor; its ``__name__`` is the top-level folder name.
    interpretation : str
        Name of the second-level folder holding the mean importances.

    Returns
    -------
    None
        Results are only written to disk.

    Raises
    ------
    FileNotFoundError
        Raised by ``pandas.read_csv`` when a ``*_mean.csv`` file is missing.
    """
    folder = model_class.__name__
    os.makedirs(folder, exist_ok=True)

    for dataset_name in my_datasets.datasets_names:

        data_dict = my_datasets.load(dataset_name)
        X = data_dict['X']
        y = data_dict['y']

        print(dataset_name)

        # os.path.join keeps the layout portable; the former hand-written
        # backslashes were invalid escape sequences inside the f-strings and
        # addressed nested folders on Windows only.
        out_dir = os.path.join(folder, interpretation, dataset_name)

        GFIs_mean = pd.read_csv(
            os.path.join(out_dir, f'{dataset_name}_mean.csv'),
            index_col=0,
        ).to_numpy().reshape(-1)
        # Negating before argsort puts the largest mean importance first.
        ranking = np.argsort(-GFIs_mean)

        model = model_class(contamination=np.mean(y), random_state=0)
        score = AUC_FS(model, ranking, X, y)

        os.makedirs(out_dir, exist_ok=True)
        pd.DataFrame(np.array([score])).to_csv(
            os.path.join(out_dir, f'{dataset_name}_auc_fs.csv')
        )

def weighted_AUC_FS_(
    model_class,
    interpretation
):
    """Run ``weighted_AUC_FS`` on every dataset and save each score as CSV.

    For every name in ``my_datasets.datasets_names`` the function reads
    ``<model_class.__name__>/<interpretation>/<dataset_name>/<dataset_name>_mean.csv``,
    ranks the features by descending value, evaluates ``weighted_AUC_FS`` with
    a model built as ``model_class(contamination=mean(y), random_state=0)``
    and writes the single resulting number to
    ``<dataset_name>_weighted_auc_fs.csv`` in the same folder.

    Parameters
    ----------
    model_class : type
        Model constructor; its ``__name__`` is the top-level folder name.
    interpretation : str
        Name of the second-level folder holding the mean importances.

    Returns
    -------
    None
        Results are only written to disk.

    Raises
    ------
    FileNotFoundError
        Raised by ``pandas.read_csv`` when a ``*_mean.csv`` file is missing.
    """
    folder = model_class.__name__
    os.makedirs(folder, exist_ok=True)

    for dataset_name in my_datasets.datasets_names:

        data_dict = my_datasets.load(dataset_name)
        X = data_dict['X']
        y = data_dict['y']

        print(dataset_name)

        # os.path.join keeps the layout portable; the former hand-written
        # backslashes were invalid escape sequences inside the f-strings and
        # addressed nested folders on Windows only.
        out_dir = os.path.join(folder, interpretation, dataset_name)

        GFIs_mean = pd.read_csv(
            os.path.join(out_dir, f'{dataset_name}_mean.csv'),
            index_col=0,
        ).to_numpy().reshape(-1)
        # Negating before argsort puts the largest mean importance first.
        ranking = np.argsort(-GFIs_mean)

        model = model_class(contamination=np.mean(y), random_state=0)
        score = weighted_AUC_FS(model, ranking, GFIs_mean, X, y)

        os.makedirs(out_dir, exist_ok=True)
        pd.DataFrame(np.array([score])).to_csv(
            os.path.join(out_dir, f'{dataset_name}_weighted_auc_fs.csv')
        )

Calls

In [None]:
# Fit 10 MyIF models per dataset (seeds 0..9) and write the 'DIFFI' importances,
# their mean and their SEM as CSV files. The AUC_FS_ / weighted_AUC_FS_ cells
# below read the *_mean.csv files produced here, so run this cell first.
GFI(MyIF, 'DIFFI', random_state=0, iter=10)

In [None]:
# Requires the GFI cell above to have been run: AUC_FS_ reads each dataset's
# *_mean.csv and writes one *_auc_fs.csv per dataset.
AUC_FS_(MyIF, "DIFFI")

In [None]:
# Requires the GFI cell above to have been run: weighted_AUC_FS_ reads each
# dataset's *_mean.csv and writes one *_weighted_auc_fs.csv per dataset.
weighted_AUC_FS_(MyIF, "DIFFI")