In [1]:
import experiments
import pickle
import shap

import numpy as np
from scipy.stats import pearsonr, rankdata
import itertools
from scipy.special import comb
import pandas as pd

In [2]:
# Free-text label describing this experimental condition; it is passed
# unchanged to the experiment object below.
# NOTE(review): "(2) and not unique" looks like a stray duplicated "and". The
# text is a runtime value handed to HateXplainExperiments, so it is reproduced
# verbatim here - confirm it is not used as a key before correcting it.
description = (
    "Exponential function applied to tokens in the TF-IDF vectorised training "
    "matrix (1) tagged as not indicative in offensive and hate-speech related "
    "human rationales and (2) and not unique to sentences labelled as normal [b]"
)

# Build the experiment and run its preparation steps in their original order.
# Later cells read `hatexplain.vectorizer`, `hatexplain.test_sentences`,
# `hatexplain.test_rationales` and `hatexplain.X_test_vect`; presumably these
# are populated by the calls below - TODO confirm in `experiments`.
hatexplain = experiments.HateXplainExperiments(description)
dataset = hatexplain.prepare_dataset()
hatexplain.prepare_properties(dataset)

# Training matrix used further down as the source of the SHAP background sample.
X_train_vect = hatexplain.preprocess_training_data_option_two()

# Disabled call kept from the original notebook (not executed).
#hatexplain.get_performance_metrics(X_train_vect)

In [4]:
# Restore the pickled model object; the next cell uses its `predict_proba`.
# SECURITY: unpickling executes arbitrary code embedded in the file, so
# 'best_algorithm.sav' must come from a trusted source.
with open('best_algorithm.sav', 'rb') as model_file:
    best_algorithm = pickle.load(model_file)

In [5]:
# Background data for the kernel explainer: 10 rows drawn from the training
# matrix by shap.sample.
shap_background = shap.sample(X_train_vect, 10)

# NOTE(review): `model_output` is a TreeExplainer-style option; KernelExplainer
# appears to accept it only through **kwargs without acting on it - confirm
# against the installed SHAP version before relying on it.
explainer = shap.KernelExplainer(
    best_algorithm.predict_proba,
    shap_background,
    model_output='probability',
)

# Explain every row of the vectorised test set. The recorded progress bar shows
# 4030 rows, so this is the expensive step of the notebook.
shap_values = explainer.shap_values(hatexplain.X_test_vect)

  0%|          | 0/4030 [00:00<?, ?it/s]

In [None]:
# Show a compact summary instead of echoing every attribution value: the
# explainer was run on the whole test set, so the full repr would flood the
# notebook output. Indexing with [0] mirrors how the next cells consume it.
type(shap_values), len(shap_values), np.shape(shap_values[0])

In [6]:
# Build one 0/1 indicator vector per test sentence, laid out over the
# vectoriser vocabulary: position vocabulary_[token] is set to 1 when the token
# is flagged (== 1) in the sentence's rationale and is present in the
# vocabulary. Sentences with no flagged tokens keep an all-zero vector.
vocabulary = hatexplain.vectorizer.vocabulary_
features = vocabulary.keys()

base = []
for tokens, flags in zip(hatexplain.test_sentences, hatexplain.test_rationales):
    indicator = np.zeros(len(features))
    for tok, flag in zip(tokens, flags):
        # Only flagged tokens are looked up; out-of-vocabulary tokens give None.
        column = vocabulary.get(tok) if flag == 1 else None
        if column is not None:
            indicator[column] = 1
    base.append(indicator)

In [7]:
def _dense_rank_by_magnitude(scores):
    """Dense-rank `scores` by absolute value; the largest magnitude gets rank 1."""
    return rankdata(-np.abs(scores), method='dense')


# The two attribution sets being compared, one row per data point.
# NOTE(review): `shap_values[0]` is assumed to be the (datapoints x features)
# attributions for the first model output; some SHAP versions return a single
# array with a different axis order - confirm before interpreting results.
attrA = shap_values[0]
attrB = base

n_datapoints, n_feat = len(attrA), len(attrA[0])

# Rank matrices, filled one data point at a time.
rank_shape = (n_datapoints, n_feat)
all_feat_ranksA = np.zeros(rank_shape)
all_feat_ranksB = np.zeros(rank_shape)

for row in range(n_datapoints):
    all_feat_ranksA[row, :] = _dense_rank_by_magnitude(attrA[row])
    all_feat_ranksB[row, :] = _dense_rank_by_magnitude(attrB[row])

In [None]:
# Pairwise rank agreement: for every data point, the fraction of unordered
# feature pairs (feat1 < feat2) on which both rankings agree about whether
# feat1 is ranked strictly ahead of feat2.
#
# The counting is vectorised over data points and over the second feature of
# each pair, so the only Python-level loop is over `feat1`. The counts are
# identical to visiting every (data point, feat1, feat2) combination one at a
# time, but avoid n_datapoints * C(n_feat, 2) interpreter iterations.
feat_pairs_w_same_rel_rankings = np.zeros(n_datapoints)

for feat1 in range(n_feat - 1):
    # Column `feat1`, kept 2-D so it broadcasts against every later column.
    rel_rankingA = all_feat_ranksA[:, feat1, None] < all_feat_ranksA[:, feat1 + 1:]
    rel_rankingB = all_feat_ranksB[:, feat1, None] < all_feat_ranksB[:, feat1 + 1:]
    # Per data point, count the pairs (feat1, feat2 > feat1) where both agree.
    feat_pairs_w_same_rel_rankings += np.count_nonzero(
        rel_rankingA == rel_rankingB, axis=1
    )

# Normalise by the number of unordered feature pairs to get a value in [0, 1].
pairwise_distr = feat_pairs_w_same_rel_rankings / comb(n_feat, 2)