### Transformers-Explainability
Follow this pipeline to interpret a model's decisions using different XAI methods.

In [None]:
# Execute dataset_creator.ipynb from this notebook: downloads the raw datasets
# (can take some time) and imports the dataset creation functions.
# The classes used in the next cells (TextProcessor, AsyLexCleaner, DatasetHandler)
# are not defined in this notebook; presumably they come from this run -- confirm.
%run dataset_creator.ipynb

In [None]:
# Create the intermediate datasets (can take a while).
# Requires the dataset_creator.ipynb cell above to have been run first, since
# TextProcessor and AsyLexCleaner are not defined in this notebook.
text_processor = TextProcessor(None)
ac = AsyLexCleaner(text_processor=text_processor)
ac.create_all_intermediate()
# NOTE: the name `ac` is rebound to an AnalysisCreator in the XAI cell further down.

In [None]:
# Choose the model, the dataset and the paragraph selection strategy.
# These three names are read by the dataset-creation and model-loading cells below.
model_name = 'bert' # alternative: 'roberta'
dataset_name = 'asylex-outcome' # options: 'asylex-outcome', 'sentiment1', 'sentiment2'
# options: 'rand', 'cas', 'first', 'last', ''
# Original note: "use the last for sentiment1 and sentiment2".
# NOTE(review): "the last" presumably means the final option listed (the empty string ''),
# not the 'last' strategy -- confirm.
paragraph_selection_strategy = 'first'

In [None]:
# Build the dataset for the configuration chosen above and hand it to write_dataset.
text_processor = TextProcessor(None)
dh = DatasetHandler(
    info=None,
    model=model_name,
    text_processor=text_processor,
)

# Resolve the dataset/model paths and the number of labels for this configuration.
dataset_path, model_path, num_labels = dh.import_paths_and_nlabels(
    dataset_name,
    model_name,
    paragraph_selection_strategy,
)

# subset_dimension=None is passed through unchanged.
created_dataset = dh.create_dataset(
    dataset_name,
    paragraph_selection_strategy,
    subset_dimension=None,
)
dh.write_dataset(dataset_path, created_dataset)

# Drop the reference once it has been passed to write_dataset.
del created_dataset

#### Proceed to train the model using train_and_test.ipynb
When the training is done and the models are saved in the proper models folder, you can move on to the following stage

In [16]:
# Execute XAI.ipynb from this notebook to import the XAI classes and functions.
# The names used in the following cells (Loader, LIME, SHA, DOA, rangedCSM, DataAnalysis,
# AnalysisCreator, embeddings_manager, torch) are not defined in this notebook;
# presumably they come from this run -- confirm.
%run XAI.ipynb

In [17]:
# Load the three dataset splits together with the tokenizer and the model
# for the configuration selected above.
(
    train_set,
    test_set,
    validation_set,
    tokenizer,
    model,
) = Loader.import_dataset_and_model(
    dataset_name,
    model_name,
    paragraph_selection_strategy,
)

# The test split is the one used from here on, under the name `dataset`.
dataset = test_set

# Remove the names that are no longer needed; the test split stays reachable via `dataset`.
del test_set, train_set

In [18]:
# Apply the selected XAI methods to the loaded model and dataset.
n_embeddings = 6 # number of embeddings to be generated; must be >= n_sentences
n_sentences = 2
undecided_threshold = 0.4

# Enforce the constraint stated above before any expensive work starts.
if n_embeddings < n_sentences:
    raise ValueError(
        'n_embeddings ({}) must be >= n_sentences ({})'.format(n_embeddings, n_sentences)
    )

# Available explainers; each one carries its own DataAnalysis configuration.
my_lime = LIME(['negative','positive'],510,1000,clipped_heatmap=False,analytics=DataAnalysis(method_name='lime',create_plot=True,create_stats=True,relevance_threshold=0.01))
my_shap = SHA(clipped_heatmap=False,analytics=DataAnalysis(method_name='shap',create_plot=True,create_stats=True,relevance_threshold=0.1))
my_doa = DOA(analytics=DataAnalysis(method_name='diff_of_angle_pho',create_plot=True,create_stats=True,relevance_threshold=4.0))
my_rang_csm = rangedCSM(analytics=DataAnalysis(method_name='ranged_csm',create_plot=True,create_stats=True,relevance_threshold=0.35))
method_list = [my_rang_csm,my_shap] #this is the list of methods that will be applied

ac = AnalysisCreator('try1')
try:
    # Embeddings are generated with the model on the CPU.
    _ = model.to('cpu')
    embeddings_manager.generate_embeddings(dataset_name,model_name,paragraph_selection_strategy,n_embeddings)
    ac.load_embeddings(n_embeddings)
    # Scoring runs on the GPU when one is available.
    if torch.cuda.is_available():
        _ = model.to('cuda')
    ac.create_scores(n_sentences=n_sentences,undecided_threshold=undecided_threshold,method_list=method_list)
    ac.create_comparison(method_list)
finally:
    # Always move the model back to the CPU, even when the run fails or is
    # interrupted (KeyboardInterrupt), so it is not left on the GPU.
    _ = model.to('cpu')

tensor([[ 2.3943, -2.0822]])
tensor([[ 2.9225, -2.5276]])
tensor([[ 2.8120, -2.4954]])
tensor([[ 2.9994, -2.6536]])
tensor([[ 2.3034, -1.6276]])
tensor([[ 2.0531, -1.6227]])


KeyboardInterrupt: 

See the results in the results folder.

In [None]:
# Other implemented methods.
# Running this cell rebinds my_lime, my_shap and my_doa from the cell above.
# NOTE(review): unlike the cell above, the DataAnalysis objects here are built
# without a method_name argument -- confirm that this is intended.
my_lime = LIME(['negative','positive'], 510, 1000)
my_shap = SHA(
    clipped_heatmap=False,
    analytics=DataAnalysis(
        create_plot=True,
        create_stats=True,
        relevance_threshold=0.1,
    ),
)
my_doa = DOA(
    analytics=DataAnalysis(
        create_plot=True,
        create_stats=True,
        relevance_threshold=4.0,
    ),
)
my_ned = NED()
my_csm = CSM()
my_new_csm = newCSM()
my_cos_sim = cosine_similarity(
    analytics=DataAnalysis(
        create_plot=True,
        create_stats=True,
        relevance_threshold=0.35,
    ),
)