In [None]:
# install packages
!pip install detoxify
!pip install lime

In [None]:
# import packages
from detoxify import Detoxify

import lime
from lime.lime_text import LimeTextExplainer

import numpy as np
import pandas as pd

import json

from google.colab import files

In [None]:
# read the toxicity-label results CSV into a dataframe
df = pd.read_csv('drive/MyDrive/University/msci_project/results_toxicity_labels.csv')

# split the rows by the value of their `toxicity_label` column
df_toxic = df[df['toxicity_label'].eq("toxicity")]
df_nontoxic = df[df['toxicity_label'].eq("non_toxicity")]

In [None]:
# preview the first six rows of the loaded dataframe
df.head(n=6)

In [None]:
# create lime explainer object
# Index 0 is 'Non-toxic' and index 1 is 'Toxic', matching the column order
# of the probability array returned by predict().
class_names = ['Non-toxic','Toxic']
# A fixed random_state makes LIME's random perturbation sampling repeatable,
# so re-running the notebook can reproduce the same explanations.
explainer = LimeTextExplainer(class_names=class_names, random_state=10)

In [None]:
# prediction function
_detoxify_model = None  # Detoxify instance, created on first use and then reused


def predict(x):
  """Return [non-toxic, toxic] probabilities for the given text(s).

  Args:
    x: the text, or list of texts, handed to ``Detoxify.predict``.

  Returns:
    A numpy array built from the model's "toxicity" scores. For a list of
    n texts it has shape (n, 2): column 0 is ``1 - toxicity`` and column 1
    is ``toxicity``.
  """
  global _detoxify_model
  if _detoxify_model is None:
    # Build the model once and reuse it; constructing it on every call
    # repeats the model set-up for each explanation that is generated.
    _detoxify_model = Detoxify('original-small')

  results = _detoxify_model.predict(x)["toxicity"]
  toxic = np.asarray(results)
  results_both = np.transpose(np.array([1 - toxic, toxic]))
  return results_both

In [None]:
def create_exp_dict(exp, prediction_proba, x, id, threshold=0.75):
  """Bundle a LIME explanation and a toxicity score into a JSON-friendly dict.

  Args:
    exp: explanation object whose ``as_list()`` returns ``(word, weight)``
      pairs.
    prediction_proba: toxicity score of the comment (the percentage maths
      below assumes a 0..1 probability -- TODO confirm against the CSV).
    x: the comment text, stored under "comment".
    id: identifier of the comment, stored under "id".
    threshold: minimum ``prediction_proba`` for the comment to be labelled
      "Toxic". Defaults to 0.75, the value that used to be hard-coded.

  Returns:
    A dict with the keys "id", "comment", "prediction_proba" (confidence in
    the chosen label as a percentage rounded to 2 decimals),
    "prediction_label", "important_words" (a list of
    ``{"word", "weight", "label"}`` dicts) and "checked" (always False).
  """
  toxic_proba = float(prediction_proba)
  is_toxic = toxic_proba >= threshold
  # Report the confidence of whichever label was chosen, not the raw score.
  label_proba = toxic_proba if is_toxic else 1.0 - toxic_proba

  output = {
    "id": id,
    "comment": x,
    "prediction_proba": round(label_proba * 100, 2),
    "prediction_label": "Toxic" if is_toxic else "Non-toxic",
  }

  # A positive weight is labelled "Toxic"; zero or negative is "Non-toxic".
  # The sign is carried by the label, so the stored weight is a magnitude.
  output["important_words"] = [
    {
      "word": word,
      "weight": abs(round(float(weight), 4)),
      "label": "Toxic" if weight > 0 else "Non-toxic",
    }
    for word, weight in exp.as_list()
  ]

  output["checked"] = False
  return output

In [None]:
def generate_explanations(row, verbose=False):
  """Explain one comment with LIME and package the result as a dict.

  Args:
    row: record exposing ``id``, ``comment_text`` and ``toxicity`` attributes.
    verbose: when true, print a progress line before explaining the row.

  Returns:
    Whatever ``create_exp_dict`` returns for this row's explanation.
  """
  if verbose:
    print(f"Generating explanations for {row.id}")

  comment = row.comment_text
  lime_exp = explainer.explain_instance(comment, predict, num_features=10, num_samples=100)
  # The reported probability is the row's own `toxicity` value.
  return create_exp_dict(lime_exp, row.toxicity, comment, row.id)

In [None]:
# Draw 125 rows per `toxicity_label` group, then shuffle the combined rows.
# Both steps are seeded so a re-run yields the same rows in the same order.
temp_df = (
  df.groupby("toxicity_label")
  .sample(n=125, random_state=10)
  .sample(frac=1, random_state=10)
)

In [None]:
# generate explanation for each row
output = []
for row in temp_df.itertuples():
  text = row.comment_text
  # Skip missing (non-string) or empty comments: they cannot be scanned
  # character by character, and an empty one would divide by zero below.
  if not isinstance(text, str) or not text:
    continue

  num_alpha_chars = sum(ch.isalpha() for ch in text)

  # Only explain comments in which fewer than 30% of the characters are
  # non-alphabetic (whitespace and punctuation count as non-alphabetic).
  if (1-num_alpha_chars/len(text)) < 0.30:
    exp = generate_explanations(row, True)
    output.append(exp)

In [None]:
# serialise the collected explanations into a JSON string
json_object = json.dumps(output)

# write the string to explanationData.json
with open("explanationData.json", mode="w") as outfile:
    outfile.write(json_object)

# hand the written file to Colab's download helper
files.download("explanationData.json")