In [None]:
# install packages
!pip install detoxify
!pip install lime

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# import packages
from detoxify import Detoxify

import lime
from lime.lime_text import LimeTextExplainer

import numpy as np
import pandas as pd

import json

from google.colab import files

In [None]:
# load the scored comments; the path is relative to the current working directory
# NOTE(review): the 'drive/MyDrive' prefix suggests a mounted Google Drive — confirm it is mounted first
df = pd.read_csv(
    'drive/MyDrive/University/msci_project/results_toxicity_labels.csv'
)

# split the rows by the value of their 'toxicity_label' column
df_toxic = df[df['toxicity_label'].eq("toxicity")]
df_nontoxic = df[df['toxicity_label'].eq("non_toxicity")]

In [None]:
# preview the first six rows to sanity-check the loaded columns
df.head(6)

Unnamed: 0,id,comment_text,toxicity,severe_toxicity,obscene,threat,insult,identity_attack,toxicity_label
0,73b10ec620eb1fba,"Ed Johnston, glad to see you take care about 1...",0.00126,6.5e-05,0.00029,7.1e-05,0.00018,0.000123,non_toxicity
1,2ff13619a98e0617,"such great intellectual talent, was a spendthr...",0.171072,0.000266,0.008619,0.000569,0.004611,0.000556,non_toxicity
2,0956590dcb1a50dd,": Not to be a back-seat driver, but shouldn't ...",0.002131,6.6e-05,0.000299,7.3e-05,0.000194,0.000127,non_toxicity
3,fe049c77cb1a4edd,:The other article had a huge talk page? Was i...,0.000718,6.9e-05,0.000186,7.3e-05,0.000144,0.000103,non_toxicity
4,358994594f577f4d,REDIRECT Talk:Issel's groove-toothed swamp rat,0.191838,0.000363,0.030032,0.000699,0.005071,0.000479,non_toxicity
5,e9b3db7f12eb8a30,"== Concentration camps == \n\n Dan, you know I...",0.000622,7e-05,0.000184,7.2e-05,0.000143,0.000105,non_toxicity


In [None]:
# create lime explainer object
# class order matters: index 0 is 'Non-toxic' and index 1 is 'Toxic', which must
# line up with the probability columns returned by the prediction function
class_names = ['Non-toxic','Toxic']
# seed LIME's perturbation sampling so the explanation weights are reproducible
# (same seed value as the dataframe sampling elsewhere in this notebook)
explainer = LimeTextExplainer(class_names=class_names, random_state=10)

In [None]:
# prediction function
# The Detoxify model is created lazily on first use and then reused: building it
# inside predict() would reload the checkpoint on every call, and LIME calls
# predict() once for every comment it explains.
_detoxify_model = None


def predict(x):
  """Return [P(non-toxic), P(toxic)] for the given text(s).

  Args:
    x: text or list of texts, passed unchanged to ``Detoxify.predict``
      (LIME supplies a list of perturbed strings).

  Returns:
    numpy array whose last axis has two entries: ``1 - toxicity`` followed by
    ``toxicity``. For a list of n texts the shape is (n, 2).
  """
  global _detoxify_model
  if _detoxify_model is None:
    _detoxify_model = Detoxify('original-small')

  toxicity = np.asarray(_detoxify_model.predict(x)["toxicity"])
  # stacking on the last axis gives one [non-toxic, toxic] pair per input text
  return np.stack([1 - toxicity, toxicity], axis=-1)

In [None]:
def create_exp_dict(exp, prediction_proba, x, id, threshold=0.75):
  """Build the JSON-serialisable record describing one explained comment.

  Args:
    exp: explanation object whose ``as_list()`` returns ``(word, weight)`` pairs.
    prediction_proba: toxicity score of the comment.
    x: the comment text.
    id: identifier of the comment (shadows the builtin; name kept for callers).
    threshold: scores greater than or equal to this value are labelled
      "Toxic". Defaults to 0.75.

  Returns:
    A dict with the keys ``id``, ``comment``, ``prediction_proba``,
    ``prediction_label``, ``new_important_words``, ``important_words`` and
    ``checked``; or ``None`` when the explanation carries no information
    (no words at all, or every weight rounds to 0.0).
  """
  toxic_proba = float(prediction_proba)

  output = {}
  output["id"] = id
  output["comment"] = x
  # prediction_proba is stored as a percentage for the *predicted* label, so a
  # non-toxic prediction reports the complement of the toxicity score
  if toxic_proba >= threshold:
    output["prediction_proba"] = round(toxic_proba * 100, 2)
    output["prediction_label"] = "Toxic"
  else:
    output["prediction_proba"] = round((1.0 - toxic_proba) * 100, 2)
    output["prediction_label"] = "Non-toxic"

  output["new_important_words"] = []

  important_words = []
  for word, weight in exp.as_list():
    # the sign of the weight gives the direction, the magnitude is stored separately
    iw_label = "Toxic" if weight > 0 else "Non-toxic"
    iw_weight = abs(round(float(weight), 2))
    important_words.append({"word": word, "weight": iw_weight, "label": iw_label})
  output["important_words"] = important_words

  output["checked"] = False

  # Skip the comment when no word has a non-zero (rounded) weight; this also
  # covers an explanation that returned no words at all
  if not any(entry["weight"] for entry in important_words):
    return None
  return output

In [None]:
def generate_explanations(row, verbose=False, num_features=10, num_samples=100):
  """Run LIME on one dataframe row and package the result.

  Args:
    row: record exposing ``id``, ``comment_text`` and ``toxicity`` attributes
      (e.g. a tuple produced by ``DataFrame.itertuples()``).
    verbose: when true, print the id of the row being explained.
    num_features: maximum number of words LIME reports. Defaults to 10.
    num_samples: number of perturbed texts LIME generates. Defaults to 100.

  Returns:
    Whatever ``create_exp_dict`` returns for this row: the explanation dict,
    or ``None`` when the comment should be skipped.
  """
  if verbose:
    print(f"Generating explanations for {row.id}")
  exp = explainer.explain_instance(
      row.comment_text,
      predict,
      num_features=num_features,
      num_samples=num_samples,
  )
  # the reported probability is the score already stored on the row, not a fresh prediction
  return create_exp_dict(exp, row.toxicity, row.comment_text, row.id)

In [None]:
# take 200 rows from each toxicity_label group, then shuffle the combined rows;
# both steps use a fixed random_state so the selection and order are repeatable
temp_df = (
    df.groupby("toxicity_label")
      .sample(n=200, random_state=10)
      .sample(frac=1, random_state=10)
)

In [None]:
# generate explanation for each row
output = []
for row in temp_df.itertuples():
  comment = row.comment_text

  # missing cells are not strings and an empty string would divide by zero
  # below, so neither can be explained
  if not isinstance(comment, str) or not comment:
    continue

  num_alpha_chars = sum(ch.isalpha() for ch in comment)

  # only explain comments where less than 30% of the characters are non-alphabetic
  if (1-num_alpha_chars/len(comment)) < 0.30:
    exp = generate_explanations(row, True)
    if exp is None:
      print(f"Skip explanations for {row.id}")
      continue

    output.append(exp)

Generating explanations for 79cdac8fd67c87a7
Generating explanations for 936284239ae65478
Skip explanations for 936284239ae65478
Generating explanations for f19d5263b02292b7
Generating explanations for 7faee242372c2735
Generating explanations for 4c930c7d6a968231
Generating explanations for a820031cea3464a0
Skip explanations for a820031cea3464a0
Generating explanations for 3d594f3dc2202f29
Skip explanations for 3d594f3dc2202f29
Generating explanations for 4b279d562cc802b5
Skip explanations for 4b279d562cc802b5
Generating explanations for 76b447ba5b919e06
Generating explanations for 746cd826521c46fb
Skip explanations for 746cd826521c46fb
Generating explanations for 633236b1e828c9d7
Generating explanations for 5c060097bc52bd88
Skip explanations for 5c060097bc52bd88
Generating explanations for 9edcbb6d68a5d3aa
Generating explanations for 929948e1f36b1c2c
Generating explanations for d7b8cb473bc07134
Skip explanations for d7b8cb473bc07134
Generating explanations for 115f0275b2e5cc0d
Skip ex

In [None]:
# serialise the collected explanations to a JSON string
json_object = json.dumps(output)

# write the JSON string to explanationData.json
explanation_file = "explanationData.json"
with open(explanation_file, "w") as outfile:
    outfile.write(json_object)

# hand the written file to google.colab's download helper
files.download(explanation_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>