# Initializing and Reading CSV

In [1]:
import os

import numpy as np
import pandas as pd

# Load the labeled sample; later cells read its "toxic" and "comment_text" columns.
labeledComments = pd.read_csv("Sample_labaled_data.csv")

# Finding Toxicity and Storing in a List

In [2]:
# Pull the label column and the text column out as parallel lists.
toxicity = list(labeledComments["toxic"])
comments = list(labeledComments["comment_text"])

# Partition the comment texts by label: 'yes' goes to the toxic group,
# every other label value goes to the non-toxic group.
toxic_comments = [text for text, label in zip(comments, toxicity) if label == 'yes']
not_toxic_comments = [text for text, label in zip(comments, toxicity) if label != 'yes']

#Limiting Amount of Elements because of Quota Limit
# The same 30-element window is taken from each group.
SAMPLE_WINDOW = slice(645, 675)
toxic_comments = toxic_comments[SAMPLE_WINDOW]
not_toxic_comments = not_toxic_comments[SAMPLE_WINDOW]

# Averaging Toxicity Score for Toxic and Non-Toxic

In [4]:
 from googleapiclient import discovery
import json

# Read the key from the environment so a real credential never has to be
# saved inside the notebook. The original placeholder is kept as the fallback
# value, so behaviour is unchanged when the variable is not set.
API_KEY = os.environ.get('PERSPECTIVE_API_KEY', 'insert-api-key')

# Build the comment analyzer client from its live discovery document
# (static_discovery=False makes the library fetch it from discoveryServiceUrl).
client = discovery.build(
  "commentanalyzer",
  "v1alpha1",
  developerKey=API_KEY,
  discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
  static_discovery=False,
)


#Function to find average toxicity
def avgToxic(list_of_comments, api_client=None):
    """Return the mean TOXICITY score of a collection of comments.

    One analyze request is issued per comment, and the value found at
    ``attributeScores -> TOXICITY -> spanScores[0] -> score -> value`` in
    each response is averaged over all comments.

    Parameters
    ----------
    list_of_comments : sized iterable of str
        Comment texts to score. Must support ``len()``.
    api_client : optional
        Object exposing ``comments().analyze(body=...).execute()``. When
        omitted, the notebook-level ``client`` is used, which is what the
        function always used before this parameter existed.

    Returns
    -------
    float
        Arithmetic mean of the per-comment scores.

    Raises
    ------
    ValueError
        If ``list_of_comments`` is empty (previously this surfaced as a
        ``ZeroDivisionError`` only after the loop had finished).
    """
    # Fail early with a clear message instead of dividing by zero below.
    if len(list_of_comments) == 0:
        raise ValueError("avgToxic() needs at least one comment to average")

    if api_client is None:
        api_client = client

    toxic_score = 0

    for comment in list_of_comments:
        analyze_request = {
              'comment': { 'text': comment },
              'requestedAttributes': {'TOXICITY': {}}
        }

        response = api_client.comments().analyze(body=analyze_request).execute()
        toxic_score += response["attributeScores"]["TOXICITY"]["spanScores"][0]["score"]["value"]

    return toxic_score / len(list_of_comments)

#Getting AVG
# Average score of each labeled group. avgToxic sends one analyze request per
# comment, so these two calls account for all API traffic in this cell.
avgToxicity_y = avgToxic(toxic_comments)
avgToxicity_n = avgToxic(not_toxic_comments)

In [5]:
#Creating a threshold
# Midpoint between the two group averages, converted to a percentage and
# rounded to two decimal places.
prob_threshold = round((avgToxicity_y + avgToxicity_n) / 2 * 100, 2)

print(f'The probable threshold for toxicity is: {prob_threshold}%')

The probable threshold for toxicity is: 46.35%


# Forming A Hypothesis and Tests


## Hypothesis
From what I learned about the way the model works and how the toxicity scores are assigned, my hypothesis is that profanity is needlessly labeled as toxic. In my opinion, words like "fuck" and "shit" can be used positively depending on context, but I believe the model cannot read the context and disproportionately labels comments containing profanity as toxic.


## Tests
To test my hypothesis, I will create a CSV of 60 comments, each of which I will label as toxic or not myself. Then I will use my threshold to have the model apply a "yes" or "no" label to each comment, and I will check for false positives, false negatives, true positives, and true negatives. Each comment will be fewer than 15 words long to prevent a long-vs-short bias, and the comments will use gender-neutral terms to avoid a gender bias.

## Low Sample Size
I am using a limit of 60 comments because my quota is 60 requests per minute. This limits my ability to create a larger sample, which would reduce the possibility that a result is an exception rather than part of the rule.

# Testing my Hypothesis

In [6]:
test_comments = pd.read_csv("josecomments.csv")

# Hand-assigned labels and the comment texts, as parallel lists.
toxicity_J = list(test_comments["toxic"])
comments_J = list(test_comments["comment"])

# comment text -> predicted label (1 = at or above the threshold, 0 = below)
comment_toxic_dictionary = {}
# comment text -> TOXICITY score as a percentage rounded to two decimals
toxic_dict_scores = {}
# NOTE: both dictionaries are keyed by the comment text, so a comment that
# appears more than once in the CSV ends up with a single entry.

for comment in comments_J:
    analyze_request = {
          'comment': { 'text': comment },
          'requestedAttributes': {'TOXICITY': {}}
    }

    response = client.comments().analyze(body=analyze_request).execute()
    final = response["attributeScores"]["TOXICITY"]["spanScores"][0]["score"]["value"]

    # Scale to a rounded percentage before comparing against prob_threshold.
    final = round(final*100, 2)

    toxic_dict_scores[comment] = final

    # Every comment must receive a label. The previous `>` / `<` pair left a
    # score exactly equal to the threshold unlabeled, which silently shortened
    # the prediction list; a tie now counts as toxic.
    comment_toxic_dictionary[comment] = 1 if final >= prob_threshold else 0

In [7]:
# Predicted labels in dictionary insertion order (the order comments were scored).
predicted_toxicity = [label for label in comment_toxic_dictionary.values()]

def class_wise_acc(y_actual, y_predicted):
    """Compute the per-class accuracy of binary predictions.

    Parameters
    ----------
    y_actual : sequence
        True labels; only entries equal to 1 or 0 are counted.
    y_predicted : sequence
        Predicted labels, compared position by position with ``y_actual``.
        Iteration covers ``len(y_predicted)`` positions.

    Returns
    -------
    tuple of (float, float)
        ``(TP / positives, TN / negatives)``: the fraction of actual 1s that
        were predicted 1, and the fraction of actual 0s that were predicted 0.
        A class with no samples yields ``nan`` for its entry instead of
        raising ``ZeroDivisionError``.

    Raises
    ------
    IndexError
        If ``y_actual`` is shorter than ``y_predicted``.
    """
    total_p = 0
    total_n = 0
    TP = 0
    TN = 0
    for i, predicted in enumerate(y_predicted):
        actual = y_actual[i]
        if actual == 1:
            total_p += 1
            if actual == predicted:
                TP += 1
        elif actual == 0:
            total_n += 1
            if actual == predicted:
                TN += 1

    # The rate is undefined for a class that never occurs; report NaN for it.
    undefined = float("nan")
    positive_rate = TP / total_p if total_p else undefined
    negative_rate = TN / total_n if total_n else undefined
    return (positive_rate, negative_rate)

# Score the model's labels against the hand-assigned ones, one rate per class.
class_yes_acc_toxicity, class_no_acc_toxicity = class_wise_acc(
    y_actual=toxicity_J,
    y_predicted=predicted_toxicity,
)

# Report both rates, one per line.
print(
    f'The accuracy score for True Positives (yes/yes) is {class_yes_acc_toxicity:.2f}.',
    f'The accuracy score for True Negatives (no/no) is {class_no_acc_toxicity:.2f}.',
    sep="\n",
)

The accuracy score for True Positives (yes/yes) is 0.82.
The accuracy score for True Negatives (no/no) is 0.37.


In [11]:
import csv

# Export one "comment,score" row per scored comment. csv.writer quotes any
# field containing commas, quotes or line breaks, which the previous manual
# "%s,%s" formatting did not, so such comments no longer corrupt the file.
# newline='' is what the csv module requires of the file object, and
# lineterminator="\n" keeps the original line ending.
with open('test_toxic_scores.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f, lineterminator="\n")
    writer.writerows(toxic_dict_scores.items())