# Toxic comment detection using SVM and different vectorization features

In [14]:
import re
import numpy as np
import scipy
import scipy.sparse as sp
import pandas as pd
import pickle
from sklearn import svm
from sklearn.metrics import roc_auc_score
from nltk.sentiment import SentimentIntensityAnalyzer
from collections import defaultdict
from nltk.tokenize import word_tokenize
import nltk
import time
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt
from nltk import ngrams
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from scipy.sparse import csr_matrix
from sklearn import preprocessing

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('vader_lexicon')

#List of profane words 
profane_list = list(pd.read_csv("data/profane_list2.csv")['name'])

[nltk_data] Downloading package punkt to /home/krise/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/krise/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/krise/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Code based on https://github.com/bmeaut/python_nlp_2018_spring/blob/master/course_material/14_Semantics_II/14_Semantics_2_lab.ipynb

In [15]:
#Custom featurizer calss
class Featurizer():
    sa = SentimentIntensityAnalyzer() #vader sentiment analyzer for sentence-sentiment feature
    feature_functions = [] #for holding function names that the Featurizer shall apply
    
    #potential feature functions:
    @staticmethod
    def number_of_profanities(text):
        n = 0
        words_tokenized = word_tokenize(text)
        for word in words_tokenized:
            if (word in profane_list):
                n += 1
        yield ("number of profanities", n / len(words_tokenized))  
        
    @staticmethod
    def pos_tags(text):
        word_tag_tuples = nltk.pos_tag(word_tokenize(text))
        tag_count_dict = {}
        for word_tag_tuple in word_tag_tuples:
            if(word_tag_tuple[1] in tag_count_dict.keys()):
                tag_count_dict[word_tag_tuple[1]] += 1
            else:
                tag_count_dict[word_tag_tuple[1]] = 1

        for tag in tag_count_dict:
            yield (tag, tag_count_dict[tag] / len(word_tag_tuples))

    @staticmethod
    def vader_sentiment(text):
        sentiments = Featurizer.sa.polarity_scores(text)
        for key in sentiments:
            yield("vader " + key, sentiments[key])
            
    @staticmethod
    def comment_length(text):
        yield ("text length", len(text))
            
    @staticmethod
    def percentage_uppercase(text):
        yield ("% uppercase", sum(1 for c in text if c.isupper()) /len(text))
            
    @staticmethod
    def pos_ngrams(text):
        n = 2
        tokens = word_tokenize(text)
        tag_count_dict = {}
        
        l = len(tokens)
        word_tag_tuples = nltk.pos_tag(tokens)
        for x in range(0, l - n + 1):
            pos_ngram = ""
            for i in range(0, n):
                pos_ngram += word_tag_tuples[x + i][1] + " "
            if(pos_ngram in tag_count_dict.keys()):
                tag_count_dict[pos_ngram] += 1
            else:
                tag_count_dict[pos_ngram] = 1

        for tag in tag_count_dict:
            yield (tag, tag_count_dict[tag] / (len(word_tag_tuples) / 2))
            
    @staticmethod
    def char_1grams(text):
        chars = set(text)
        for char in chars:
            yield("c1: " + "".join(char), text.count(char) / len(text))
            
    @staticmethod
    def char_2grams(text):
        bigramset = set()
        for ngram in ngrams(text, 2):
            bigramset.add(ngram)

        for bigram in bigramset:
            bigramstring = "".join(bigram)
            yield ("c2: " + bigramstring, text.count(bigramstring) / (len(text) / 2))
            
    @staticmethod
    def char_3grams(text):
        trigramset = set()
        for ngram in ngrams(text, 3):
            trigramset.add(ngram)

        for trigram in trigramset:
            trigramstring = "".join(trigram)
            yield ("c3: " + trigramstring, text.count(trigramstring) / (len(text) / 3))

    #Either start completely new feature/id dictionary if only features of this custom featurizer will be used
    #or base dictionary on that of a different featurizer so they can be used together (e.g sklearns TFIDF vectorizer)
    def __init__(self, foreign_features = None):
        if (foreign_features == None): #Will create standalone dictionaries
            self.features = {}
            self.features_by_id = {}
            self.next_feature_id = 0
            self.max_foreign_feature = 0
        else: #Some other vectorizer will be used in addition to this one
            self.features = foreign_features
            self.features_by_id = {v: k for k, v in foreign_features.items()}
            self.next_feature_id = max(foreign_features.values()) + 1
            self.max_foreign_feature = self.next_feature_id

    def to_sparse(self, events):
        """convert sets of ints to a scipy.sparse.csr_matrix"""
        data, row_ind, col_ind = [], [], []
        for event_index, event in enumerate(events):
            for feature, value in event:
                if (value != None):
                    data.append(value)
                else:
                    data.append(1)
                    
                row_ind.append(event_index)
                #foreign features will be 0 if only this featurizer is used
                col_ind.append(feature - self.max_foreign_feature) 
                
        n_features = len(self.features.keys()) - self.max_foreign_feature
        n_events = len(events)
        matrix = scipy.sparse.csr_matrix(
            (data, (row_ind, col_ind)), shape=(n_events, n_features))
        return matrix

    def featurize(self, dataset, allow_new_features=False, verbose = False):
        events, labels = [], []
        n_events = len(dataset)
        for c, (text, label) in enumerate(dataset):
            if (verbose):
                if c % 10000 == 0:
                    print("{0:.0%}...".format(c/n_events), end='')
            labels.append(label)
            events.append(set())
            for function_name in Featurizer.feature_functions:
                function = getattr(Featurizer, function_name)
                for feature, value in function(text):
                    if feature not in self.features:
                        if not allow_new_features:
                            continue
                        self.features[feature] = self.next_feature_id
                        self.features_by_id[self.next_feature_id] = feature
                        self.next_feature_id += 1
                    feat_id = self.features[feature]
                    events[-1].add((feat_id, value))
                    
        events_sparse = self.to_sparse(events)
        labels_array = np.array(labels)
        print('done!')
        
        #min_max_scaler = preprocessing.MaxAbsScaler()
        #events_sparse = min_max_scaler.fit_transform(events_sparse)

        return events_sparse, labels_array

    def get_feature_names(self):
        return list([k for k, v in sorted(self.features.items(), key=lambda item: item[1])])
    
    def get_featurname_to_id_dict(self):
        return self.features
        
    def get_id_to_featurname_dict(self):
        return self.features_by_id
        
    #merge two feature matrices
    def merge_feature_matrices(self, foreign_matrix, inherent_matrix):
        return sp.hstack([foreign_matrix, inherent_matrix])
    
    def print_sample_with_feature_names(self, feature_matrix, sample_id):
        sample = feature_matrix.getrow(sample_id)
        nonzero_idxs = sample.nonzero()[1]
        for idx in nonzero_idxs:
            print(self.features_by_id[idx].rjust(30, ' ') , ":" , sample.getcol(idx).toarray()[0][0])

In [16]:
# function for plotting most important features for SVM in a plot(works only for linear kernel)
def plot_coefficients(classifier, feature_names, top_features=20):
    #feature_names*=3
    coef = classifier.coef_.ravel()
    top_positive_coefficients = np.argsort(-coef)[:top_features]
    top_negative_coefficients = np.argsort(-coef)[-top_features:]
    top_coefficients = np.hstack([top_positive_coefficients, top_negative_coefficients])
    # create plot
    plt.figure(figsize=(3, 10))
    colors = ['green' if c < 0 else 'red' for c in coef[top_coefficients]]
    plt.barh(np.arange(2 * top_features), coef[top_coefficients], color=colors)
    feature_names = np.array(feature_names)
    plt.yticks(np.arange(0,2 * top_features), feature_names[top_coefficients], ha='right')
    plt.show()

In [17]:
#best results so far
base = {"1 precision": 0.66, "1 recall" : 0.58, "1 f-score" :0.61, "0 precision": 0.96, \
        "0 recall" : 0.97, "0 f-score" :0.97, "acc" : 0.9422, "Av.rec": 0.5169, "auc": 0.8101}

#print results and save some examples of correctly and incorrectly classified comments to disk
def evaluate(predictions, dev_labels, valid_df, model, featurizer, plot):
    stats_by_label = defaultdict(lambda: defaultdict(int))
    if (isinstance(predictions, np.ndarray)):
        predictions = predictions.tolist()
    for i, gold in enumerate(dev_labels):
        auto = predictions[i]
        # print(auto, gold)
        if auto == gold:
            stats_by_label[auto]['tp'] += 1
        else:
            stats_by_label[auto]['fp'] += 1
            stats_by_label[gold]['fn'] += 1

    print("{:>8} {:>8}  {:>8}    {:>8}         {:>8}             {:>8}".format(
        'label', 'n_true', 'n_tagged', 'precision', 'recall', 'F-score'))
    for label, stats in stats_by_label.items():
        all_tagged = stats['tp'] + stats['fp']
        stats['prec'] = stats['tp'] / all_tagged if all_tagged else 0
        all_true = stats['tp'] + stats['fn']
        stats['rec'] = stats['tp'] / all_true if all_true else 0
        stats['f'] = (2 / ((1/stats['prec']) + (1/stats['rec']))
                      if stats['prec'] > 0 and stats['rec'] > 0 else 0)

        print("{:>8} {:>8} {:>8} {:>8.2f} / {:<8.2f} {:>8.2f} / {:<8.2f} {:>8.2f} / {:<8.2f}".format(
            label, all_true, all_tagged, stats['prec'], base[str(label) + " precision"], stats['rec'], 
            base[str(label) + " recall"], stats['f'], base[str(label) + " f-score"]))

    accuracy = (
        sum([stats_by_label[label]['tp'] for label in stats_by_label]) /
        len(predictions)) if predictions else 0

    av_rec = sum([stats['rec'] for stats in stats_by_label.values()]) / 3
    f_pn = (stats_by_label['positive']['f'] +
            stats_by_label['negative']['f']) / 2

    print()
    print("{:>10} {:>.4f} / {:<.4f}".format('Acc:', accuracy, base["acc"]))
    print("{:>10} {:>.4f} / {:<.4f}".format('Av.rec:', av_rec, base["Av.rec"]))
    print("{:>10} {:>.4f} / {:<.4f}".format('AUC :', roc_auc_score(predictions, dev_labels), base["auc"]))
    print("-----------------------------")
    
    incorrectly_classified_msk = [ x != y for (x,y) in zip(predictions, list(valid_df['target']))]
    correctly_classified_msk = [not x for x in incorrectly_classified_msk]
    incorrectly_classified = valid_df[incorrectly_classified_msk]
    correctly_classified = valid_df[correctly_classified_msk]
    
    incorrectly_classified_ambig = incorrectly_classified.loc[(incorrectly_classified['toxicity'] > 0.4)\
                                                              & (incorrectly_classified['toxicity'] < 0.6)] 
    print("From", len(incorrectly_classified), "incorrectly classified,", len(incorrectly_classified_ambig),\
          "are ambiguous", "(", len(incorrectly_classified_ambig) / len(incorrectly_classified) *100, "%)")
    
    incorrectly_classified.loc[(incorrectly_classified['toxicity'] <= 0.4)\
                                | (incorrectly_classified['toxicity'] >= 0.6)].head(100).to_csv("examples/incorrect.csv")
    correctly_classified.loc[(correctly_classified['toxicity'] <= 0.4)\
                                | (incorrectly_classified['toxicity'] >= 0.6)].head(100).to_csv("examples/correct.csv")
    print("saved (non-ambiguous) examples for correctly and incorrectly classified comments to /examples" )
    
    if (plot):
        plot_coefficients(model, featurizer.get_feature_names())

Reading in data with pandas

In [37]:
fields = ['comment_text', 'toxicity','target']

train_df = pd.read_csv("data/pre_processed/train_lemma_nopunct_cleaned_sentencized.csv", usecols = fields)
#train_df = pd.read_csv("experimental/train_negations_fliped.csv", usecols = fields)
train_df = train_df[:int(len(train_df) * 1/4)]
valid_df = pd.read_csv("data/pre_processed/valid_lemma_nopunct_cleaned_sentencized.csv", usecols = fields)
#valid_df = pd.read_csv("experimental/valid_negations_fliped.csv", usecols = fields)
test_df = pd.read_csv("data/pre_processed/test_lemma_nopunct_cleaned_sentencized.csv", usecols = fields)
test_df.dropna(inplace = True)


#train_df = train_df[:100]
#valid_df = valid_df[:100]

train = list(zip(list(train_df['comment_text']), list(train_df['target'])))
valid = list(zip(list(valid_df['comment_text']), list(valid_df['target'])))

In [38]:
def train_and_evaluate(train_events, train_labels, valid_events, valid_labels, valid_df, featurizer, plot = False):
    print('training...')
    start = time.time()
    model = svm.LinearSVC(max_iter = 10000)
    model.fit(train_events, train_labels)
    end = time.time()
    print("Training took:", int((end-start)), "sec")

    start = time.time()
    predicted_labels = model.predict(valid_events)
    end = time.time()
    print("Predicting took:", int((end-start)), "sec")

    evaluate(predicted_labels, valid_labels, valid_df, model, featurizer, plot)
    return featurizer, train_events, train_labels, valid_events, valid_labels, predicted_labels, model

In [39]:
def vectorize_train_evaluate(train, valid, valid_df, verbose = False, plot = False):
    print('featurizing train...')
    featurizer = Featurizer()
    start = time.time()
    train_events, train_labels = featurizer.featurize(train, allow_new_features=True, verbose = verbose)
    print('featurizing valid...')
    valid_events, valid_labels = featurizer.featurize(valid, allow_new_features=False, verbose = verbose)
    end = time.time()
    print("Vectorizing took:", int((end-start)), "sec")
    
    return train_and_evaluate(train_events, train_labels, valid_events, valid_labels, valid_df, featurizer, plot)

TFIDF scores for word monograms and bigrams as a baseline for adding additional features:

In [40]:
#Testing showed that there seems to be no improvement when using more than bigrams
#tfidf_vectorizer = TfidfVectorizer(ngram_range = (1,2))
tfidf_vectorizer = TfidfVectorizer(ngram_range = (1,2), token_pattern=r"(?u)\b\w\w+\b|!|\?|\"|\'")

tfidf_train = tfidf_vectorizer.fit_transform(train_df['comment_text'])
target_train = list(train_df["target"])
tfidf_valid = tfidf_vectorizer.transform(valid_df['comment_text'])
valid_train = list(valid_df["target"])

res = train_and_evaluate(tfidf_train, target_train, tfidf_valid, valid_train, valid_df, tfidf_vectorizer)

training...
Training took: 9 sec
Predicting took: 0 sec
   label   n_true  n_tagged    precision           recall              F-score
       1    28788    25360     0.66 / 0.66         0.58 / 0.58         0.61 / 0.61    
       0   332306   335734     0.96 / 0.96         0.97 / 0.97         0.97 / 0.97    

      Acc: 0.9422 / 0.9422
   Av.rec: 0.5172 / 0.5169
     AUC : 0.8099 / 0.8101
-----------------------------
From 20874 incorrectly classified, 6403 are ambiguous ( 30.674523330458943 %)
saved (non-ambiguous) examples for correctly and incorrectly classified comments to /examples


Compare previous results to the same model but given data where negated words are prefixed with "NEG_" and the negation being removed. Also check how many classifications are corrected by doing that and how many are falsified and saving some examples to disk:

In [42]:
#if (1 == 0):
fields = ['comment_text', 'toxicity','target']

train_df = pd.read_csv("data/pre_processed/_NEG_for_negations/train_negations_fliped.csv", usecols = fields)
train_df = train_df[:int(len(train_df) * 1/4)]
valid_df = pd.read_csv("data/pre_processed/_NEG_for_negations/valid_negations_fliped.csv", usecols = fields)

tfidf_vectorizer = TfidfVectorizer(ngram_range = (1,2))

tfidf_train = tfidf_vectorizer.fit_transform(train_df['comment_text'])
target_train = list(train_df["target"])
tfidf_valid = tfidf_vectorizer.transform(valid_df['comment_text'])
valid_train = list(valid_df["target"])

res1 = train_and_evaluate(tfidf_train, target_train, tfidf_valid, valid_train, valid_df, tfidf_vectorizer)

featurizer, train_events, train_labels, valid_events, valid_labels, predicted_labels, model = res
featurizer1, train_events1, train_labels1, valid_events1, valid_labels1, predicted_labels1, model1 = res1

difference_msk = [ x != y for (x,y) in zip(predicted_labels, predicted_labels1)]
differently_classified = valid_df.loc[difference_msk]

differently_classified["normal"] = predicted_labels[difference_msk]
differently_classified["flipped"] = predicted_labels1[difference_msk]

differently_classified.to_csv("examples/differently_classified.csv")


negated_insults = ["NEG_idiot", "NEG_fool", "NEG_stupid"]
neg_insults_corrected = 0
neg_insults_falsified = 0
neg_total_corrected = 0
neg_total_falsified = 0

for index, row in differently_classified.iterrows():
    #print(row['comment_text'])
    if any(x in row['comment_text'] for x in negated_insults):
        if ((row["target"] == row["normal"]) and (row["target"] != row["flipped"])):
            neg_insults_falsified += 1
        if ((row["target"] != row["normal"]) and (row["target"] == row["flipped"])):
            neg_insults_corrected += 1
    if ((row["target"] == row["normal"]) and (row["target"] != row["flipped"])):
            neg_total_falsified += 1
    if ((row["target"] != row["normal"]) and (row["target"] == row["flipped"])):
            neg_total_corrected += 1

print("for all comments where either 'idiot', 'fool' or 'stupid' was flipped:\n" + str(neg_insults_corrected), \
     "where corrected by flipping and", neg_insults_falsified , "where falsified by flipping", "\nin total," ,\
     neg_total_corrected, "where corrected and", neg_total_falsified, "where falsified\n" + \
     str((len(differently_classified)/len(valid_df)) * 100), "% of comments where differently classified")

training...
Training took: 7 sec
Predicting took: 0 sec
   label   n_true  n_tagged    precision           recall              F-score
       1    28788    25303     0.66 / 0.66         0.58 / 0.58         0.61 / 0.61    
       0   332306   335791     0.96 / 0.96         0.97 / 0.97         0.97 / 0.97    

      Acc: 0.9422 / 0.9422
   Av.rec: 0.5169 / 0.5169
     AUC : 0.8101 / 0.8101
-----------------------------
From 20871 incorrectly classified, 6403 are ambiguous ( 30.678932490057974 %)
saved (non-ambiguous) examples for correctly and incorrectly classified comments to /examples


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  differently_classified["normal"] = predicted_labels[difference_msk]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  differently_classified["flipped"] = predicted_labels1[difference_msk]


Preventive maintenance be cheap than let infrastructure fall apart . Maybe the voter be NEG_idiot after all . 

I smell a rat . This make NEG_sense . Of course they spy on each other . Of course the hack each other . But Putin be a NEG_stupid man . So this make NEG_sense . -PRON- have a set up . I can NEG_stand Trump but -PRON- would be with him on this in doubt it . But it would effectively tie Trump up with regard to Russia . The military Industrial complex would NEG_want to end the cold war with Russia a it mean arm and money for them . 

Please provide example and of this so call over reach on state hunt rule . At that point -PRON- will assume your 're a NEG_idiotic a you sound . 

Jerry No . You be wrong . What you say would be like prohibit a victim of kidnap from bring charge after they pay a ransom . A victim have every right to press charge for themselves . do be NEG_stupid . 

The youth of ANC be thankfully a NEG_stupid a Ramaphosa seem to think . They be aware of the situati

Adding additional features on top of the TFIDF values

In [None]:
#set additional features
Featurizer.feature_functions = ['char_1grams', 'char_2grams']

#create featurizer who's dictionary is based on the tfidf-vectorizers dictionary
custom_featurizer = Featurizer(foreign_features = tfidf_vectorizer.vocabulary_.copy())
train_events, train_labels = custom_featurizer.featurize(train, allow_new_features=True, verbose = True)
valid_events, valid_labels = custom_featurizer.featurize(valid, allow_new_features=False, verbose = True)

#combine outputs of tfidf and custom featurizer
train_tfidf_custom = custom_featurizer.merge_feature_matrices(tfidf_train, train_events)
valid_tfidf_custom = custom_featurizer.merge_feature_matrices(tfidf_valid, valid_events)

res = train_and_evaluate(train_tfidf_custom, train_labels, valid_tfidf_custom, valid_labels, valid_df, custom_featurizer)

0%...3%...6%...8%...11%...14%...17%...19%...22%...25%...28%...30%...33%...36%...39%...42%...44%...47%...50%...53%...55%...58%...61%...64%...66%...69%...72%...75%...78%...80%...83%...86%...89%...91%...94%...97%...100%...done!
0%...3%...6%...8%...11%...14%...17%...19%...22%...25%...28%...30%...33%...36%...39%...42%...44%...47%...50%...53%...55%...58%...61%...64%...66%...69%...72%...75%...78%...80%...83%...86%...89%...91%...94%...97%...

In [None]:
#Printing featurized version of a comment to check if everything went right 
custom_featurizer.print_sample_with_feature_names(train_tfidf_custom, 6)

In [88]:
#A cell for trying stuff out

#custom_featurizer.print_sample_with_feature_names(train_tfidf_custom, 1)
featurizer, train_events, train_labels, valid_events, valid_labels, predicted_labels, model = res
#te = tfidf_vectorizer.transform(['Wow . be a hate fill monster .'])
te = tfidf_vectorizer.transform(['kill them chop them up and sell off the part .'])

model.predict(te)

array([1])