# Toxic comment detection using SVM and different vectorization features

In [1]:
import re
import numpy as np
import scipy
import scipy.sparse as sp
from scipy.sparse import csr_matrix
import pandas as pd
import pickle
from sklearn import svm
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn import preprocessing
import nltk
from nltk import ngrams
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from collections import defaultdict
import time
import matplotlib.pyplot as plt
import editdistance
from spellchecker import SpellChecker
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('vader_lexicon')

#List of profane words 
profane_list = list(pd.read_csv("data/profane_list2.csv")['name'])
most_tox_features_list = list(pd.read_csv("data/top_toxic_featurenames_long.csv")['word'])

[nltk_data] Downloading package punkt to /home/krise/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/krise/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/krise/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Custom Featurizer and training & evaluation functions based on https://github.com/bmeaut/python_nlp_2018_spring/blob/master/course_material/14_Semantics_II/14_Semantics_2_lab.ipynb

In [2]:
#Custom featurizer calss
#Can either be used as a standalone featurizer or build upon the dictionary of an existing featurizer such that
#the output of this featurizer and the foreign featurizer can be concatenated and this featurizers dictionary
#can be used as the dictionary for the result.

class Featurizer():    
    sa = SentimentIntensityAnalyzer() #vader sentiment analyzer for sentence-sentiment feature
    feature_functions = [] #for holding function names that the Featurizer shall apply
    
    #potential feature functions:
    @staticmethod
    def number_of_profanities(text):
        n = 0
        words_tokenized = word_tokenize(text)
        for word in words_tokenized:
            if (word in profane_list):
                n += 1
        yield ("number of profanities", n / len(words_tokenized))  
        
    @staticmethod
    def pos_tags(text):
        word_tag_tuples = nltk.pos_tag(word_tokenize(text))
        tag_count_dict = {}
        for word_tag_tuple in word_tag_tuples:
            if(word_tag_tuple[1] in tag_count_dict.keys()):
                tag_count_dict[word_tag_tuple[1]] += 1
            else:
                tag_count_dict[word_tag_tuple[1]] = 1

        for tag in tag_count_dict:
            yield (tag, tag_count_dict[tag] / len(word_tag_tuples))

    @staticmethod
    def vader_sentiment(text):
        sentiments = Featurizer.sa.polarity_scores(text)
        for key in sentiments:
            yield("vader " + key, sentiments[key])
            
    @staticmethod
    def comment_length(text):
        yield ("text length", len(text))
            
    @staticmethod
    def percentage_uppercase(text):
        yield ("% uppercase", sum(1 for c in text if c.isupper()) /len(text))       
    
    @staticmethod
    def pos_ngrams(text):
        n = 2
        tokens = word_tokenize(text)
        tag_count_dict = {}
        
        l = len(tokens)
        word_tag_tuples = nltk.pos_tag(tokens)
        for x in range(0, l - n + 1):
            pos_ngram = ""
            for i in range(0, n):
                pos_ngram += word_tag_tuples[x + i][1] + " "
            if(pos_ngram in tag_count_dict.keys()):
                tag_count_dict[pos_ngram] += 1
            else:
                tag_count_dict[pos_ngram] = 1

        for tag in tag_count_dict:
            yield (tag, tag_count_dict[tag] / (len(word_tag_tuples) / 2))
        
    #Either start completely new feature/id dictionary if only features of this custom featurizer will be used
    #or base dictionary on that of a different featurizer so they can be used together (e.g sklearns TFIDF vectorizer)
    def __init__(self, foreign_features = None):
        if (foreign_features == None): #Will create standalone dictionaries
            self.features = {}
            self.features_by_id = {}
            self.next_feature_id = 0
            self.max_foreign_feature = 0
        else: #Some other vectorizer will be used in addition to this one
            self.features = foreign_features
            self.features_by_id = {v: k for k, v in foreign_features.items()}
            self.next_feature_id = max(foreign_features.values()) + 1
            self.max_foreign_feature = self.next_feature_id

    def to_sparse(self, events):
        """convert sets of ints to a scipy.sparse.csr_matrix"""
        data, row_ind, col_ind = [], [], []
        for event_index, event in enumerate(events):
            for feature, value in event:
                if (value != None):
                    data.append(value)
                else:
                    data.append(1)
                    
                row_ind.append(event_index)
                #foreign features will be 0 if only this featurizer is used
                col_ind.append(feature - self.max_foreign_feature) 
                
        n_features = len(self.features.keys()) - self.max_foreign_feature
        n_events = len(events)
        matrix = scipy.sparse.csr_matrix(
            (data, (row_ind, col_ind)), shape=(n_events, n_features))
        return matrix

    def featurize(self, dataset, allow_new_features=False, verbose = False):
        events, labels = [], []
        n_events = len(dataset)
        for c, (text, label) in enumerate(dataset):
            if (verbose):
                if c % 10000 == 0:
                    print("{0:.0%}...".format(c/n_events), end='')
            labels.append(label)
            events.append(set())
            for function_name in Featurizer.feature_functions:
                function = getattr(Featurizer, function_name)
                for feature, value in function(text):
                    if feature not in self.features:
                        if not allow_new_features:
                            continue
                        self.features[feature] = self.next_feature_id
                        self.features_by_id[self.next_feature_id] = feature
                        self.next_feature_id += 1
                    feat_id = self.features[feature]
                    events[-1].add((feat_id, value))
                    
        events_sparse = self.to_sparse(events)
        labels_array = np.array(labels)
        print('done!')
        return events_sparse, labels_array

    def get_feature_names(self):
        return list([k for k, v in sorted(self.features.items(), key=lambda item: item[1])])
    
    def get_featurname_to_id_dict(self):
        return self.features
        
    def get_id_to_featurname_dict(self):
        return self.features_by_id
        
    #merge two feature matrices
    def merge_feature_matrices(self, foreign_matrix, inherent_matrix):
        return sp.hstack([foreign_matrix, inherent_matrix])
    
    #print feature values and names 
    def print_sample_with_feature_names(self, feature_matrix, sample_id):
        sample = feature_matrix.getrow(sample_id)
        nonzero_idxs = sample.nonzero()[1]
        for idx in nonzero_idxs:
            print(self.features_by_id[idx].rjust(30, ' ') , ":" , sample.getcol(idx).toarray()[0][0])

In [3]:
# function for plotting most important features for SVM in a plot(works only for linear kernel)
def plot_coefficients(classifier, feature_names, top_features=20):
    #feature_names*=3
    coef = classifier.coef_.ravel()
    top_positive_coefficients = np.argsort(-coef)[:top_features]
    top_negative_coefficients = np.argsort(-coef)[-top_features:]
    top_coefficients = np.hstack([top_positive_coefficients, top_negative_coefficients])
    # create plot
    plt.figure(figsize=(3, 10))
    colors = ['green' if c < 0 else 'red' for c in coef[top_coefficients]]
    plt.barh(np.arange(2 * top_features), coef[top_coefficients], color=colors)
    feature_names = np.array(feature_names)
    plt.yticks(np.arange(0,2 * top_features), feature_names[top_coefficients], ha='right')
    plt.show()

In [4]:
#best results so far for comparing
base = {"1 precision": 0.66, "1 recall" : 0.58, "1 f-score" :0.61, "0 precision": 0.96, \
        "0 recall" : 0.97, "0 f-score" :0.97, "acc" : 0.9422, "Av.rec": 0.5169, "auc": 0.8101}

In [5]:
#print results and save some examples of correctly and incorrectly classified comments to disk
def evaluate(predictions, dev_labels, valid_df, model, featurizer, plot):
    stats_by_label = defaultdict(lambda: defaultdict(int))
    if (isinstance(predictions, np.ndarray)):
        predictions = predictions.tolist()
    for i, gold in enumerate(dev_labels):
        auto = predictions[i]
        # print(auto, gold)
        if auto == gold:
            stats_by_label[auto]['tp'] += 1
        else:
            stats_by_label[auto]['fp'] += 1
            stats_by_label[gold]['fn'] += 1

    print("{:>8} {:>8}  {:>8}    {:>8}         {:>8}             {:>8}".format(
        'label', 'n_true', 'n_tagged', 'precision', 'recall', 'F-score'))
    for label, stats in stats_by_label.items():
        all_tagged = stats['tp'] + stats['fp']
        stats['prec'] = stats['tp'] / all_tagged if all_tagged else 0
        all_true = stats['tp'] + stats['fn']
        stats['rec'] = stats['tp'] / all_true if all_true else 0
        stats['f'] = (2 / ((1/stats['prec']) + (1/stats['rec']))
                      if stats['prec'] > 0 and stats['rec'] > 0 else 0)

        print("{:>8} {:>8} {:>8} {:>8.2f} / {:<8.2f} {:>8.2f} / {:<8.2f} {:>8.2f} / {:<8.2f}".format(
            label, all_true, all_tagged, stats['prec'], base[str(label) + " precision"], stats['rec'], 
            base[str(label) + " recall"], stats['f'], base[str(label) + " f-score"]))

    accuracy = (
        sum([stats_by_label[label]['tp'] for label in stats_by_label]) /
        len(predictions)) if predictions else 0

    av_rec = sum([stats['rec'] for stats in stats_by_label.values()]) / 3
    f_pn = (stats_by_label['positive']['f'] +
            stats_by_label['negative']['f']) / 2

    print()
    print("{:>10} {:>.4f} / {:<.4f}".format('Acc:', accuracy, base["acc"]))
    print("{:>10} {:>.4f} / {:<.4f}".format('Av.rec:', av_rec, base["Av.rec"]))
    print("{:>10} {:>.4f} / {:<.4f}".format('AUC :', roc_auc_score(predictions, dev_labels), base["auc"]))
    print("-----------------------------")
    
    incorrectly_classified_msk = [ x != y for (x,y) in zip(predictions, list(valid_df['target']))]
    correctly_classified_msk = [not x for x in incorrectly_classified_msk]
    incorrectly_classified = valid_df[incorrectly_classified_msk]
    correctly_classified = valid_df[correctly_classified_msk]
    
    incorrectly_classified_ambig = incorrectly_classified.loc[(incorrectly_classified['toxicity'] > 0.4)\
                                                              & (incorrectly_classified['toxicity'] < 0.6)] 
    print("From", len(incorrectly_classified), "incorrectly classified,", len(incorrectly_classified_ambig),\
          "are ambiguous", "(", len(incorrectly_classified_ambig) / len(incorrectly_classified) *100, "%)")
    
    incorrectly_classified.loc[(incorrectly_classified['toxicity'] <= 0.4)\
                                | (incorrectly_classified['toxicity'] >= 0.6)].head(1000).to_csv("examples/incorrect.csv")
    correctly_classified.loc[(correctly_classified['toxicity'] <= 0.4)\
                                | (incorrectly_classified['toxicity'] >= 0.6)].head(1000).to_csv("examples/correct.csv")
    print("saved (non-ambiguous) examples for correctly and incorrectly classified comments to /examples" )
    
    if (plot):
        plot_coefficients(model, featurizer.get_feature_names())

Reading in data with pandas

In [6]:
fields = ['comment_text', 'toxicity','target']

train_df = pd.read_csv("data/pre_processed/train_lemma_nopunct_cleaned_sentencized.csv", usecols = fields)
#train_df = pd.read_csv("experimental/train_negations_fliped.csv", usecols = fields)
train_df = train_df[:int(len(train_df) * 1/4)]
train_df.dropna(inplace = True)
valid_df = pd.read_csv("data/merged_with_txt.csv")
#valid_df = pd.read_csv("data/pre_processed/valid_lemma_nopunct_cleaned_sentencized.csv", usecols = fields)
#valid_df = pd.read_csv("experimental/valid_negations_fliped.csv", usecols = fields)
valid_df.dropna(inplace = True)
test_df = pd.read_csv("data/pre_processed/test_lemma_nopunct_cleaned_sentencized.csv", usecols = fields)
test_df.dropna(inplace = True)

train = list(zip(list(train_df['comment_text']), list(train_df['target'])))
valid = list(zip(list(valid_df['comment_text']), list(valid_df['target'])))

In [7]:
def train_and_evaluate(train_events, train_labels, valid_events, valid_labels, valid_df, featurizer, plot = False):
    print('training...')
    start = time.time()
    model = svm.LinearSVC(max_iter = 10000)
    model.fit(train_events, train_labels)
    end = time.time()
    print("Training took:", int((end-start)), "sec")

    start = time.time()
    predicted_labels = model.predict(valid_events)
    end = time.time()
    print("Predicting took:", int((end-start)), "sec")

    evaluate(predicted_labels, valid_labels, valid_df, model, featurizer, plot)
    return featurizer, train_events, train_labels, valid_events, valid_labels, predicted_labels, model

In [8]:
def vectorize_train_evaluate(train, valid, valid_df, verbose = False, plot = False):
    print('featurizing train...')
    featurizer = Featurizer()
    start = time.time()
    train_events, train_labels = featurizer.featurize(train, allow_new_features=True, verbose = verbose)
    print('featurizing valid...')
    valid_events, valid_labels = featurizer.featurize(valid, allow_new_features=False, verbose = verbose)
    end = time.time()
    print("Vectorizing took:", int((end-start)), "sec")
    
    return train_and_evaluate(train_events, train_labels, valid_events, valid_labels, valid_df, featurizer, plot)

TFIDF scores for word monograms and bigrams as a baseline for adding additional features:

In [9]:
#Testing showed that there seems to be no improvement when using more than bigrams
#tfidf_vectorizer_chars = TfidfVectorizer(analyzer = 'char_wb', ngram_range = (1,5), min_df = 10)
tfidf_vectorizer_words = TfidfVectorizer(ngram_range = (1,2)) 
#chars_dict = tfidf_vectorizer_chars.vocabulary_.copy()
#words_dict = tfidf_vectorizer_words.vocabulary_.copy()

#max_id_chars = chars_dict[max(chars_dict, key=chars_dict.get)]
#words_dict = {k: (v + max_id_chars + 1) for k, v words_dict.items()}
#char_word_dict = {**chars_dict, words_dict}

#sp.hstack([tfidf_vectorizer_chars, tfidf_vectorizer_words])

#tfidf_train_chars = tfidf_vectorizer_chars.fit_transform(train_df['comment_text'])
tfidf_train_words = tfidf_vectorizer_words.fit_transform(train_df['comment_text'])
target_train = list(train_df["target"])

#tfidf_valid_chars = tfidf_vectorizer_chars.transform(valid_df['comment_text'])
tfidf_valid_words = tfidf_vectorizer_words.transform(valid_df['comment_text'])
target_valid = list(valid_df["target"])

#tfidf_train = sp.hstack([tfidf_train_chars, tfidf_train_words])
#tfidf_valid = sp.hstack([tfidf_valid_chars, tfidf_valid_words])

res = train_and_evaluate(tfidf_train_words, target_train, tfidf_valid_words, target_valid, valid_df, tfidf_vectorizer_words)

training...
Training took: 5 sec
Predicting took: 0 sec
   label   n_true  n_tagged    precision           recall              F-score
       1    28788    25364     0.66 / 0.66         0.58 / 0.58         0.61 / 0.61    
       0   332306   335730     0.96 / 0.96         0.97 / 0.97         0.97 / 0.97    

      Acc: 0.9422 / 0.9422
   Av.rec: 0.5172 / 0.5169
     AUC : 0.8098 / 0.8101
-----------------------------
From 20880 incorrectly classified, 6398 are ambiguous ( 30.64176245210728 %)
saved (non-ambiguous) examples for correctly and incorrectly classified comments to /examples


Adding additional features on top of the TFIDF values

In [10]:
#set additional features
Featurizer.feature_functions = ['pos_tags']

#create featurizer who's dictionary is based on the tfidf-vectorizers dictionary
custom_featurizer = Featurizer(foreign_features = tfidf_vectorizer_words.vocabulary_.copy())
train_events, train_labels = custom_featurizer.featurize(train, allow_new_features=True, verbose = True)
valid_events, valid_labels = custom_featurizer.featurize(valid, allow_new_features=False, verbose = True)

#combine outputs of tfidf and custom featurizer
train_tfidf_custom = custom_featurizer.merge_feature_matrices(tfidf_train, train_events)
valid_tfidf_custom = custom_featurizer.merge_feature_matrices(tfidf_valid, valid_events)

res = train_and_evaluate(train_tfidf_custom, train_labels, valid_tfidf_custom, valid_labels, valid_df, custom_featurizer)

0%...3%...6%...8%...11%...14%...17%...19%...22%...25%...28%...30%...33%...36%...39%...42%...44%...47%...50%...53%...55%...58%...61%...64%...66%...69%...72%...75%...78%...80%...83%...86%...89%...91%...94%...97%...100%...done!
0%...3%...6%...8%...11%...14%...17%...19%...22%...25%...28%...30%...33%...36%...39%...42%...44%...47%...50%...53%...55%...58%...61%...64%...66%...69%...72%...75%...78%...80%...83%...86%...89%...91%...94%...97%...100%...done!


NameError: name 'tfidf_train' is not defined

In [None]:
#Printing featurized version of a comment to check if everything went right 
custom_featurizer.print_sample_with_feature_names(valid_tfidf_custom, 1)

Helper methods for performing spell-checking

In [9]:
spell = SpellChecker()

def canBeEqual(a, b):
    l = len(a)
    if l != len(b):
        return False
    for i in range(0, l - 1):
        if (a[:i] + a[i + 1] + a[i] + (a[i + 2:] if (i < l) else "") == b):
            return True
    return False

def spellfix_toxic_words(string, freq_dict):
    global i
    #for token in word_tokenize(string):
    for token in string.split():
        if (len(token) > 3 and freq_dict[token] < 5):
            for tox_word in most_tox_features_list:
                ed = editdistance.eval(token.lower(), tox_word.lower())
                if ((ed == 1 or ed ==2) and '*' in token):
                    #print("fixing", token, tox_word)
                    #i +=1
                    return string.replace(token, tox_word)
                elif (ed == 2 and canBeEqual(token.lower(), tox_word.lower())):
                    #print("fixing", token, tox_word)
                    #i += 1
                    return string.replace(token, tox_word) 
                elif (ed == 1 and len(spell.unknown([token.lower()])) > 0):
                    #print("fixing", token, tox_word)
                    #i += 1
                    return string.replace(token, tox_word)
                elif (token.count('*') >= 2 and token.lower().replace('*', '') == tox_word.lower()):
                    #print("fixing", token, tox_word)
                    #i += 1
                    return string.replace(token, tox_word)              
    return string

Considering the above trained model as the baseline, this method is used for comparing it against a different model (be it a model with different features/parameters or different preprocessing). A comparison of the 2 models is then printed.

In [9]:
def compare_different_approaches(res, train_df, valid_df):
    fields = ['comment_text', 'toxicity','target']
    tfidf_vectorizer = TfidfVectorizer(ngram_range = (1,2))

    tfidf_train = tfidf_vectorizer.fit_transform(train_df['comment_text'])
    target_train = list(train_df["target"])
    tfidf_valid = tfidf_vectorizer.transform(valid_df['comment_text'])
    valid_train = list(valid_df["target"])

    res1 = train_and_evaluate(tfidf_train, target_train, tfidf_valid, valid_train, valid_df, tfidf_vectorizer)

    featurizer, train_events, train_labels, valid_events, valid_labels, predicted_labels, model = res
    featurizer1, train_events1, train_labels1, valid_events1, valid_labels1, predicted_labels1, model1 = res1

    difference_msk = [ x != y for (x,y) in zip(predicted_labels, predicted_labels1)]
    differently_classified = pd.DataFrame(valid_df.loc[difference_msk])

    differently_classified["initial"] = predicted_labels[difference_msk]
    differently_classified["with_different_preproc"] = predicted_labels1[difference_msk]
    differently_classified.to_csv("examples/differently_classified.csv")

    total_corrected = 0
    false_positives_introduced = 0
    false_positives_fixed = 0
    false_negatives_introduced = 0
    false_negatives_fixed = 0
    total_falsified = 0

    for index, row in differently_classified.iterrows():
        #print(row['comment_text'])
        if ((row["target"] == row["initial"]) and (row["target"] != row["with_different_preproc"])):
                total_falsified += 1
                if (row["target"] == 0):
                    false_positives_introduced += 1
                else:
                    false_negatives_introduced += 1
        if ((row["target"] != row["initial"]) and (row["target"] == row["with_different_preproc"])):
                total_corrected += 1
                if (row["target"] == 0):
                    false_negatives_fixed += 1
                else:
                    false_positives_fixed += 1

    valid_len = len(valid_df)
    
    scores_old = precision_recall_fscore_support(valid_labels, predicted_labels)
    scores_new = precision_recall_fscore_support(valid_labels1, predicted_labels1)
    
    print("")
    print("-" * 50)
    print("{:>21s}  |  {:3s}".format("old", "new"))
    print("{:15s}= {:2.2f}  |  {:2.2f}".format("Precision 1", scores_old[0][1], scores_new[0][1]))
    print("{:15s}= {:2.2f}  |  {:2.2f}".format("Precision 0", scores_old[0][0], scores_new[0][0]))
    print("{:15s}= {:2.2f}  |  {:2.2f}".format("Recall 1", scores_old[1][1], scores_new[1][1]))
    print("{:15s}= {:2.2f}  |  {:2.2f}".format("Recall 0", scores_old[1][0], scores_new[1][0]))
    print("{:15s}= {:2.2f}  |  {:2.2f}".format("F-score 1", scores_old[2][1], scores_new[2][1]))
    print("{:15s}= {:2.2f}  |  {:2.2f}".format("F-score 0", scores_old[2][0], scores_new[2][0]))

    print("\n{:14s} : fixed = {:4d} ({:3.3f}%)  |  falsified = {:4d} ({:3.3f}%)"\
          .format("Total", total_corrected, (total_corrected / valid_len * 100), total_falsified,\
                  (total_falsified / valid_len * 100) ))
    print("{:14s} : fixed = {:4d} ({:3.3f}%)  |  introduced = {:4d} ({:3.3f}%)"\
          .format("False positives" ,false_positives_fixed , false_positives_fixed / valid_len * 100 \
            , false_positives_introduced, false_positives_introduced / valid_len * 100))
    print("{:14s} : fixed = {:4d} ({:3.3f}%)  |  introduced = {:4d} ({:3.3f}%)"\
          .format("False negatives" ,false_negatives_fixed , false_negatives_fixed / valid_len * 100\
            ,false_negatives_introduced, false_negatives_introduced / valid_len * 100))
    
#perform spell-fixing as a preprocess step
if (1 == 0):
    cv = CountVectorizer(lowercase = False, token_pattern="[\S]+",tokenizer=None)
    cv_fit = cv.fit_transform(list(train_df['comment_text']) + list(valid_df['comment_text']))
    word_list = cv.get_feature_names();    
    counts = np.asarray(cv_fit.sum(axis=0))[0]
    freq_dict = dict(zip(word_list, counts))
    train_df1 = train_df.copy()
    valid_df1 = valid_df.copy()
    train_df1['comment_text'] = train_df1['comment_text'].apply(lambda x: spellfix_toxic_words(x, freq_dict))
    valid_df1['comment_text'] = valid_df1['comment_text'].apply(lambda x: spellfix_toxic_words(x, freq_dict))

    
#use differently pre-processed data
#train_df = pd.read_csv("data/pre_processed/_NEG_for_negations/train_negations_fliped.csv", usecols = fields)
#train_df = train_df[:int(len(train_df) * 1/4)]
#train_df.dropna(inplace = True)
#valid_df = pd.read_csv("data/pre_processed/_NEG_for_negations/train_negations_fliped.csv", usecols = fields)
#valid_df.dropna(inplace = True)

compare_different_approaches(res, train_df, \
                                valid_df)

training...
Training took: 3 sec
Predicting took: 0 sec
   label   n_true  n_tagged    precision           recall              F-score
       1    28788    24109     0.66 / 0.66         0.55 / 0.58         0.60 / 0.61    
       0   332306   336985     0.96 / 0.96         0.98 / 0.97         0.97 / 0.97    

      Acc: 0.9411 / 0.9422
   Av.rec: 0.5081 / 0.5169
     AUC : 0.8086 / 0.8101
-----------------------------
From 21277 incorrectly classified, 6591 are ambiguous ( 30.977111434882737 %)
saved (non-ambiguous) examples for correctly and incorrectly classified comments to /examples

--------------------------------------------------
                  old  |  new
Precision 1    = 0.68  |  0.66
Precision 0    = 0.96  |  0.96
Recall 1       = 0.55  |  0.55
Recall 0       = 0.98  |  0.98
F-score 1      = 0.61  |  0.60
F-score 0      = 0.97  |  0.97

Total          : fixed = 2071 (0.574%)  |  falsified = 2908 (0.805%)
False positives : fixed =  914 (0.253%)  |  introduced = 1888 (0.523%

Inspecting missclassified examples

In [34]:
#print a models weights, featrue values and feature names for list of samples
def print_feature_weights_for_sample(model, featurizer, sample_comments, sample_labels):
    model_weights = model.coef_
    vectorized_samples = featurizer.transform(sample_comments)
    idx_to_feat_name_vocab = {v: k for k, v in featurizer.vocabulary_.items()} 
    
    manual_insp_false_negatives = pd.read_csv("inspection/false_negatives.csv")
    annotated_comments = list(manual_insp_false_negatives['comment_text'])
    
    for i, row in enumerate(vectorized_samples):
        if(sample_comments[i] in annotated_comments):
            cat1 = list(manual_insp_false_negatives[manual_insp_false_negatives['comment_text'] == sample_comments[i]]['category'])[0]
            l = list(manual_insp_false_negatives[manual_insp_false_negatives['comment_text'] == sample_comments[i]]['category2'])
            cat2 = l[0] if str(l[0]) != "nan" else ""
            cats = ['Lack of tox. Words', 'disagree', 'N-Tox. Ass. Neutral words', 'No tox. Words']
            if (cat1 in cats or cat2 in cats):
                sumi = 0
                features_pos = {}
                features_neg = {}
                print(sample_comments[i], "\n" + "True label:", sample_labels[i])
                #print('\n{:20s} {:5s}  | {:5s} {:>15s}'.format("Feature", "Weight", "Value", "Result"))

                for idx in row.nonzero()[1]:
                    weight = model_weights[0][idx]
                    #print('{:>20s} - {: 3.2f} | {: 3.2f}  =  '\
                    #      .format(idx_to_feat_name_vocab[idx], weight, row.getcol(idx).toarray()[0][0]), (weight * row.getcol(idx).toarray()[0][0]))
                    if (weight * row.getcol(idx).toarray()[0][0] >= 0):
                        features_pos[idx_to_feat_name_vocab[idx]] = (weight * row.getcol(idx).toarray()[0][0], weight, row.getcol(idx).toarray()[0][0])
                    else:
                        features_neg[idx_to_feat_name_vocab[idx]] = (weight * row.getcol(idx).toarray()[0][0], weight, row.getcol(idx).toarray()[0][0])
                    sumi += (weight * row.getcol(idx).toarray()[0][0])
                print("\n-{:>35s}---- Result: {:2.2f} ----".format("", sumi))

                features_pos = list({k: v for k, v in sorted(features_pos.items(), key=lambda item: item[1])}.items())
                features_neg = list({k: v for k, v in sorted(features_neg.items(), key=lambda item: item[1])}.items())

                print("{:>35s} : {:3.2f}  |  {:3.2f} : {:35s}\n".format("sum toxic", sum([x[1][0] for x in features_pos]), \
                                                                        sum([x[1][0] for x in features_neg]), "sum non-toxic"))
                for j in range(0, max(len(features_pos), len(features_neg))):
                    print("{:>20s} - (w={:3.2f}/v={:3.2f}) {:3.2f} | {:3.2f} (w={:3.2f}/v={:3.2f}) - {:20s}".format(\
                         (features_pos[len(features_pos) - 1 -j][0] if j <len(features_pos) else ""),\
                         (features_pos[len(features_pos) - 1 - j][1][1] if j <len(features_pos) else 0),\
                         (features_pos[len(features_pos) - 1 - j][1][2] if j <len(features_pos) else 0),\
                         (features_pos[len(features_pos) - 1 - j][1][0] if j <len(features_pos) else 0),\
                         (features_neg[j][1][0] if j <len(features_neg) else 0),\
                         (features_neg[j][1][1] if j <len(features_neg) else 0),\
                         (features_neg[j][1][2] if j <len(features_neg) else 0),\
                         (features_neg[j][0] if j <len(features_neg) else "")))
                print("\n")
                print("Category :", cat1)

                print("Category2:",cat2)
                l1 = list(manual_insp_false_negatives[manual_insp_false_negatives['comment_text'] == sample_comments[i]]['append.'])
                print("Words    :", l1 if str(l1[0]) != "nan" else "")

                print("\n------------------------------------------------------------------\n")

featurizer, train_events, train_labels, valid_events, valid_labels, predicted_labels, model = res 

incorrectly_classified = pd.read_csv("examples/incorrect.csv")
false_negatives = incorrectly_classified.loc[incorrectly_classified['target'] == 1]
false_positives = incorrectly_classified.loc[incorrectly_classified['target'] == 0]
#lstm_vs_svm = pd.read_csv("examples/lstm_vs_svm_valid.csv")
#false_negatives = lstm_vs_svm.loc[(lstm_vs_svm['lstm_result'] == 1) & (lstm_vs_svm['target'] == 1) & (lstm_vs_svm['svm_result'] == 0)]
#print(len(false_negatives))

print('b = ',model.intercept_)
print_feature_weights_for_sample(model, featurizer, list(false_negatives['comment_text']) , list(false_negatives['target']))

b =  [-1.05166094]
no much Chinese please . 
True label: 1

-                                   ---- Result: 0.86 ----
                          sum toxic : 0.87  |  -0.01 : sum non-toxic                      

             chinese - (w=0.94/v=0.42) 0.40 | -0.01 (w=-0.03/v=0.17) - much                
        much chinese - (w=0.36/v=0.69) 0.25 | 0.00 (w=0.00/v=0.00) -                     
             no much - (w=0.49/v=0.42) 0.21 | 0.00 (w=0.00/v=0.00) -                     
                  no - (w=0.06/v=0.19) 0.01 | 0.00 (w=0.00/v=0.00) -                     
              please - (w=0.01/v=0.31) 0.00 | 0.00 (w=0.00/v=0.00) -                     


Category : No tox. Words
Category2: 
Words    : 

------------------------------------------------------------------

Another brilliant lefty who have choose to ruin his life for a petty political prank . What be this knuckleheads learn in school . It certainly be not common sense . 
True label: 1

-                                  


-                                   ---- Result: 0.09 ----
                          sum toxic : 0.91  |  -0.81 : sum non-toxic                      

              sucker - (w=2.29/v=0.18) 0.40 | -0.15 (w=-0.66/v=0.23) - soothe              
            trump be - (w=1.41/v=0.12) 0.16 | -0.09 (w=-0.46/v=0.19) - the crowd           
               trump - (w=0.90/v=0.08) 0.07 | -0.07 (w=-0.56/v=0.13) - in                  
                 his - (w=0.88/v=0.08) 0.07 | -0.07 (w=-0.46/v=0.15) - be hold             
                  he - (w=0.49/v=0.07) 0.04 | -0.06 (w=-0.27/v=0.22) - rally in            
           sucker in - (w=0.14/v=0.25) 0.03 | -0.06 (w=-0.49/v=0.12) - perhaps             
                 ego - (w=0.16/v=0.17) 0.03 | -0.05 (w=-0.50/v=0.10) - find                
                  be - (w=0.76/v=0.03) 0.02 | -0.04 (w=-0.35/v=0.11) - hold                
            can find - (w=0.14/v=0.16) 0.02 | -0.03 (w=-0.14/v=0.23) - day trump           
          hold rally


-                                   ---- Result: 0.63 ----
                          sum toxic : 1.43  |  -0.80 : sum non-toxic                      

               crook - (w=3.45/v=0.12) 0.42 | -0.09 (w=-0.61/v=0.14) - horgan              
       typical leave - (w=0.70/v=0.17) 0.12 | -0.06 (w=-0.34/v=0.18) - her public          
           all other - (w=0.64/v=0.12) 0.08 | -0.06 (w=-0.45/v=0.13) - notley              
              weaver - (w=0.53/v=0.15) 0.08 | -0.06 (w=-0.66/v=0.09) - and other           
         their money - (w=0.62/v=0.12) 0.08 | -0.06 (w=-0.29/v=0.19) - union               
             typical - (w=0.54/v=0.11) 0.06 | -0.05 (w=-0.33/v=0.14) - cut back            
          leave wing - (w=0.44/v=0.11) 0.05 | -0.05 (w=-0.38/v=0.12) - trudeau and         
                wing - (w=0.43/v=0.10) 0.04 | -0.04 (w=-0.24/v=0.16) - money while         
                 hit - (w=0.43/v=0.10) 0.04 | -0.03 (w=-0.41/v=0.07) - sure                
                 her

NameError: name 'custom_featurizer' is not defined

In [5]:
#A cell for trying stuff out

#custom_featurizer.print_sample_with_feature_names(train_tfidf_custom, 1)
#featurizer, train_events, train_labels, valid_events, valid_labels, predicted_labels, model = res
#te = tfidf_vectorizer.transform(['Wow . be a hate fill monster .'])
#te = tfidf_vectorizer.transform(["Holy crap . You be amazing . Such fun . "])
#model.predict(te)
#print_feature_weights_for_sample(model, featurizer, ["He is an idiot !"], [1])

merged = pd.read_csv("data/merged.csv")
#roc_auc_score, precision_recall_fscore_support
stats = precision_recall_fscore_support(merged['target'],merged['result'])
auc = roc_auc_score(merged['target'],merged['result'])
print("0 Precision", stats[0][0] * 100, "Recall", stats[1][0] * 100, "Fscore", stats[2][0] * 100, "AUC", auc * 100)
print("1 Precision", stats[0][1] * 100, "Recall", stats[1][1] * 100, "Fscore", stats[2][1] * 100, "AUC", auc * 100)


0 Precision 97.28552501830016 Recall 96.78199796580384 Recall 97.03310826711076 AUC 82.80464355703002
1 Precision 64.94689917398715 Recall 68.82728914825623 Recall 66.83081489476524 AUC 82.80464355703002


In [None]:
#for getting most toxic features
featurizer, train_events, train_labels, valid_events, valid_labels, predicted_labels, model = res 
model_weights = model.coef_.copy()

max_idxs = np.argpartition(model_weights[0], -100)[-100:]

idx_to_feat_name_vocab = {v: k for k, v in tfidf_vectorizer.vocabulary_.items()} 

for idx in max_idxs:
    print(idx_to_feat_name_vocab[idx])
#print(np.array(model_weights).argsort()[-30:][::-1])