In [38]:
import pandas as pd
import re
import nltk
import string
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import preprocessing

nltk.download("wordnet")
from nltk.stem import WordNetLemmatizer
comments_df = pd.read_csv('edos_labelled_data.csv') 
le = preprocessing.LabelEncoder()
comments_df["label"] = le.fit_transform(comments_df["label"])
comments_df.head()



[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/elliothagyard/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Unnamed: 0,rewire_id,text,label,split
0,sexism2022_english-9609,"In Nigeria, if you rape a woman, the men rape ...",0,train
1,sexism2022_english-16993,"Then, she's a keeper. 😉",0,train
2,sexism2022_english-13149,This is like the Metallica video where the poo...,0,train
3,sexism2022_english-13021,woman?,0,train
4,sexism2022_english-966,I bet she wished she had a gun,0,train


In [163]:
comments_train_df = comments_df[comments_df["split"] == "train"]
comments_test_df = comments_df[comments_df["split"] == "test"]
X_train = comments_train_df["text"]
Y_train = comments_train_df["label"]
X_test = comments_test_df["text"]
Y_test = comments_test_df["label"]
X_train.shape

(4193,)

In [40]:
def clean(comments: list[str]) -> list[str]:
    # remove unicode
    # remove punct
    comments_clean = [comment.encode("ascii", "ignore").decode() for comment in comments]
    comments_clean = list(map(lambda x : x.lower(), comments_clean))
    comments_clean = [re.sub(r'(#\w+|\[user\]|\[url\])', '', comment) for comment in comments_clean]
    translator = str.maketrans('', '', string.punctuation)
    comments_clean = [comment.translate(translator) for comment in comments_clean]
    #comments_clean = [[wln.lemmatize(word.strip()) for word in comment.split()] for comment in comments_clean]

    return comments_clean

In [41]:
def toWordFreqDF(x, y):
    clean_x = clean(x)
    vectorizer = CountVectorizer()
    print(type(clean_x))
    vec = vectorizer.fit_transform(clean_x)
    frequency_df = pd.DataFrame(vec.toarray(), columns=vectorizer.get_feature_names_out())
    frequency_df['_label'] = y.tolist()
    frequency_df['_label'].tail()
    return frequency_df

train_freq = toWordFreqDF(X_train, Y_train)
test_freq = toWordFreqDF(X_test, Y_test)


<class 'list'>
<class 'list'>


In [185]:
from nltk import word_tokenize, pos_tag
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return ''

def clean2EvenCleaner(x):
    clean_x = clean(x)
    lemmatizer = WordNetLemmatizer()
    out = []
    for sentence in clean_x:
        tokens = pos_tag(word_tokenize(sentence))
        tagged = list(map(
                lambda x : (x[0], get_wordnet_pos(x[1])),
                tokens
        ))
        
        word_and_pos = list(filter(
            lambda x : x[1] != '', 
            tagged
        ))

        out.append(" ".join(list(map(lambda x : lemmatizer.lemmatize(x[0], x[1]), word_and_pos))))
    return out

TF_IDF_VEC = TfidfVectorizer()
TF_IDF_VEC.fit(clean2EvenCleaner(X_train[Y_train==1]))

from sklearn.feature_extraction.text import TfidfVectorizer
def TFIDF(x, y):
    cleaned = clean2EvenCleaner(x)
    out = TF_IDF_VEC.transform(cleaned)
    df = pd.DataFrame(out.toarray(), columns=TF_IDF_VEC.get_feature_names_out())
    return df

vectorizer = CountVectorizer()
out = clean2EvenCleaner(X_train)
vectorizer.fit(out)

def secondParsing(x, y):
    out = vectorizer.transform(clean2EvenCleaner(x))
    df = pd.DataFrame(out.toarray(), columns=vectorizer.get_feature_names_out())

    return df
train_freq2 = secondParsing(X_train, Y_train)


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/elliothagyard/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/elliothagyard/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [187]:
from sklearn.metrics import f1_score 
freq_with_labels = train_freq2.copy()
freq_with_labels['_label'] = Y_train.tolist()

sexist_comment_percent = len(Y_train.loc[Y_train== 1]) / len(Y_train) 
common_freq2 = freq_with_labels.sum().loc[freq_with_labels.sum() >= 5]
sexist = freq_with_labels[freq_with_labels["_label"] == 1].sum().loc[freq_with_labels.sum() >= 5]
ratio  = sexist/common_freq2.sort_values()
bad_words = ratio.loc[ratio > 1 - sexist_comment_percent + .02].index

mid_words = ratio.loc[
    (1 - sexist_comment_percent + .02 > ratio) 
    & (ratio > 0.424)].index

def test_word(sentence : str):
    sexist = 0
    for word in sentence.split():
        if word in bad_words:
            sexist = 1
    return sexist

def score_sentence(sentence : str, midVal):
    sexist = 0
    for word in sentence.split():
        if word in bad_words:
            sexist += 10
        elif word in mid_words:
            sexist += midVal
    return sexist

predict3 = []
print()
for comment in clean2EvenCleaner(X_test):
    predict3.append(test_word(comment))

print(f1_score(Y_test, predict3, average = "weighted"))
predictions = []
for comment in clean2EvenCleaner(X_test):
    predictions.append(int(score_sentence(comment, 3) >= 10))
print(f1_score(Y_test, predictions, average = "weighted"))



0.7862923389870593
0.8038727340894786


In [151]:

pred_new = []
a = TFIDF(X_train, Y_train)
a['_label'] = Y_train.tolist()
a['_label'].tail()

sexist_comment_percent = len(Y_train.loc[Y_train== 1]) / len(Y_train) 
common_freq2 = a.sum().loc[a.sum() >= 5]
sexist = a[a["_label"] == 1].sum().loc[a.sum() >= 5]
ratio  = (sexist/common_freq2).sort_values()
bad_words = ratio.loc[ratio > 1 - sexist_comment_percent - .05].index
mid_words = ratio.loc[
    (1 - sexist_comment_percent + .02 > ratio) 
    & (ratio > 0.424)].index

for comment in clean2EvenCleaner(X_test):
    a =  score_sentence(comment, 3)
    pred_new.append(a >= 9)
    print(comment + " : " + str(a))
print(f1_score(Y_test, pred_new, average = "weighted"))

fuck nigger jew have history eat white people fuck suggest i get people want eat : 0
well then good someone have knock as back line act woman post bullshit base minority someone be not white be not : 6
sharia texas evil muslim parent try murder daughter boil oil refuse arrange marriage : 3
yes normal woman want be dominate social scientist call female hypergamy be woman make bad boss men generally speak : 9
didnt have be bitch literally go way rub rejection face simple i appriciate offer i have boyfriend have suffice : 10
tldr unclear cover shit now be call police : 0
ti true puff pas lead munchies philosophy : 0
lol canadian guy legally change gender female so save year car insurance female lawmaker be outrage be demand be arrest perjury bc say do just insurance saving ha ha ha ha : 0
be idiot read whole book : 0
just fuck lol do know hard life be subhuman dumb whore : 10
warrntanka be so butthurt dna fubar be see red yep right there front door lizzy btw give ticket money back : 0
i p

In [197]:

TF_IDF = TFIDF(X_train, Y_train)


In [48]:
TFIDF(X_test, Y_test)

Unnamed: 0,1950s,510,abdication,abhorrent,abide,ability,abject,able,abolish,abort,...,yt,yup,zat,zealand,zero,zionist,ziowhore,zog,zombie,zoom
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1081,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1082,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1083,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1084,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [198]:
from sklearn.ensemble import RandomForestClassifier
data = TF_IDF
labels = Y_train
print(Y_train)
rand_forest_TF_IDF = RandomForestClassifier(random_state=0, min_samples_split=3)
rand_forest_TF_IDF.fit(data, labels)
TF_IDF_rand_forest = rand_forest_TF_IDF.predict(TFIDF(X_test, Y_test))


0       0
1       0
2       0
3       0
4       0
       ..
5271    1
5272    0
5273    0
5274    0
5278    1
Name: label, Length: 4193, dtype: int64


In [193]:
from sklearn.ensemble import RandomForestClassifier
data = train_freq2
labels = Y_train
rand_forest = RandomForestClassifier(random_state=0, min_samples_split=3)
rand_forest.fit(data, labels)
encoded_test = secondParsing(X_test, Y_test)
word_freq_rand_forest = rand_forest.predict(encoded_test)

In [204]:
from sklearn.model_selection import RandomizedSearchCV
# Random Forest Tuning
import numpy as np
# Number of trees in random forest
#n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 5)]
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 5)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]# Create the random grid
random_grid = {#'n_estimators': n_estimators,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
rf = RandomForestClassifier(random_state=0)
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=42, n_jobs = -1)# Fit the random search model
rf_random.fit(train_freq2, Y_train)

Fitting 3 folds for each of 72 candidates, totalling 216 fits




[CV] END bootstrap=True, max_depth=10, min_samples_leaf=1, min_samples_split=2; total time=   6.6s
[CV] END bootstrap=True, max_depth=10, min_samples_leaf=2, min_samples_split=5; total time=   6.2s
[CV] END bootstrap=True, max_depth=35, min_samples_leaf=1, min_samples_split=5; total time=  13.6s
[CV] END bootstrap=True, max_depth=35, min_samples_leaf=2, min_samples_split=5; total time=  12.4s
[CV] END bootstrap=True, max_depth=60, min_samples_leaf=1, min_samples_split=2; total time=  19.6s
[CV] END bootstrap=True, max_depth=60, min_samples_leaf=2, min_samples_split=2; total time=  17.4s
[CV] END bootstrap=True, max_depth=85, min_samples_leaf=1, min_samples_split=2; total time=  24.8s
[CV] END bootstrap=True, max_depth=85, min_samples_leaf=2, min_samples_split=2; total time=  21.4s
[CV] END bootstrap=True, max_depth=110, min_samples_leaf=1, min_samples_split=2; total time=  28.4s
[CV] END bootstrap=True, max_depth=110, min_samples_leaf=2, min_samples_split=5; total time=  22.7s
[CV] END

In [206]:
best_random = rf_random.best_estimator_
preds = best_random.predict(secondParsing(X_test, Y_test))
print(f1_score(Y_test, preds, average = "weighted"))

TypeError: secondParsing() missing 1 required positional argument: 'y'

In [200]:
from sklearn.tree import DecisionTreeClassifier
data = TF_IDF
labels = Y_train
print(Y_train)
decision_tree_TF_IDF = DecisionTreeClassifier(random_state=0, min_samples_split=3)
decision_tree_TF_IDF.fit(data, labels)
deciesion_tree_TF_IDF = decision_tree_TF_IDF.predict(TFIDF(X_test, Y_test))

0       0
1       0
2       0
3       0
4       0
       ..
5271    1
5272    0
5273    0
5274    0
5278    1
Name: label, Length: 4193, dtype: int64


In [201]:
from sklearn.metrics import f1_score

print(f1_score(Y_test, TF_IDF_rand_forest, average = "weighted"))
print(f1_score(Y_test, word_freq_rand_forest, average = "weighted"))
f1_score(Y_test, deciesion_tree_TF_IDF,average = "weighted")

0.793750422949767
0.7980430122618759


0.7619285623814721

In [None]:
# Using different encoding method
sexist_comment_percent = len(Y_train.loc[Y_train== 1]) / len(Y_train) 
common_freq2 = tf_idf_x_train.sum().loc[tf_idf_x_train.sum() >= 5]
sexist = tf_idf_x_train[tf_idf_x_train["_label"] == 1].sum().loc[tf_idf_x_train.sum() >= 5]
ratio  = (sexist/common_freq2).sort_values()
bad_words = ratio.loc[ratio > 1 - sexist_comment_percent + .02].index

pred_new = []
mid_words = ratio.loc[
    (1 - sexist_comment_percent + .02 > ratio) 
    & (ratio > 0.424)
].index

for comment in clean2EvenCleaner(X_test):
    pred_new.append(test_word2(comment, 3, 10))
print(f1_score(Y_test, pred_new, average = "weighted"))

      1950s  21st  510   ab  abandon  abbotts  abdication  abductor  abedin  \
0       0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   
1       0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   
2       0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   
3       0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   
4       0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   
...     ...   ...  ...  ...      ...      ...         ...       ...     ...   
4188    0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   
4189    0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   
4190    0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   
4191    0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   
4192    0.0   0.0  0.0  0.0      0.0      0.0         0.0       0.0     0.0   

      abet  ...  zip  zog  zombie  zombiefied  zone

In [None]:
bad_words

Index(['_label', 'behaviour', 'bitch', 'cock', 'cunt', 'earn', 'exploit',
       'feminazi', 'hag', 'happiness', 'hoe', 'hole', 'honest', 'hypergamy',
       'logic', 'loyal', 'misogyny', 'oppress', 'pussy', 'resign', 'roast',
       'roastie', 'robot', 'romance', 'screw', 'shower', 'skank', 'slut',
       'slutty', 'status', 'strength', 'string', 'thots', 'tit', 'tranny',
       'virginity', 'west', 'whore', 'yo'],
      dtype='object')

In [None]:
sexist_comment_percent = len(Y_train.loc[Y_train== 1]) / len(Y_train) 
common_freq = train_freq.sum().loc[train_freq.sum() >= 3]
sexist = train_freq[train_freq["_label"] == 1].sum().loc[train_freq.sum() >= 3]
ratio  = sexist/common_freq.sort_values()
bad_words = ratio.loc[ratio > 1 - sexist_comment_percent + .05].index

def test_word(sentence : str):
    sexist = 0
    for word in sentence.split():
        if word in bad_words:
            sexist = 1
            
    return sexist

predict = []
print()
for comment in clean(X_test):
    predict.append(test_word(comment))
print(sum(predict))
print(bad_words)
print(bad_words)
print(f1_score(Y_test, predict, average = "weighted"))


189
0.7754395239894935


In [None]:
from sklearn.metrics import f1_score 

mid_words = ratio.loc[
    (1 - sexist_comment_percent + .05 > ratio) 
    & (ratio > 0.6)
].index
print(mid_words)
good_words = ratio.loc[sexist_comment_percent - .05 > ratio].index

def test_word2(sentence : str, badVal, midVal, cutoff):
    sexist = 0
    for word in sentence.split():
        if word in bad_words:
            sexist += badVal
        elif word in mid_words:
            sexist += midVal
    sexist = 1 if sexist >= cutoff else 0
    return sexist

predict2 = []
best_F1 = [0]
def find_best_variables():
    global best_F1
    for bad in range(0, 0): # 40, 5 3 5 F1 = 0.7904021016988467
        for mid in range(0, bad):
            for cut in range(5, 100):
                predict2 = []
                for comment in clean(X_test):
                    predict2.append(test_word2(comment, (bad/10), (mid/10), (cut/10)))
                f1 = f1_score(Y_test, predict2, average = "weighted")
                print(str(bad) + " " + str(mid) + " " + str(cut), end = "\r")
                if(f1 > best_F1[0]):
                    best_F1 = []
                    best_F1.append(f1)
                    best_F1.append([bad, mid, cut])
                    print(str(bad) + " " + str(mid) + " " + str(cut) + " F1 = " + str(f1))
find_best_variables()
print(best_F1)


Index(['17', '40', 'above', 'alpha', 'attention', 'british', 'bs', 'chad',
       'charge', 'cheating', 'club', 'completely', 'confident', 'disgusting',
       'equal', 'exist', 'exposed', 'failure', 'fantasy', 'feminism',
       'finally', 'greatest', 'gross', 'hahaha', 'handle', 'higher',
       'immediately', 'learn', 'lifetime', 'loved', 'low', 'lying', 'market',
       'marriage', 'mode', 'modern', 'motherhood', 'movies', 'normies',
       'obese', 'opposite', 'option', 'patriarchy', 'physically', 'rich',
       'screw', 'simp', 'sluts', 'species', 'stick', 'successful', 'sucked',
       'tits', 'tons', 'tranny', 'treat', 'turns', 'typical', 'unfortunately',
       'validation', 'victims', 'west', 'whenever', 'wing'],
      dtype='object')
[0]


In [None]:
#from sklearn.model_selection import train_test_split
#Our_X_Train, Our_X_Test = train_test_split(X_train, test_size=0.2, shuffle=False)
#Our_Y_Train, Our_Y_Test = train_test_split(Y_train, test_size=0.2, shuffle=False)

In [None]:
def find_best_ranges():
    f1_scores = []
    best_f1 = [0]
    for bad in range(0,15):
        for mid in range(6, int((1 - sexist_comment_percent + bad/100)*20)):
            test_range(bad/100, 0.05 * mid, f1_scores, best_f1)
    return f1_scores

def test_range(bad, mid, f1_scores, best_f1):
    bad_words = ratio.loc[ratio >= 1 - sexist_comment_percent + bad].index
    mid_words = ratio.loc[
    (1 - sexist_comment_percent + bad > ratio) 
    & (ratio > mid)].index
    

    for midVal in range(0,6):
        for cutoff in range(5,31):
            print(str(bad) + " " + str(mid) + " " + str(midVal) + " " + str(cutoff) + " ", end = "\r")
            predictions = []
            for comment in clean(X_train):
                predictions.append(test_word3(comment, midVal, cutoff, bad_words, mid_words))
            f1 = f1_score(Y_train, predictions, average = "weighted")
            f1_scores.append([f1, bad, mid, midVal, cutoff])
            if(f1 > best_f1[0]):
                best_f1[0] = f1
                print(str(f1) + " " + str(bad) + " " + str(mid) + " " + str(midVal) + " " + str(cutoff))
                
    
def test_word3(sentence : str, midVal, cutoff, bad_words, mid_words):
    sexist = 0
    for word in sentence.split():
        if word in bad_words:
            sexist += 10
        elif word in mid_words:
            sexist += midVal
    sexist = 1 if sexist >= cutoff else 0
    return sexist

print(find_best_ranges())

0.7789432354510586 0.0 0.30000000000000004 0 5
0.7829160467761055 0.0 0.35000000000000003 1 9
0.792841730559442 0.0 0.35000000000000003 1 10
0.7975281156078042 0.0 0.35000000000000003 1 11
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/elliothagyard/opt/anaconda3/envs/CSC_380/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/x4/xf2q_cf56pg5kt_qpw239_800000gn/T/ipykernel_52057/1302212820.py", line 39, in <module>
    print(find_best_ranges())
          ^^^^^^^^^^^^^^^^^^
  File "/var/folders/x4/xf2q_cf56pg5kt_qpw239_800000gn/T/ipykernel_52057/1302212820.py", line 6, in find_best_ranges
    test_range(bad/100, 0.05 * mid, f1_scores, best_f1)
  File "/var/folders/x4/xf2q_cf56pg5kt_qpw239_800000gn/T/ipykernel_52057/1302212820.py", line 21, in test_range
    predictions.append(test_word3(comment, midVal, cutoff, bad_words, mid_words))
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/x4/xf2q_cf56pg5kt_qpw239_800000gn/T/ipykernel_52057/1302212820.py", line -1, in test_word3
KeyboardInterrupt

During handling of t

In [None]:
def find_best_ranges_neg():
    f1_scores = []
    best_f1 = [0]
    for bad in range(1,15):
        for mid in range(6, int((1 - sexist_comment_percent + bad/100)*20)):
            test_range(bad/-100, 0.05 * mid, f1_scores, best_f1)
    return f1_scores

neg_f1_scores = find_best_ranges_neg()
print(neg_f1_scores)

0.7789432354510586 -0.01 0.30000000000000004 0 5
0.7829160467761055 -0.01 0.35000000000000003 1 9
0.792841730559442 -0.01 0.35000000000000003 1 10
0.7975281156078042 -0.01 0.35000000000000003 1 11
0.8118695898697101 -0.01 0.4 1 5
0.8161997121844654 -0.01 0.45 2 5
-0.03 0.45 3 15 000000003 5 29 

In [None]:
# 0, 0.45, 2, 5

bad_words = ratio.loc[ratio >= 1 - sexist_comment_percent - 0.01].index
mid_words = ratio.loc[(1 - sexist_comment_percent - 0.01 > ratio) 
    & (ratio > 0.45)].index

predictions = []
for comment in clean(X_test):
    predictions.append(test_word3(comment, 2, 5, bad_words, mid_words))
    
print(f1_score(Y_test, predictions, average = "weighted"))

0.7880027200175773


In [None]:
from sklearn.metrics import classification_report
def eval_predictions(pred):
    print(classification_report(Y_test, pred))

import random
baseline = [0] * len(Y_test)
baseline2 = [int(random.uniform(0, 1) > .8) for i in predict]

print("BASE")
eval_predictions(baseline)
print("RAND")
# eval_predictions(baseline2)
# print("BAD")
eval_predictions(predict)
# print("MID")
# eval_predictions(predictions)
eval_predictions(predict3)

BASE
              precision    recall  f1-score   support

           0       0.73      1.00      0.84       789
           1       0.00      0.00      0.00       297

    accuracy                           0.73      1086
   macro avg       0.36      0.50      0.42      1086
weighted avg       0.53      0.73      0.61      1086

RAND
              precision    recall  f1-score   support

           0       0.81      0.93      0.87       789
           1       0.69      0.44      0.53       297

    accuracy                           0.79      1086
   macro avg       0.75      0.68      0.70      1086
weighted avg       0.78      0.79      0.78      1086

              precision    recall  f1-score   support

           0       0.81      0.96      0.88       789
           1       0.79      0.40      0.53       297

    accuracy                           0.81      1086
   macro avg       0.80      0.68      0.71      1086
weighted avg       0.81      0.81      0.78      1086



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
