## Preparing the dataset

Import Emojis for Python to deal with comments with emojis

In [15]:
! pip install emoji --upgrade
import emoji
# Converts emojis to strings



In [16]:
import numpy as np
import pandas as pd

np.random.seed(42069)

Load the Training data

In [17]:
df = pd.read_csv('https://raw.githubusercontent.com/Alcoris0987/NLP-Project/main/olid-training-v1.0.tsv', sep='\t')
df

Unnamed: 0,id,tweet,subtask_a,subtask_b,subtask_c
0,86426,@USER She should ask a few native Americans wh...,OFF,UNT,
1,90194,@USER @USER Go home you’re drunk!!! @USER #MAG...,OFF,TIN,IND
2,16820,Amazon is investigating Chinese employees who ...,NOT,,
3,62688,"@USER Someone should'veTaken"" this piece of sh...",OFF,UNT,
4,43605,@USER @USER Obama wanted liberals &amp; illega...,NOT,,
...,...,...,...,...,...
13235,95338,@USER Sometimes I get strong vibes from people...,OFF,TIN,IND
13236,67210,Benidorm ✅ Creamfields ✅ Maga ✅ Not too sh...,NOT,,
13237,82921,@USER And why report this garbage. We don't g...,OFF,TIN,OTH
13238,27429,@USER Pussy,OFF,UNT,


Load tweet comments with emojis.

In [18]:
df.loc[13236,'tweet']

'Benidorm ✅  Creamfields ✅  Maga ✅   Not too shabby of a summer'

Test Emojis for Python and apply it to the training data.

In [19]:
print(emoji.demojize(df.loc[13236,'tweet']))

Benidorm :check_mark_button:  Creamfields :check_mark_button:  Maga :check_mark_button:   Not too shabby of a summer


In [20]:
empty = []
for row in df['tweet']:
  row = emoji.demojize(row)
  empty.append(row)
df['demojitweet'] = empty
df

Unnamed: 0,id,tweet,subtask_a,subtask_b,subtask_c,demojitweet
0,86426,@USER She should ask a few native Americans wh...,OFF,UNT,,@USER She should ask a few native Americans wh...
1,90194,@USER @USER Go home you’re drunk!!! @USER #MAG...,OFF,TIN,IND,@USER @USER Go home you’re drunk!!! @USER #MAG...
2,16820,Amazon is investigating Chinese employees who ...,NOT,,,Amazon is investigating Chinese employees who ...
3,62688,"@USER Someone should'veTaken"" this piece of sh...",OFF,UNT,,"@USER Someone should'veTaken"" this piece of sh..."
4,43605,@USER @USER Obama wanted liberals &amp; illega...,NOT,,,@USER @USER Obama wanted liberals &amp; illega...
...,...,...,...,...,...,...
13235,95338,@USER Sometimes I get strong vibes from people...,OFF,TIN,IND,@USER Sometimes I get strong vibes from people...
13236,67210,Benidorm ✅ Creamfields ✅ Maga ✅ Not too sh...,NOT,,,Benidorm :check_mark_button: Creamfields :che...
13237,82921,@USER And why report this garbage. We don't g...,OFF,TIN,OTH,@USER And why report this garbage. We don't g...
13238,27429,@USER Pussy,OFF,UNT,,@USER Pussy


In [21]:
df2 = df.drop(columns=['tweet'])
df2

Unnamed: 0,id,subtask_a,subtask_b,subtask_c,demojitweet
0,86426,OFF,UNT,,@USER She should ask a few native Americans wh...
1,90194,OFF,TIN,IND,@USER @USER Go home you’re drunk!!! @USER #MAG...
2,16820,NOT,,,Amazon is investigating Chinese employees who ...
3,62688,OFF,UNT,,"@USER Someone should'veTaken"" this piece of sh..."
4,43605,NOT,,,@USER @USER Obama wanted liberals &amp; illega...
...,...,...,...,...,...
13235,95338,OFF,TIN,IND,@USER Sometimes I get strong vibes from people...
13236,67210,NOT,,,Benidorm :check_mark_button: Creamfields :che...
13237,82921,OFF,TIN,OTH,@USER And why report this garbage. We don't g...
13238,27429,OFF,UNT,,@USER Pussy


Check for missing values and repetitions.

In [22]:
print(f"Missing values: {df['subtask_a'].isnull().values.any()}")
print(f"Repetitions: {len(df) != len(df['id'].unique())}")


Missing values: False
Repetitions: False


Convert offensive labels to 1 and not offensive ones to 0.

In [23]:
df['subtask_a'] = df['subtask_a'].map({'OFF': 1, 'NOT': 0})

In [24]:
from sklearn.model_selection import train_test_split, cross_val_score

X = df['tweet']
y = df['subtask_a']

X_train, X_test, y_train, y_test = train_test_split(X, y)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(9930,)
(3310,)
(9930,)
(3310,)


Import the relevant tools.

In [26]:
%matplotlib inline
import tensorflow as tf
from sklearn.model_selection import train_test_split
from ast import literal_eval
import re
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.multiclass import OneVsRestClassifier
from nltk.corpus import stopwords
import nltk
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import seaborn as sns
import time

# Setup for Neural Model
Simple transformers have been known to yield notable results. We will compare the variations of BERT to see which works better.

Prepare the dataset to fit simple transformers

In [29]:
df2['subtask_a'] = df2['subtask_a'].map({'OFF': 1, 'NOT': 0})

In [30]:
df2

Unnamed: 0,id,subtask_a,subtask_b,subtask_c,demojitweet
0,86426,1,UNT,,@USER She should ask a few native Americans wh...
1,90194,1,TIN,IND,@USER @USER Go home you’re drunk!!! @USER #MAG...
2,16820,0,,,Amazon is investigating Chinese employees who ...
3,62688,1,UNT,,"@USER Someone should'veTaken"" this piece of sh..."
4,43605,0,,,@USER @USER Obama wanted liberals &amp; illega...
...,...,...,...,...,...
13235,95338,1,TIN,IND,@USER Sometimes I get strong vibes from people...
13236,67210,0,,,Benidorm :check_mark_button: Creamfields :che...
13237,82921,1,TIN,OTH,@USER And why report this garbage. We don't g...
13238,27429,1,UNT,,@USER Pussy


In [31]:
df3 = df2.drop(columns=['subtask_b', 'subtask_c', 'id'])
df3 = df3[['demojitweet', 'subtask_a']]
df3.columns = ["text_a", "labels"]
df3

Unnamed: 0,text_a,labels
0,@USER She should ask a few native Americans wh...,1
1,@USER @USER Go home you’re drunk!!! @USER #MAG...,1
2,Amazon is investigating Chinese employees who ...,0
3,"@USER Someone should'veTaken"" this piece of sh...",1
4,@USER @USER Obama wanted liberals &amp; illega...,0
...,...,...
13235,@USER Sometimes I get strong vibes from people...,1
13236,Benidorm :check_mark_button: Creamfields :che...,0
13237,@USER And why report this garbage. We don't g...,1
13238,@USER Pussy,1


Import the relevant tools

In [32]:
!pip install simpletransformers

Collecting simpletransformers
  Downloading simpletransformers-0.63.6-py3-none-any.whl (249 kB)
[K     |████████████████████████████████| 249 kB 5.0 MB/s 
[?25hCollecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[K     |████████████████████████████████| 43 kB 1.7 MB/s 
[?25hCollecting transformers>=4.6.0
  Downloading transformers-4.19.1-py3-none-any.whl (4.2 MB)
[K     |████████████████████████████████| 4.2 MB 42.4 MB/s 
Collecting wandb>=0.10.32
  Downloading wandb-0.12.16-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 49.8 MB/s 
Collecting datasets
  Downloading datasets-2.2.1-py3-none-any.whl (342 kB)
[K     |████████████████████████████████| 342 kB 62.5 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 50.6 MB/s 
Collecting tokenizers
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.man

In [33]:
# Import libraries
import pandas as pd
import numpy as np
import sklearn
from sklearn.metrics import classification_report
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import matplotlib.pyplot as plt 
import seaborn as sn 

Perform a stratified train test validate split

In [34]:
test_split = 0.2

train, test = train_test_split(
    df3,
    test_size = test_split,
    stratify = df3["labels"].values,
)
validate = test.sample(frac=0.5)
test.drop(validate.index, inplace=True)

print(f"Number of rows in training set: {len(train)}")
print(f"Number of rows in validation set: {len(validate)}")
print(f"Number of rows in test set: {len(test)}")

Number of rows in training set: 10592
Number of rows in validation set: 1324
Number of rows in test set: 1324


In [35]:
train

Unnamed: 0,text_a,labels
12356,@USER let me nigga fish in peace damn,1
6415,@USER By defending and making excuses for obvi...,0
5439,@USER He is the biggest LSU hater I’ve ever he...,0
2020,@USER @USER So is Fox News now campaigning for...,1
2672,@USER @USER @USER @USER @USER @USER @USER @USE...,0
...,...,...
3658,@USER The fact that she is disrespectful and b...,1
8532,@USER @USER Bitch I wanna know,1
12679,@USER @USER This is ridiculous. She is going t...,1
9492,@USER She is by far the most pretentious littl...,1


In [36]:
validate

Unnamed: 0,text_a,labels
8201,@USER @USER Because I knew it would flush out ...,0
10923,@USER @USER Is he? Does he know this? You su...,1
5336,@USER @USER Quick Preview Summary: Blame Demo...,1
8891,"@USER PPL need to remove the name trump"" &amp;...",0
6704,ANGELINA IS SO FUNNY AT RHE WRONG TIMES IMNGON...,0
...,...,...
481,@USER @USER @USER @USER @USER Most modern cons...,0
9279,@USER Go home. You are newspeak,0
88,@USER well...? URL,0
10222,@USER Baby Brennan boo who I don’t have a secu...,0


In [37]:
test

Unnamed: 0,text_a,labels
967,@USER Oh my heart! She is beautiful. MUCH more...,0
7726,@USER @USER Your logo looks an awful lot like ...,0
1804,@USER is the brainchild of @USER - He is playi...,1
2387,@USER Lmfaoo! :loudly_crying_face: bitch,1
7238,@USER @USER You love being on your knees....Yo...,0
...,...,...
13044,@USER fuck im wired,1
1234,@USER Dividing us is no longer working for hol...,0
2852,@USER They're referred to as #Antifa.,0
11274,@USER I wish liberals would keep their promises,0


# electra

In [42]:
# Model configuration # https://simpletransformers.ai/docs/usage/#configuring-a-simple-transformers-model 
model_args = ClassificationArgs()

model_args.overwrite_output_dir=True # overwrite existing saved models in the same directory
model_args.evaluate_during_training=True # to perform evaluation while training the model (eval data should be passed to the training method)

model_args.num_train_epochs=10
model_args.train_batch_size=64
model_args.learning_rate=1e-5 
model_args.max_seq_length=128 
model_args.sliding_window=True # tackle longer strings in the datasets.

# Early stopping to combat overfitting: https://simpletransformers.ai/docs/tips-and-tricks/#using-early-stopping
model_args.use_early_stopping=True
model_args.early_stopping_delta=0.01 # "The improvement over best_eval_loss necessary to count as a better checkpoint"
model_args.early_stopping_metric='eval_loss'
model_args.early_stopping_metric_minimize=True 
model_args.early_stopping_patience=5 
model_args.evaluate_during_training_steps=34 # how often you want to run validation in terms of training steps (or batches)

model_args.evaluate_during_training_silent=False
model_args.evaluate_each_epoch=False

With this configuration, the training will terminate if the eval_loss on the evaluation data does not improve upon the best eval_loss by at least 0.01 for 2 consecutive evaluations.

We tackle the issue of long text data from the comments by implementing sliding windows while keeping the maximum sequence length to its max of 512.

In [43]:
import torch

cuda_available = torch.cuda.is_available() # Uses the GPU when available but avoids errors when no GPU is available

In [44]:
# Create a ClassificationModel (model_type = 'bert', model_name = 'bert-base-cased'), more models: https://huggingface.co/transformers/pretrained_models.html
model = ClassificationModel('electra', 'google/electra-base-discriminator', args=model_args, use_cuda=cuda_available) 

Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.d

Run the model. As previously mentioned, the evaluation data is passed.

In [45]:
_, history = model.train_model(train, eval_df=validate)

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/10592 [00:00<?, ?it/s]



Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/167 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/167 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/167 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/167 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/166 [00:00<?, ?it/s]

Test the model with the test data

In [46]:
result, model_outputs, wrong_predictions = model.eval_model(
    test, acc=sklearn.metrics.accuracy_score
)

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1324 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/167 [00:00<?, ?it/s]

In [47]:
result

{'acc': 0.7787009063444109,
 'eval_loss': 0.4936872014028583,
 'fn': 144,
 'fp': 149,
 'mcc': 0.4957626266990618,
 'tn': 747,
 'tp': 284}

Import and "Demojify" the test datas and combine their labels.

In [48]:
df_in = pd.read_csv('https://raw.githubusercontent.com/Alcoris0987/NLP-Project/main/labels/test_IN.csv')
df_in_label = pd.read_csv('https://raw.githubusercontent.com/Alcoris0987/NLP-Project/main/labels/test_IN_gold.csv')
empty = []
for row in df_in['tweet']:
  row = emoji.demojize(row)
  empty.append(row)
df_in['demojitweet'] = empty
df_in = df_in.drop(columns=['tweet'])
df_in_label['label'] = df_in_label['label'].map({'OFF': 1, 'NOT': 0})
df_in_full = pd.concat([df_in, df_in_label], axis=1)
df_in_full = df_in_full.drop(columns=['id', 'id'])
df_in_full

Unnamed: 0,demojitweet,label
0,#WhoIsQ #WheresTheServer #DumpNike #DECLASFISA...,1
1,"#ConstitutionDay is revered by Conservatives, ...",0
2,#FOXNews #NRA #MAGA #POTUS #TRUMP #2ndAmendmen...,0
3,#Watching #Boomer getting the news that she is...,0
4,#NoPasaran: Unity demo to oppose the far-right...,1
...,...,...
855,#DespicableDems lie again about rifles. Dem Di...,1
856,#MeetTheSpeakers :raising_hands: @USER will pr...,0
857,3 people just unfollowed me for talking about ...,1
858,#WednesdayWisdom Antifa calls the right fascis...,0


In [49]:
df_out1 = pd.read_csv('https://raw.githubusercontent.com/Alcoris0987/NLP-Project/main/labels/test_OUT1.csv')
df_out1_label = pd.read_csv('https://raw.githubusercontent.com/Alcoris0987/NLP-Project/main/labels/test_OUT1_gold.csv')
empty = []
for row in df_out1['tweet']:
  row = emoji.demojize(row)
  empty.append(row)
df_out1['demojitweet'] = empty
df_out1 = df_out1.drop(columns=['tweet'])
df_out1_label['label'] = df_out1_label['label'].map({'OFF': 1, 'NOT': 0})
df_out1_full = pd.concat([df_out1, df_out1_label], axis=1)
df_out1_full = df_out1_full.drop(columns=['id', 'id'])
df_out1_full

Unnamed: 0,demojitweet,label
0,Maybe it could have something to do with the r...,1
1,"Weirdly enough, i think about this fairly freq...",1
2,Your idea (while posing a real problem with et...,1
3,>it will encourage a part of the people you ba...,1
4,If something like this could make someone supp...,1
...,...,...
1202,[deleted],0
1203,Its a tweet about politics. Report it and have...,0
1204,Why? The Reddit CEO censors Reddit fine.,0
1205,"Dont worry, it did a pretty good job of censor...",0


In [50]:
df_out2 = pd.read_csv('https://raw.githubusercontent.com/Alcoris0987/NLP-Project/main/labels/test_OUT2.csv')
df_out2_label = pd.read_csv('https://raw.githubusercontent.com/Alcoris0987/NLP-Project/main/labels/test_OUT2_gold.csv')
empty = []
for row in df_out2['tweet']:
  row = emoji.demojize(row)
  empty.append(row)
df_out2['demojitweet'] = empty
df_out2 = df_out2.drop(columns=['tweet'])
df_out2_label['label'] = df_out2_label['label'].map({'OFF': 1, 'NOT': 0})
df_out2_full = pd.concat([df_out2, df_out2_label], axis=1)
df_out2_full = df_out2_full.drop(columns=['id', 'id'])
df_out2_full

Unnamed: 0,demojitweet,label
0,== Argh == \r\n\r\n Some random idiot deleted ...,1
1,(we hate america and we are going to bomb the ...,1
2,""" \r\n\r\n So, on the tenth anniversary of 9/1...",1
3,casualt i fucked your mum and i liked it !!ies,1
4,"""Welcome! \r\n\r\n Hello, , and welcome to Wik...",0
...,...,...
1195,""" \r\n\r\n == Arbitration enforcement editing ...",0
1196,u suck ...u suck...,1
1197,I will meet you one day and stab you to death ...,1
1198,==Vandalism== \r\n Please refrain from adding ...,0


In [51]:
df_tg = pd.read_csv('https://raw.githubusercontent.com/Alcoris0987/NLP-Project/main/labels/test_TEXTGAIN.csv')
df_tg_label = pd.read_csv('https://raw.githubusercontent.com/Alcoris0987/NLP-Project/main/labels/test_TEXTGAIN_gold.csv')
empty = []
for row in df_tg['tweet']:
  row = emoji.demojize(row)
  empty.append(row)
df_tg['demojitweet'] = empty
df_tg = df_tg.drop(columns=['tweet'])
df_tg_label['label'] = df_tg_label['label'].map({'OFF': 1, 'NOT': 0})
df_tg_full = pd.concat([df_tg, df_tg_label], axis=1)
df_tg_full = df_tg_full.drop(columns=['id', 'id'])
df_tg_full

Unnamed: 0,demojitweet,label
0,"a few American personality camels, well adjust...",0
1,@juniormufasa_ how come lukaku flop @ man uu ...,1
2,LUKAKU IS SUCH A NIGGER I HATE THIS APE,1
3,I beg you lot keep that porch monkey coon Shau...,0
4,TRY NOT TO GET HIGH AS THE MONKEY !!! :cross_m...,0
...,...,...
1271,Luka Trey Burke Luka Clippers Playoff P Champi...,0
1272,@RaheemKassam @realDonaldTrump Champions Leagu...,0
1273,@realDonaldTrump Lewandowski will score 3 goal...,0
1274,Hoping a superb clash between Lewandowski and ...,0


Evaluate the different test data

In [52]:
result, model_outputs, wrong_predictions = model.eval_model(
    df_in_full, acc=sklearn.metrics.accuracy_score
)

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/860 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/108 [00:00<?, ?it/s]

In [53]:
# For df_in 
result

{'acc': 0.827906976744186,
 'eval_loss': 0.41089921168707033,
 'fn': 89,
 'fp': 59,
 'mcc': 0.5575460834732362,
 'tn': 561,
 'tp': 151}

In [54]:
result, model_outputs, wrong_predictions = model.eval_model(
    df_out1_full, acc=sklearn.metrics.accuracy_score
)

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1207 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/156 [00:00<?, ?it/s]

In [55]:
# For df_out1 
result

{'acc': 0.6868268434134217,
 'eval_loss': 0.7561961442996294,
 'fn': 275,
 'fp': 103,
 'mcc': 0.3648947796048022,
 'tn': 561,
 'tp': 268}

In [56]:
result, model_outputs, wrong_predictions = model.eval_model(
    df_out2_full, acc=sklearn.metrics.accuracy_score
)

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1200 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (536 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (910 > 512). Running this sequence through the model will result in indexing errors


Running Evaluation:   0%|          | 0/251 [00:00<?, ?it/s]

In [57]:
# For df_out2 
result

{'acc': 0.885,
 'eval_loss': 0.3416697285564772,
 'fn': 67,
 'fp': 71,
 'mcc': 0.7700171116815026,
 'tn': 529,
 'tp': 533}

In [58]:
result, model_outputs, wrong_predictions = model.eval_model(
    df_tg_full, acc=sklearn.metrics.accuracy_score
)

  "Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels."


  0%|          | 0/1276 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/161 [00:00<?, ?it/s]

In [59]:
# For df_tg 
result

{'acc': 0.5634796238244514,
 'eval_loss': 0.9133275665851853,
 'fn': 73,
 'fp': 484,
 'mcc': 0.11849764524843064,
 'tn': 604,
 'tp': 115}

All yielded good results except for the textgain data. A significant number of false positives were found in that case. 