In [None]:
import pandas as pd
import numpy as np
import tweepy

# Data collection

Here I get tweets based on their id using `tweepy` module. Then remove the tweets which were deleted. Then thos data that their tweets were removed and save new datasets as `csv` in data folder.

In [None]:
TRAIN_DATA_DIR = "data/isarcasm_train.csv"
TEST_DATA_DIR = "data/isarcasm_test.csv"

In [None]:
train_df = pd.read_csv(TRAIN_DATA_DIR)
test_df = pd.read_csv(TEST_DATA_DIR)

In [None]:
train_df.head()

Unnamed: 0,tweet_id,sarcasm_label,sarcasm_type
0,992251158159765504,not_sarcastic,
1,1084373658745876480,not_sarcastic,
2,812181186,not_sarcastic,
3,1090276741329928194,not_sarcastic,
4,553070692682723329,sarcastic,sarcasm


In [None]:
test_df.head()

Unnamed: 0,tweet_id,sarcasm_label,sarcasm_type
0,933721764970057729,sarcastic,sarcasm
1,1086119014131208193,not_sarcastic,
2,657671293756567552,not_sarcastic,
3,914242426755256320,not_sarcastic,
4,1088604537211047936,sarcastic,satire


In [None]:
consumer_key = 'XX'
consumer_key_secret = 'XX'
access_token = 'XX'
access_token_secret = 'XX'

In [None]:
auth = tweepy.OAuthHandler(consumer_key, consumer_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

In [None]:
def get_tweet(tweet_id):
    try:
        tweet_fetched = api.get_status(str(tweet_id))
        return tweet_fetched.text
    except:
        return None

In [None]:
tweets_body = train_df.apply(lambda row: get_tweet(row['tweet_id']), axis=1)

In [None]:
train_df['tweet_body'] = tweets_body

In [None]:
train_df.info

<bound method DataFrame.info of                  tweet_id  sarcasm_label sarcasm_type  \
0      992251158159765504  not_sarcastic          NaN   
1     1084373658745876480  not_sarcastic          NaN   
2               812181186  not_sarcastic          NaN   
3     1090276741329928194  not_sarcastic          NaN   
4      553070692682723329      sarcastic      sarcasm   
...                   ...            ...          ...   
3572   880621198811508737  not_sarcastic          NaN   
3573   977938081805414400  not_sarcastic          NaN   
3574  1145291654926147586      sarcastic        irony   
3575  1146574582272462853  not_sarcastic          NaN   
3576  1055962756380327937  not_sarcastic          NaN   

                                             tweet_body  
0                                                  None  
1     sorry but sue from the mediweight advert looks...  
2     @davesnyder since this is fantasy can it be an...  
3                                                  

In [None]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3577 entries, 0 to 3576
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   tweet_id       3577 non-null   int64 
 1   sarcasm_label  3577 non-null   object
 2   sarcasm_type   619 non-null    object
 3   tweet_body     2859 non-null   object
dtypes: int64(1), object(3)
memory usage: 111.9+ KB


In [None]:
train_df = train_df[train_df['tweet_body'].notna()]

In [None]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2859 entries, 1 to 3576
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   tweet_id       2859 non-null   int64 
 1   sarcasm_label  2859 non-null   object
 2   sarcasm_type   488 non-null    object
 3   tweet_body     2859 non-null   object
dtypes: int64(1), object(3)
memory usage: 111.7+ KB


In [None]:
tweets_body = test_df.apply(lambda row: get_tweet(row['tweet_id']), axis=1)

In [None]:
test_df['tweet_body'] = tweets_body 
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 907 entries, 0 to 906
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   tweet_id       907 non-null    int64 
 1   sarcasm_label  907 non-null    object
 2   sarcasm_type   158 non-null    object
 3   tweet_body     714 non-null    object
dtypes: int64(1), object(3)
memory usage: 28.5+ KB


In [None]:
test_df = test_df[test_df['tweet_body'].notna()]

In [None]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 714 entries, 1 to 906
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   tweet_id       714 non-null    int64 
 1   sarcasm_label  714 non-null    object
 2   sarcasm_type   121 non-null    object
 3   tweet_body     714 non-null    object
dtypes: int64(1), object(3)
memory usage: 27.9+ KB


In [None]:
train_df.to_csv(r'data/train.csv', index = False, header = True)
test_df.to_csv(r'data/test.csv', index = False, header = True)

In [None]:
max_len = 0
for i in range(len(train_df)):
    leng = len(train_df.iloc[i]["tweet_body"])
    if leng > max_len:
        max_len = leng
print(leng)

140


In [None]:
def extract_hashtags(text):
    hashtag_list = []
      
    for word in text.split():
        if word[0] == '#' and len(word) > 1 and not word[1].isdigit():
            hashtag_list.append(word[1:].split('.')[0].split('…')[0])
    
    return hashtag_list

In [None]:
from tqdm import tqdm

tqdm.pandas()

In [87]:
test_df = pd.read_csv('data/test.csv')
train_df = pd.read_csv('data/train.csv')

In [None]:
hashtags_train = train_df.progress_apply(lambda row: extract_hashtags(row['tweet_body']), axis=1)
hashtags_test = test_df.progress_apply(lambda row: extract_hashtags(row['tweet_body']), axis=1)
train_df['hashtags'] = hashtags_train
test_df['hashtags'] = hashtags_test

100%|████████████████████████████████████| 2859/2859 [00:00<00:00, 39651.99it/s]
100%|██████████████████████████████████████| 714/714 [00:00<00:00, 49413.14it/s]


In [None]:
import regex as re

emoji = re.compile('[\\u203C-\\u3299\\U0001F000-\\U0001F644]')

def extract_emojies(text):
    return re.findall(emoji, text)

In [None]:
emojies_train = train_df.progress_apply(lambda row: extract_emojies(row['tweet_body']), axis=1)
emojies_test = test_df.progress_apply(lambda row: extract_emojies(row['tweet_body']), axis=1)
train_df['emojies'] = emojies_train
test_df['emojies'] = emojies_test

100%|████████████████████████████████████| 2859/2859 [00:00<00:00, 47763.54it/s]
100%|██████████████████████████████████████| 714/714 [00:00<00:00, 29859.25it/s]


In [None]:
import re

def remove_links(text):
    return re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', text)

In [None]:
tweets_bodies_without_links_train = train_df.progress_apply(lambda row: remove_links(row['tweet_body']), axis=1)
tweets_bodies_without_links_test = test_df.progress_apply(lambda row: remove_links(row['tweet_body']), axis=1)
train_df['tweet_body'] = tweets_bodies_without_links_train
test_df['tweet_body'] = tweets_bodies_without_links_test

100%|████████████████████████████████████| 2859/2859 [00:00<00:00, 51057.92it/s]
100%|██████████████████████████████████████| 714/714 [00:00<00:00, 48704.35it/s]


In [None]:
train_df

Unnamed: 0,tweet_id,sarcasm_label,sarcasm_type,tweet_body,hashtags,emojies
0,1084373658745876480,not_sarcastic,,sorry but sue from the mediweight advert looks...,[],[]
1,812181186,not_sarcastic,,@davesnyder since this is fantasy can it be an...,[],[]
2,1090351571395899392,sarcastic,irony,"Good times, fun times here in Grand old Britan...",[],[]
3,914961092698353664,not_sarcastic,,I send light and love to all impacted by the #...,"[vegasshooting, VegasStrong]",[]
4,1080104089277681664,not_sarcastic,,Another year closer to death,[],[]
...,...,...,...,...,...,...
2854,1131854999246524416,not_sarcastic,,why did i think “knackered” was a swear word w...,[],[]
2855,1171093461111660546,not_sarcastic,,where are the best places to get an academic d...,[],[☹]
2856,1158305504575275009,not_sarcastic,,i accidentally just posted someone’s happy bir...,[],[]
2857,1145291654926147586,sarcastic,irony,I can confirm I am retiring from the consumpti...,[],[]


In [88]:
train_df['sarcasm_label'].replace({'sarcastic': 1, 'not_sarcastic': 0}, inplace=True)
test_df['sarcasm_label'].replace({'sarcastic': 1, 'not_sarcastic': 0}, inplace=True)

In [89]:
train_df['sarcasm_label'].value_counts()

0    2371
1     488
Name: sarcasm_label, dtype: int64

As we can see it's not balanced. This can be a #problem.

For this, we will compare the ratio with <a href="https://aclanthology.org/P19-1239/">Cai et al.(2019) papaer</a>.

In that paper we can see that the ration in train dataset is 8642/11174 which is nearly 0.77.

Here but the ration is 488/2371 which is nearly 0.2.

There is a huge difference.

## Model using BERT(ReBERTa)

### Setting the hyper parameters

|Parameter  	    |Ours  	    
|-	                |-	        
|Epochs  	        |1  	    
|Batch Size  	    |32  	    
|Seq Length  	    |75  	    
|Learning Rate      |5e-5       
|Weight decay       |1e-2       
|Warmup rate        |0.2        
|Gradient Clipping  |1.0        

Absoultely it is out start point and we try to make it better in the future.

In [91]:
train_args = {
    'reprocess_input_data': True,
    'overwrite_output_dir': True,
    'sliding_window': False,
    'max_seq_length': 75,
    'learning_rate': 0.00005,
    'weight_decay': 0.01,
    'warmup_ratio': 0.2,
    'max_grad_norm': 1.0,
    'num_train_epochs': 1,
    'train_batch_size': 32,
    'save_model_every_epoch': False,
    'save_steps': 4000,
    'fp16': True,
    'output_dir': '/outputs/',
    'evaluate_during_training': True,
}

As these days transformers are so popular we will use <a href="https://huggingface.co/roberta-base">roberta-base</a> model by hugging face.

<img src="https://huggingface.co/front/assets/huggingface_logo.svg">

#### What is ReBERTa base model?
Pretrained model on English language using a masked language modeling (MLM) objective.  It was introduced in <a href="https://arxiv.org/abs/1907.11692">this paper</a> and first released in <a href="https://github.com/pytorch/fairseq/tree/master/examples/roberta">this repository</a>. This model is case-sensitive: it makes a difference between english and English.

In [92]:
from simpletransformers.classification import ClassificationModel
import logging
import sklearn

logging.basicConfig(level=logging.DEBUG)
transformers_logger = logging.getLogger('transformers')
transformers_logger.setLevel(logging.WARNING)

In [94]:
model = ClassificationModel('roberta', 'roberta-base', num_labels=2, args=train_args) 

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /roberta-base/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /roberta-base/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification

In [95]:
new_train_df = train_df.copy() 
new_test_df = test_df.copy()

In [96]:
new_train_df.drop(columns=['sarcasm_type', 'tweet_id'], inplace=True)
new_test_df.drop(columns=['sarcasm_type', 'tweet_id'], inplace=True)

In [97]:
new_train_df = new_train_df.rename({'tweet_body': 'text'}, axis=1) 
new_test_df = new_test_df.rename({'tweet_body': 'text'}, axis=1) 

In [98]:
new_train_df = new_train_df.rename({'sarcasm_label': 'labels'}, axis=1) 
new_test_df = new_test_df.rename({'sarcasm_label': 'labels'}, axis=1) 

In [99]:
cols = new_test_df.columns.to_list()
print(cols)
cols = cols[::-1]
print(cols)

['labels', 'text']
['text', 'labels']


In [100]:
new_train_df = new_train_df[cols]
new_test_df = new_test_df[cols]

In [101]:
new_train_df

Unnamed: 0,text,labels
0,sorry but sue from the mediweight advert looks...,0
1,@davesnyder since this is fantasy can it be an...,0
2,"Good times, fun times here in Grand old Britan...",1
3,I send light and love to all impacted by the #...,0
4,Another year closer to death,0
...,...,...
2854,why did i think “knackered” was a swear word w...,0
2855,where are the best places to get an academic d...,0
2856,i accidentally just posted someone’s happy bir...,0
2857,I can confirm I am retiring from the consumpti...,1


In [102]:
new_test_df

Unnamed: 0,text,labels
0,I knew as soon as I heard Doing Ford was cutti...,0
1,"Eating apple sauce, chicken thighs, broccoli, ...",0
2,The greatest crime in the world is not develop...,0
3,@DemetriusHarmon i get paid on friday and i’m ...,0
4,"No, please, no.\n\nOpinion: Hillary Will Run A...",0
...,...,...
709,Imagine that it's going to cost me 600 pound t...,0
710,"people really out here tryna argue ""you don't ...",0
711,@ArmyWP_Football and their relentless running ...,0
712,Why is it that whether I get out of bed at 6.4...,0


In [103]:
new_test_df.columns.to_list()

['text', 'labels']

In [104]:
msk = np.random.rand(len(new_train_df)) < 0.9
new_train_df_modified = new_train_df[msk] 
eval_df = new_train_df[~msk]

In [105]:
print(len(eval_df))
print(len(new_train_df_modified))

298
2561


In [106]:
model.train_model(new_train_df_modified, eval_df=eval_df)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/2561 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_75_2_2


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/81 [00:00<?, ?it/s]

  model.parameters(), args.max_grad_norm
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/298 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_75_2_2
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to /outputs/.


(81,
 {'auprc': [0.24917649604117376],
  'auroc': [0.591307066916823],
  'eval_loss': [0.45024209273488897],
  'fn': [52],
  'fp': [0],
  'global_step': [81],
  'mcc': [0.0],
  'tn': [246],
  'tp': [0],
  'train_loss': [0.21875]})

In [107]:
result, model_outputs, wrong_predictions = model.eval_model(new_test_df, acc=sklearn.metrics.accuracy_score)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/714 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_75_2_2


Running Evaluation:   0%|          | 0/90 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
INFO:simpletransformers.classification.classification_model:{'mcc': 0.0, 'tp': 0, 'tn': 593, 'fp': 0, 'fn': 121, 'auroc': 0.623723049907321, 'auprc': 0.23275564539150412, 'eval_loss': 0.44201668633355035, 'acc': 0.8305322128851541}


AS we can see I got 83% accuracy, but the point here is that there is big problem here with postive labels. Out `fp` and `np` are zero which mean model just predicted positive.

So, to find out the reason I am going to use another to train my model with that first.

### Using new dataset

I am going to use this dataset which is provided by <a href="https://github.com/headacheboy/data-of-multimodal-sarcasm-detection">this repositry</a> which is for `Cai er al .(2019)` paper.

In [108]:
import csv
import urllib.request


def filtered(sentence):
  words = sentence.split()
  filter = ['sarcasm', 'sarcastic', 'reposting', '<url>', 'joke', 'humour', 'humor', 'jokes', 'irony', 'ironic', 'exgag']
  for filtered_word in filter:
    if filtered_word in words:
      return True
  return False


def download_and_clean(url, output_file, text_index, labels_index, to_filter=False):
  with open(output_file, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(['text', 'labels'])
    file = urllib.request.urlopen(url)
    for line in file:
      decoded_line = line.decode('utf-8')
      row = eval(decoded_line)
      if not to_filter or not filtered(row[text_index]):
        csv_writer.writerow([row[text_index], row[labels_index]])

The train, test, and validation datasets from the paper's Github data repository are now downloaded and pre-processed using the aforementioned methods. The result file is saved to the notebook's local storage as `train.csv`, `test.csv`, and `validate.csv`.

In [109]:
download_and_clean('https://raw.githubusercontent.com/headacheboy/data-of-multimodal-sarcasm-detection/master/text/train.txt', 'train.csv', 1, 2, to_filter=True)
download_and_clean('https://raw.githubusercontent.com/headacheboy/data-of-multimodal-sarcasm-detection/master/text/test2.txt', 'test.csv', 1, 3)
download_and_clean('https://raw.githubusercontent.com/headacheboy/data-of-multimodal-sarcasm-detection/master/text/valid2.txt', 'validate.csv', 1, 3)

In [110]:
train_df2 = pd.read_csv('train.csv')
test_df2 = pd.read_csv('test.csv')
validate_df2 = pd.read_csv('validate.csv')
train_df2.head()

Unnamed: 0,text,labels
0,<user> thanks for showing up for our appointme...,1
1,haha . # lol,1
2,i love waiting <num> min for a cab - such shor...,1
3,22 super funny quotes # funnyquotes # funnysa...,1
4,goog morning # sorrynotsorry # morning,1


In [127]:
model = ClassificationModel('roberta', 'roberta-base', num_labels=2, args=train_args) 

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /roberta-base/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /roberta-base/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification

In [128]:
model.train_model(train_df2, eval_df=validate_df2)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/19816 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_75_2_2


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/620 [00:00<?, ?it/s]

  model.parameters(), args.max_grad_norm
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/2410 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_75_2_2
INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to /outputs/.


(620,
 {'auprc': [0.9409724068275555],
  'auroc': [0.9705672043802807],
  'eval_loss': [0.3288929081022345],
  'fn': [23],
  'fp': [64],
  'global_step': [620],
  'mcc': [0.9257783912419192],
  'tn': [1387],
  'tp': [936],
  'train_loss': [0.00037638843059539795]})

In [129]:
result, model_outputs, wrong_predictions = model.eval_model(test_df2, acc=sklearn.metrics.accuracy_score)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/2409 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_75_2_2


Running Evaluation:   0%|          | 0/302 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.8751351185548726, 'tp': 908, 'tn': 1355, 'fp': 95, 'fn': 51, 'auroc': 0.9442030131962175, 'auprc': 0.8884163332015133, 'eval_loss': 0.5130235972191324, 'acc': 0.9393939393939394}


As we can see here is not like `iSarcasm dataset`. Let's also check `F1 score` here.

F1 = tp/(tp+(1/2 * (fp+fn))) = 0.92519084

Let's test it on our test dataframe.

In [130]:
result, model_outputs, wrong_predictions = model.eval_model(new_test_df, acc=sklearn.metrics.accuracy_score)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/714 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_75_2_2


Running Evaluation:   0%|          | 0/90 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': -0.024980025198240226, 'tp': 6, 'tn': 554, 'fp': 39, 'fn': 115, 'auroc': 0.4290970412386938, 'auprc': 0.1456865285353764, 'eval_loss': 1.7762877886494002, 'acc': 0.7843137254901961}


We do not have see good accuracy, but let's check the `F1 score` too.

F1 = tp/(tp+(1/2 * (fp+fn))) = 0.07

Terrible!

Let's train our trained model on the train dataset we had.

In [132]:
model.train_model(new_train_df_modified, eval_df=eval_df)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/2561 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_75_2_2


Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/81 [00:00<?, ?it/s]

  model.parameters(), args.max_grad_norm
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/298 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_75_2_2
INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to /outputs/.


(81,
 {'auprc': [0.3097707667443644],
  'auroc': [0.6293777360850532],
  'eval_loss': [0.4573719125044973],
  'fn': [50],
  'fp': [2],
  'global_step': [81],
  'mcc': [0.10003658080225802],
  'tn': [244],
  'tp': [2],
  'train_loss': [0.0413818359375]})

In [133]:
result, model_outputs, wrong_predictions = model.eval_model(new_test_df, acc=sklearn.metrics.accuracy_score)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/714 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_75_2_2


Running Evaluation:   0%|          | 0/90 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.18320183753518407, 'tp': 7, 'tn': 591, 'fp': 2, 'fn': 114, 'auroc': 0.6742714590330717, 'auprc': 0.3679701820202982, 'eval_loss': 0.4275962405734592, 'acc': 0.8375350140056023}


It got better but it is far from what we want.

# Imbalance dataset problem