<a href="https://colab.research.google.com/github/MAbuTalha/Research-Paper/blob/main/Transformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# verify GPU availability
import tensorflow as tf

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [2]:
import torch

print(torch.cuda.current_device())


print(torch.cuda.device(0))
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
print(torch.cuda.is_available())

torch.cuda.empty_cache()

0
<torch.cuda.device object at 0x7f2ed29c5090>
1
Tesla T4
True


In [3]:
torch.cuda.memory_summary(device=None, abbreviated=False)



In [4]:
import pandas as pd
import numpy as np
# Set notebook mode to work in offline
import warnings
warnings.filterwarnings("ignore")
import re
import re
import string
import sklearn
import itertools


In [7]:
#!pip install simpletransformers
#from simpletransformers.classification import ClassificationModel


In [8]:
from simpletransformers.classification import ClassificationModel

# Upload Files

In [9]:
from google.colab import files
TestData = files.upload()

Saving test.csv to test.csv


In [13]:
from google.colab import files
TrainData = files.upload()

Saving train.csv to train.csv


In [10]:
for fn in TestData.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(TestData[fn])))

User uploaded file "test.csv" with length 484872 bytes


In [14]:
for fn in TrainData.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(TrainData[fn])))

User uploaded file "train.csv" with length 1956493 bytes


In [15]:
train_df =pd.read_csv('train.csv')

In [16]:
train_df

Unnamed: 0,text,label
0,And without justice &amp; rule of law states d...,1
1,For developing countries like Pakistan there a...,1
2,I want to congratulate President Xi Jinping &a...,1
3,Today I visited Nandana Fort where Al Buruni s...,1
4,I congratulate the entire nation &amp; salute ...,1
...,...,...
12395,"Big Match today, https://t.co/PhRQiWPliI",0
12396,"Strategic timeout, sub players ny naswar rakh...",0
12397,"First boundary and Zalmi fans, https://t.co/vm...",0
12398,"Shame on you Shimmy Shimmy,",0


In [11]:
test_df =pd.read_csv('test.csv')

In [12]:
test_df

Unnamed: 0,text,label
0,Shab-e-Meraj Mubarak to Muslims all over the w...,1
1,"Our Prophet PBUH said ""many nations before you...",1
2,I welcome restoration of the ceasefire along t...,1
3,We also demonstrated to the world Pakistanâ€™s...,1
4,I inaugurated Sri Lanka's High Performance Spo...,1
...,...,...
3030,"Lahore Qalanders, https://t.co/uaYHRJCjSt",0
3031,"Lahore Qalanders is slow poison, Rana Fawad",0
3032,59 plus 59 plus 59 and additional 3 runs requi...,0
3033,Strategic time out mein Lahore Qalanders Dubai...,0




# Data Pre-Process

In [17]:
contractions = { 
"ain't": "am not",
"aren't": "are not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
"couldn't": "could not",
"couldn't've": "could not have",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"hadn't": "had not",
"hadn't've": "had not have",
"hasn't": "has not",
"haven't": "have not",
"he'd": "he would",
"he'd've": "he would have",
"he'll": "he will",
"he's": "he is",
"how'd": "how did",
"how'll": "how will",
"how's": "how is",
"i'd": "i would",
"i'll": "i will",
"i'm": "i am",
"i've": "i have",
"isn't": "is not",
"it'd": "it would",
"it'll": "it will",
"it's": "it is",
"let's": "let us",
"ma'am": "madam",
"mayn't": "may not",
"might've": "might have",
"mightn't": "might not",
"must've": "must have",
"mustn't": "must not",
"needn't": "need not",
"oughtn't": "ought not",
"shan't": "shall not",
"sha'n't": "shall not",
"she'd": "she would",
"she'll": "she will",
"she's": "she is",
"should've": "should have",
"shouldn't": "should not",
"that'd": "that would",
"that's": "that is",
"there'd": "there had",
"there's": "there is",
"they'd": "they would",
"they'll": "they will",
"they're": "they are",
"they've": "they have",
"wasn't": "was not",
"we'd": "we would",
"we'll": "we will",
"we're": "we are",
"we've": "we have",
"weren't": "were not",
"what'll": "what will",
"what're": "what are",
"what's": "what is",
"what've": "what have",
"where'd": "where did",
"where's": "where is",
"who'll": "who will",
"who's": "who is",
"won't": "will not",
"wouldn't": "would not",
"you'd": "you would",
"you'll": "you will",
"you're": "you are",
"thx"   : "thanks"
}

In [18]:
def remove_contractions(text):
    return contractions[text.lower()] if text.lower() in contractions.keys() else text

In [19]:
train_df.text=train_df.text.apply(remove_contractions)
test_df.text=test_df.text.apply(remove_contractions)

In [20]:
def clean_dataset(text):
    # Remove hashtag while keeping hashtag text
    text = re.sub(r'#','', text)
    # Remove HTML special entities (e.g. &amp;)
    text = re.sub(r'\&\w*;', '', text)
    # Remove tickers
    text = re.sub(r'\$\w*', '', text)
    # Remove hyperlinks
    text = re.sub(r'https?:\/\/.*\/\w*', '', text)
    # Remove whitespace (including new line characters)
    text = re.sub(r'\s\s+','', text)
    text = re.sub(r'[ ]{2, }',' ',text)
    # Remove URL, RT, mention(@)
    text=  re.sub(r'http(\S)+', '',text)
    text=  re.sub(r'http ...', '',text)
    text=  re.sub(r'(RT|rt)[ ]*@[ ]*[\S]+','',text)
    text=  re.sub(r'RT[ ]?@','',text)
    text = re.sub(r'@[\S]+','',text)
    # Remove words with 4 or fewer letters
    text = re.sub(r'\b\w{1,4}\b', '', text)
    #&, < and >
    text = re.sub(r'&amp;?', 'and',text)
    text = re.sub(r'&lt;','<',text)
    text = re.sub(r'&gt;','>',text)
    # Remove characters beyond Basic Multilingual Plane (BMP) of Unicode:
    text= ''.join(c for c in text if c <= '\uFFFF') 
    text = text.strip()
    # Remove misspelling words
    text = ''.join(''.join(s)[:2] for _, s in itertools.groupby(text))
    # Remove emoji
    text = emoji.demojize(text)
    text = text.replace(":"," ")
    text = ' '.join(text.split()) 
    text = re.sub("([^\x00-\x7F])+"," ",text)
    # Remove Mojibake (also extra spaces)
    text = ' '.join(re.sub("[^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a]", " ", text).split())
    return text

In [22]:
#!pip install emoji
import emoji 
train_df.text=train_df.text.apply(clean_dataset)
test_df.text=test_df.text.apply(clean_dataset)

Collecting emoji
[?25l  Downloading https://files.pythonhosted.org/packages/24/fa/b3368f41b95a286f8d300e323449ab4e86b85334c2e0b477e94422b8ed0f/emoji-1.2.0-py3-none-any.whl (131kB)
[K     |██▌                             | 10kB 22.9MB/s eta 0:00:01[K     |█████                           | 20kB 15.1MB/s eta 0:00:01[K     |███████▌                        | 30kB 13.3MB/s eta 0:00:01[K     |██████████                      | 40kB 12.6MB/s eta 0:00:01[K     |████████████▌                   | 51kB 8.6MB/s eta 0:00:01[K     |███████████████                 | 61kB 7.9MB/s eta 0:00:01[K     |█████████████████▌              | 71kB 8.9MB/s eta 0:00:01[K     |████████████████████            | 81kB 9.8MB/s eta 0:00:01[K     |██████████████████████▌         | 92kB 9.5MB/s eta 0:00:01[K     |█████████████████████████       | 102kB 8.3MB/s eta 0:00:01[K     |███████████████████████████▌    | 112kB 8.3MB/s eta 0:00:01[K     |██████████████████████████████  | 122kB 8.3MB/s eta 0:0

In [23]:
X_train_clean= train_df.text
X_test_clean = test_df.text
y_train_clean= train_df.label
y_test_clean = test_df.label

In [24]:
train_df_clean = pd.concat([X_train_clean, y_train_clean], axis=1)
print("Shape of training data set: ", train_df_clean.shape)
print("View of data set: ", train_df_clean.head())

Shape of training data set:  (12400, 2)
View of data set:                                                  text  label
0  without justicerule states disintegrate becaus...      1
1  developing countries Pakistan there lessons le...      1
2  congratulate President Jinpingthe Chinese mome...      1
3  Today visited Nandana where Buruni spent years...      1
4  congratulate entire nationsalute Armed Forces ...      1


In [44]:
eval_df_clean = pd.concat([X_test_clean, y_test_clean], axis=1)
print("Shape of Eval data set: ", eval_df_clean.shape)

Shape of Eval data set:  (3035, 2)


# BERT

In [None]:
bert_train_args = {
    'learning_rate':5e-5,
    'evaluate_during_training': True,
    'logging_steps': 100,
    'num_train_epochs': 1,
    'evaluate_during_training_steps': 100,
    'save_eval_checkpoints': False,
    'train_batch_size': 32,
    'eval_batch_size': 64,
    'overwrite_output_dir': True,
    'output_dir':'D:/Output-Bert-Test',
    'fp16': False,
    'n_gpu':1,
    'wandb_project': "Bert-Model-Test"
}

In [None]:
model_BertTest = ClassificationModel('bert', 'bert-base-cased', num_labels=2, use_cuda=True, cuda_device=0, args=bert_train_args)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=435779157.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=29.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=435797.0, style=ProgressStyle(descripti…




In [None]:
model_BertTest.train_model(train_df_clean, eval_df=eval_df_clean)

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=388.0, style=ProgressStyle(des…





(388,
 {'auprc': [0.9637922121024919,
   0.9723149065305086,
   0.9777347235634581,
   0.9790856989947447],
  'auroc': [0.9567460583026169,
   0.9677961085814757,
   0.9737804660767627,
   0.9754300645846496],
  'eval_loss': [0.2633437099478518,
   0.25683532766561257,
   0.2120604635468529,
   0.2039970917200359],
  'fn': [174, 112, 175, 136],
  'fp': [143, 211, 88, 123],
  'global_step': [100, 200, 300, 388],
  'mcc': [0.7912340512744837,
   0.7883140674342394,
   0.8281902174404301,
   0.8292724196279101],
  'tn': [1334, 1266, 1389, 1354],
  'tp': [1384, 1446, 1383, 1422],
  'train_loss': [0.4956423342227936,
   0.12316792458295822,
   0.10122314095497131,
   0.16261596977710724]})

In [None]:
result, model_outputs, wrong_predictions = model_BertTest.eval_model(eval_df_clean)

HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=48.0, style=ProgressStyle(descri…




VBox(children=(Label(value=' 0.03MB of 0.03MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.10122
lr,1e-05
global_step,388.0
_runtime,661.0
_timestamp,1615968520.0
_step,6.0
tp,1422.0
tn,1354.0
fp,123.0
fn,136.0


0,1
Training loss,█▁▁
lr,█▅▁
global_step,▁▁▃▃▆▆█
_runtime,▁▂▃▄▅▆█
_timestamp,▁▂▃▄▅▆█
_step,▁▂▃▅▆▇█
tp,▁█▁▅
tn,▅▁█▆
fp,▄█▁▃
fn,█▁█▄


In [None]:
from sklearn.metrics import f1_score,accuracy_score,classification_report, confusion_matrix

In [None]:
predictions = []
for x in model_outputs:
    predictions.append(np.argmax(x))

print('f1 score:', f1_score(eval_df_clean['label'], predictions))
print('Accuracy score:', accuracy_score(eval_df_clean['label'], predictions))

f1 score: 0.9165323880116015
Accuracy score: 0.9146622734761121


# **RoBerta**

In [26]:
Roberta_train_args = {
    'learning_rate':5e-5,
    'evaluate_during_training': True,
    'logging_steps': 100,
    'num_train_epochs': 1,
    'evaluate_during_training_steps': 100,
    'save_eval_checkpoints': False,
    'train_batch_size': 32,
    'eval_batch_size': 64,
    'overwrite_output_dir': True,
    'output_dir':'D:/Output-RoBerta-Test',
    'fp16': False,
    'n_gpu':1,
    'wandb_project': "RoBerta-Model-Test"
}

In [28]:
model_RoBertaTest = ClassificationModel('roberta', 'roberta-base', num_labels=2, use_cuda=True, cuda_device=0, args=Roberta_train_args)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=481.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=501200538.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355863.0, style=ProgressStyle(descript…




In [29]:
model_RoBertaTest.train_model(train_df_clean, eval_df=eval_df_clean)

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


wandb: Paste an API key from your profile and hit enter: ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=388.0, style=ProgressStyle(des…





(388,
 {'auprc': [0.9532820756734084,
   0.9706060738113343,
   0.9745665957434567,
   0.9763346240642646],
  'auroc': [0.9440874756536469,
   0.9661252165206682,
   0.9701820729143399,
   0.9727894467413478],
  'eval_loss': [0.3041374350771851,
   0.24041335157623203,
   0.22409798576457737,
   0.2159054933775527],
  'fn': [211, 169, 156, 139],
  'fp': [170, 150, 135, 137],
  'global_step': [100, 200, 300, 388],
  'mcc': [0.7492048645907181,
   0.7897682679160123,
   0.808248893335209,
   0.8179993877701902],
  'tn': [1307, 1327, 1342, 1340],
  'tp': [1347, 1389, 1402, 1419],
  'train_loss': [0.2912836968898773,
   0.32314732670783997,
   0.3600708246231079,
   0.2546045780181885]})

In [30]:
result, model_outputs, wrong_predictions = model_RoBertaTest.eval_model(eval_df_clean)

HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=48.0, style=ProgressStyle(descri…




VBox(children=(Label(value=' 0.03MB of 0.03MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.36007
lr,1e-05
global_step,388.0
_runtime,388.0
_timestamp,1615975157.0
_step,6.0
tp,1419.0
tn,1340.0
fp,137.0
fn,139.0


0,1
Training loss,▁▄█
lr,█▅▁
global_step,▁▁▃▃▆▆█
_runtime,▁▁▃▄▅▆█
_timestamp,▁▁▃▄▅▆█
_step,▁▂▃▅▆▇█
tp,▁▅▆█
tn,▁▅██
fp,█▄▁▁
fn,█▄▃▁


In [31]:
from sklearn.metrics import f1_score,accuracy_score,classification_report, confusion_matrix

predictions = []
for x in model_outputs:
    predictions.append(np.argmax(x))

print('f1 score:', f1_score(eval_df_clean['label'], predictions))
print('Accuracy score:', accuracy_score(eval_df_clean['label'], predictions))

f1 score: 0.9113680154142582
Accuracy score: 0.9090609555189456


# XLNet

In [32]:
XLNet_train_args = {
    'learning_rate':5e-5,
    'evaluate_during_training': True,
    'logging_steps': 100,
    'num_train_epochs': 1,
    'evaluate_during_training_steps': 100,
    'save_eval_checkpoints': False,
    'train_batch_size': 32,
    'eval_batch_size': 64,
    'overwrite_output_dir': True,
    'output_dir':'D:/Output-XLNet-Test',
    'fp16': False,
    'n_gpu':1,
    'wandb_project': "XLNet-Model-Test"
}

model_XLNetTest = ClassificationModel('xlnet', 'xlnet-base-cased', num_labels=2, use_cuda=True, cuda_device=0, args=XLNet_train_args)
model_XLNetTest.train_model(train_df_clean, eval_df=eval_df_clean)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=760.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=467042463.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=798011.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1382015.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

VBox(children=(Label(value=' 0.02MB of 0.02MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,3
_timestamp,1615975262
_step,2


0,1
_runtime,▁▁▁
_timestamp,▁▁▁
_step,▁▅█


HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=388.0, style=ProgressStyle(des…





(388,
 {'auprc': [0.9543671811139172,
   0.9706104378798261,
   0.9752064617506035,
   0.9767053809881437],
  'auroc': [0.9475935243263633,
   0.9652854248672195,
   0.9708656394193205,
   0.972918946308089],
  'eval_loss': [0.2967056491518936,
   0.23997145967102065,
   0.22378272530355994,
   0.2131235452131174],
  'fn': [284, 249, 193, 167],
  'fp': [102, 70, 96, 110],
  'global_step': [100, 200, 300, 388],
  'mcc': [0.751664329656208,
   0.7958254810340718,
   0.8114002869950933,
   0.8180928744877279],
  'tn': [1375, 1407, 1381, 1367],
  'tp': [1274, 1309, 1365, 1391],
  'train_loss': [0.40368014574050903,
   0.2744615375995636,
   0.25409039855003357,
   0.3600708842277527]})

In [33]:
result, model_outputs, wrong_predictions = model_XLNetTest.eval_model(eval_df_clean)

HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=48.0, style=ProgressStyle(descri…




VBox(children=(Label(value=' 0.02MB of 0.02MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,0.25409
lr,1e-05
global_step,388.0
_runtime,542.0
_timestamp,1615976782.0
_step,6.0
tp,1391.0
tn,1367.0
fp,110.0
fn,167.0


0,1
Training loss,█▂▁
lr,█▅▁
global_step,▁▁▃▃▆▆█
_runtime,▁▂▃▄▅▆█
_timestamp,▁▂▃▄▅▆█
_step,▁▂▃▅▆▇█
tp,▁▃▆█
tn,▂█▃▁
fp,▇▁▆█
fn,█▆▃▁


In [34]:
from sklearn.metrics import f1_score,accuracy_score,classification_report, confusion_matrix

predictions = []
for x in model_outputs:
    predictions.append(np.argmax(x))

print('f1 score:', f1_score(eval_df_clean['label'], predictions))
print('Accuracy score:', accuracy_score(eval_df_clean['label'], predictions))

f1 score: 0.909447531873161
Accuracy score: 0.9087314662273476


# T5

In [39]:
from simpletransformers.t5 import T5Model
T5_train_args = {
    'learning_rate':5e-5,
    'evaluate_during_training': True,
    'logging_steps': 100,
    'num_train_epochs': 1,
    'evaluate_during_training_steps': 100,
    'save_eval_checkpoints': False,
    'train_batch_size': 32,
    'eval_batch_size': 64,
    'overwrite_output_dir': True,
    'output_dir':'D:/Output-T5-Test',
    'fp16': False,
    'n_gpu':1,
    'wandb_project': "T5-Model-Test"
}

In [41]:
model_T5Test = T5Model('t5', 't5-base', num_labels=2, use_cuda=True, cuda_device=0, args=T5_train_args)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1199.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891691430.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1389353.0, style=ProgressStyle(descript…




In [1]:
model_T5Test.train_model(train_df_clean, eval_data=eval_df_clean)

NameError: ignored

In [None]:
result, model_outputs, wrong_predictions = model_T5Test.eval_model(eval_df_clean)

In [None]:
from sklearn.metrics import f1_score,accuracy_score,classification_report, confusion_matrix

predictions = []
for x in model_outputs:
    predictions.append(np.argmax(x))

print('f1 score:', f1_score(eval_df_clean['label'], predictions))
print('Accuracy score:', accuracy_score(eval_df_clean['label'], predictions))