## Model

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!pip install transformers -q

[K     |████████████████████████████████| 4.2 MB 5.2 MB/s 
[K     |████████████████████████████████| 596 kB 63.8 MB/s 
[K     |████████████████████████████████| 84 kB 3.4 MB/s 
[K     |████████████████████████████████| 6.6 MB 63.2 MB/s 
[?25h

In [3]:
from transformers import BertModel, BertTokenizerFast
import torch
import torch.nn as nn
from pprint import  pprint
import re
from collections import Counter
import pandas as pd

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
df = pd.read_csv("/content/gdrive/MyDrive/titans_milestone3/codebase/Task-TC/processed_data/train_TC_dataset_preprocessed.csv")

In [5]:
df.head()

Unnamed: 0,Id,Sentence,Span_text,ratio,Span,Span_start,Span_end,class,preprocessed_sent,preprocessed_span
0,111111111,Geneva - The World Health Organisation chief o...,appeared,32.0,"(149, 157)",149,157,Doubt,geneva the world health organisation chief on ...,appeared
1,111111111,"""The next transmission could be more pronounce...",The next transmission could be more pronounced...,2.666667,"(265, 323)",265,323,Appeal_to_Authority,the next transmission could be more pronounced...,the next transmission could be more pronounced...
2,111111111,"But Tedros voiced alarm that ""plague in Madaga...","a very, very different",4.25,"(1069, 1091)",1069,1091,Repetition,but tedros voiced alarm that plague in madagas...,a very very different
3,111111111,He also pointed to the presence of the pneumon...,He also pointed to the presence of the pneumon...,1.0,"(1334, 1462)",1334,1462,Appeal_to_fear-prejudice,he also pointed to the presence of the pneumon...,he also pointed to the presence of the pneumon...
4,111111111,He praised the rapid response from WHO and Mad...,but warned that the danger was not over,3.125,"(1577, 1616)",1577,1616,Appeal_to_fear-prejudice,he praised the rapid response from who and mad...,but warned that the danger was not over


In [6]:
possible_labels = df['class'].unique()

label_dict = {}
for index, possible_label in enumerate(possible_labels):
    label_dict[possible_label] = index
label_dict

{'Appeal_to_Authority': 1,
 'Appeal_to_fear-prejudice': 3,
 'Bandwagon,Reductio_ad_hitlerum': 12,
 'Black-and-White_Fallacy': 5,
 'Causal_Oversimplification': 9,
 'Doubt': 0,
 'Exaggeration,Minimisation': 11,
 'Flag-Waving': 7,
 'Loaded_Language': 6,
 'Name_Calling,Labeling': 8,
 'Repetition': 2,
 'Slogans': 4,
 'Thought-terminating_Cliches': 13,
 'Whataboutism,Straw_Men,Red_Herring': 10}

In [7]:
df['label'] = df['class'].replace(label_dict)

In [8]:
df.head()

Unnamed: 0,Id,Sentence,Span_text,ratio,Span,Span_start,Span_end,class,preprocessed_sent,preprocessed_span,label
0,111111111,Geneva - The World Health Organisation chief o...,appeared,32.0,"(149, 157)",149,157,Doubt,geneva the world health organisation chief on ...,appeared,0
1,111111111,"""The next transmission could be more pronounce...",The next transmission could be more pronounced...,2.666667,"(265, 323)",265,323,Appeal_to_Authority,the next transmission could be more pronounced...,the next transmission could be more pronounced...,1
2,111111111,"But Tedros voiced alarm that ""plague in Madaga...","a very, very different",4.25,"(1069, 1091)",1069,1091,Repetition,but tedros voiced alarm that plague in madagas...,a very very different,2
3,111111111,He also pointed to the presence of the pneumon...,He also pointed to the presence of the pneumon...,1.0,"(1334, 1462)",1334,1462,Appeal_to_fear-prejudice,he also pointed to the presence of the pneumon...,he also pointed to the presence of the pneumon...,3
4,111111111,He praised the rapid response from WHO and Mad...,but warned that the danger was not over,3.125,"(1577, 1616)",1577,1616,Appeal_to_fear-prejudice,he praised the rapid response from who and mad...,but warned that the danger was not over,3


In [9]:
#split training data
from sklearn.model_selection import train_test_split
p_cls_train, p_cls_test = train_test_split(df, test_size=0.2, random_state=42)

In [10]:
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [11]:
train_data = [(p_cls_train['Sentence'].values[i],p_cls_train['Span_text'].values[i]) for i in range(len(p_cls_train['Sentence'].values))]
test_data = [(p_cls_test['Sentence'].values[i],p_cls_test['Span_text'].values[i]) for i in range(len(p_cls_test['Sentence'].values))]

In [12]:
train_enc = tokenizer.batch_encode_plus(train_data, padding="longest", truncation=True, max_length=100, return_tensors="pt")
test_enc = tokenizer.batch_encode_plus(test_data, padding="longest", truncation=True, max_length=100, return_tensors="pt")

In [13]:
train_enc.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

In [14]:
train_enc["input_ids"][2]

tensor([  101,  2045,  2003,  2061,  2172,  8902, 24117,  2085,  1010,  2007,
         2035,  2122,  2601,  1998, 24060,  8973,  1012,   102,  2601,  1998,
        24060,  8973,   102,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0])

In [15]:
tokenizer.decode(train_enc["input_ids"][2])

'[CLS] there is so much collusion now, with all these dark and menacing developments. [SEP] dark and menacing developments [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

In [16]:
train_enc["token_type_ids"][2]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0])

In [17]:
train_enc["attention_mask"][2]

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0])

In [20]:
train_input_ids, train_type_ids, train_attn_mask = train_enc.input_ids, train_enc.token_type_ids, train_enc.attention_mask
test_input_ids, test_type_ids, test_attn_mask = test_enc.input_ids, test_enc.token_type_ids, test_enc.attention_mask

In [21]:
train_input_ids.shape, train_type_ids.shape, train_attn_mask.shape

(torch.Size([4902, 100]), torch.Size([4902, 100]), torch.Size([4902, 100]))

In [23]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Below we define a function to create train, test & valid dataloaders in Pytorch

batch_size = 10

def get_dataloader(input_ids, type_ids, attn_mask, y):
    data = TensorDataset(input_ids, type_ids, attn_mask, y)
    sampler = RandomSampler(data)
    dataloader = DataLoader(data, sampler=sampler, batch_size=batch_size)
    return dataloader

train_datalaoder = get_dataloader(train_input_ids, train_type_ids, train_attn_mask, torch.tensor(p_cls_train['label'].values))
test_datalaoder = get_dataloader(test_input_ids, test_type_ids, test_attn_mask, torch.tensor(p_cls_test['label'].values))

In [24]:
# Sanity check that the tensors returned by the dataloader are correct

for batch in train_datalaoder:
    input_ids, type_ids, attn_mask, y = batch
    print(input_ids.shape, type_ids.shape, attn_mask.shape, y.shape)
    break


torch.Size([10, 100]) torch.Size([10, 100]) torch.Size([10, 100]) torch.Size([10])


In [25]:
class BERTClassifier(nn.Module):
    def __init__(self, transformer):
        super().__init__()
        self.transformer = transformer
        self.linear = nn.Linear(768, 14)
        self.drop = nn.Dropout(p=0.1)

    def forward(self, in_ids, type_ids, attn_mask):
        op = self.transformer(input_ids=in_ids, attention_mask=attn_mask, 
                              token_type_ids=type_ids)

        return  self.linear(self.drop(op["pooler_output"]))

In [26]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


In [27]:
transformer = BertModel.from_pretrained("bert-base-uncased")
model = BERTClassifier(transformer).to(device)
print(f'The model has {count_parameters(model):,} trainable parameters')

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


The model has 109,493,006 trainable parameters


In [None]:
# for name, param in model.named_parameters():
#     if "pooler" in name or "linear" in name:#or "layer.11" in name or "layer.10" in name or "linear" in name:
#         param.requires_grad = True
#     else:
#         param.requires_grad = False
#     print(name, param.shape, param.requires_grad)

# print(f'The model has {count_parameters(model):,} trainable parameters')

In [28]:
# Training function: Performs forward propagation, backpropagation & optimization.
# We also implement gradient clipping, which prevents the gradients from exploding

def train(model, dataloader, optimizer, criterion, clip=1.0):

    model.train()

    ep_t_loss = 0
    batch_num  = 0
    pred, tgt = [], []
    for ix, batch in tqdm(enumerate(dataloader)):
        batch = tuple(t.to(device) for t in batch)
        input_ids, type_ids, attn_mask, y = batch
        
        optimizer.zero_grad()
        output = model(input_ids, type_ids, attn_mask)     
        loss = criterion(output, y)
        loss.backward()

        #gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        ep_t_loss += loss.item()
        batch_num += 1
        pred.extend(torch.argmax(output, -1).tolist())
        tgt.extend(y.tolist())

    return ep_t_loss/batch_num, metrics.f1_score(tgt, pred, average='macro')

# Evaluation function: Calculates loss on the validation data.
from sklearn import metrics

def evaluate(model, dataloader, criterion):

    model.eval()

    ep_t_loss = 0
    batch_num  = 0
    pred, tgt = [], []
    for ix, batch in enumerate(dataloader):
        batch = tuple(t.to(device) for t in batch)
        input_ids, type_ids, attn_mask, y = batch

        with torch.no_grad():
            output = model(input_ids, type_ids, attn_mask)
            
            loss = criterion(output, y)

            ep_t_loss += loss.item()
            batch_num += 1
            pred.extend(torch.argmax(output, -1).tolist())
            tgt.extend(y.tolist())
        
    return ep_t_loss/batch_num, metrics.f1_score(tgt, pred, average='macro'), pred, tgt

In [29]:
criterion = torch.nn.CrossEntropyLoss()
optim = torch.optim.AdamW(model.parameters(), lr = 3e-5)

In [30]:
best_valid_loss = float('inf')
tot_t_loss, tot_v_loss =[],[]
N_EPOCHS = 3

In [31]:
import time
from tqdm import tqdm

for epoch in tqdm(range(N_EPOCHS)): 

    tr_l, tr_f1= train(model, train_datalaoder, optim, criterion)
    tot_t_loss.append(tr_l)

    val_l, val_f1, pred, tgt = evaluate(model, test_datalaoder, criterion)
    tot_v_loss.append(val_l)
    
    if val_l < best_valid_loss:
        best_valid_loss = val_l
        best_pred, best_tgt = pred, tgt
        torch.save(model.state_dict(), 'model_least_loss.pt')
        print("\nBest Model Saved !!")
    elif epoch % 3 == 0:
        torch.save(model.state_dict(), 'model_checkpoint_'+str(epoch)+'.pt')
        print("\Checkpoint Model Saved !!")
    print("\n")
    print(f'Epoch: {epoch+1:02}')
    print(f'\tTrain Total Loss: {tr_l:.3f} | Train F1: {tr_f1:.3f}')
    print(f'\tVal. Total Loss: {val_l:.3f} | Valid F1: {val_f1:.3f}')
    print("_________________________________________________________________")

  0%|          | 0/3 [00:00<?, ?it/s]
0it [00:00, ?it/s][A
1it [00:10, 10.93s/it][A
2it [00:21, 10.94s/it][A
3it [00:32, 10.79s/it][A
4it [00:43, 10.72s/it][A
5it [00:53, 10.69s/it][A
6it [01:04, 10.67s/it][A
7it [01:14, 10.64s/it][A
8it [01:25, 10.63s/it][A
9it [01:36, 10.61s/it][A
10it [01:46, 10.60s/it][A
11it [01:57, 10.58s/it][A
12it [02:07, 10.58s/it][A
13it [02:18, 10.57s/it][A
14it [02:28, 10.58s/it][A
15it [02:41, 11.30s/it][A
16it [02:57, 12.44s/it][A
17it [03:11, 13.05s/it][A
18it [03:22, 12.48s/it][A
19it [03:33, 11.89s/it][A
20it [03:43, 11.46s/it][A
21it [03:54, 11.17s/it][A
22it [04:04, 10.96s/it][A
23it [04:15, 10.81s/it][A
24it [04:25, 10.70s/it][A
25it [04:35, 10.64s/it][A
26it [04:46, 10.59s/it][A
27it [04:56, 10.54s/it][A
28it [05:07, 10.52s/it][A
29it [05:18, 10.61s/it][A
30it [05:29, 10.82s/it][A
31it [05:39, 10.73s/it][A
32it [05:50, 10.66s/it][A
33it [06:00, 10.61s/it][A
34it [06:11, 10.57s/it][A
35it [06:21, 10.55s/it][A
36it

In [32]:
print(metrics.classification_report(best_tgt, best_pred))

              precision    recall  f1-score   support

           0       0.56      0.78      0.65       102
           1       1.00      0.07      0.12        30
           2       0.75      0.57      0.65       126
           3       0.34      0.72      0.46        53
           4       0.53      0.81      0.64        21
           5       0.00      0.00      0.00        20
           6       0.82      0.82      0.82       429
           7       0.52      0.64      0.58        50
           8       0.77      0.83      0.80       204
           9       0.44      0.24      0.31        50
          10       0.00      0.00      0.00        23
          11       0.58      0.48      0.53        98
          12       0.00      0.00      0.00        10
          13       0.20      0.30      0.24        10

    accuracy                           0.67      1226
   macro avg       0.46      0.45      0.41      1226
weighted avg       0.67      0.67      0.65      1226



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## TESTING

In [33]:
df_test = pd.read_csv("/content/gdrive/MyDrive/titans_milestone3/codebase/Task-TC/processed_data/test_TC_dataset_preprocessed.csv")
df_test.head()


Unnamed: 0,Id,Sentence,Span_text,ratio,Span,Span_start,Span_end,preprocessed_sent,preprocessed_span
0,813452859,: I guess her only chance is if Labour decides...,to dishonour democracy,7.666667,"(717, 739)",717,739,i guess her only chance is if labour decides t...,to dishonour democracy
1,813452859,: I guess her only chance is if Labour decides...,I guess her only chance is if Labour decides t...,1.045455,"(657, 773)",657,773,i guess her only chance is if labour decides t...,i guess her only chance is if labour decides t...
2,813452859,There is a chance; as unfortunately there are ...,dead in the water,21.5,"(1293, 1310)",1293,1310,there is a chance as unfortunately there are m...,dead in the water
3,813452859,There is a chance; as unfortunately there are ...,bitterly against it,28.666667,"(1128, 1147)",1128,1147,there is a chance as unfortunately there are m...,bitterly against it
4,813452859,There is a chance; as unfortunately there are ...,remoaner MPs,43.0,"(1256, 1268)",1256,1268,there is a chance as unfortunately there are m...,remoaner mps


In [34]:
df_dev = pd.read_csv("/content/gdrive/MyDrive/titans_milestone3/codebase/Task-TC/processed_data/dev_TC_dataset_preprocessed.csv")
df_dev.head()


Unnamed: 0,Id,Sentence,Span_text,ratio,Span,Span_start,Span_end,class,preprocessed_sent,preprocessed_span
0,730093263,"Earlier, I blogged that the police had release...",white,25.0,"(123, 128)",123,128,"Whataboutism,Straw_Men,Red_Herring",earlier i blogged that the police had released...,white
1,730093263,( Most people named Quentin Lamar Smith are bl...,black,12.0,"(352, 357)",352,357,"Whataboutism,Straw_Men,Red_Herring",most people named quentin lamar smith are blac...,black
2,730093263,He called them “true American heroes.”,“true American heroes.”,2.0,"(1370, 1393)",1370,1393,Flag-Waving,he called them true american heroes,true american heroes
3,730093263,"it is the same guy, and he is black.\n",black,9.0,"(2434, 2439)",2434,2439,"Whataboutism,Straw_Men,Red_Herring",it is the same guy and he is black,black
4,730093263,"This goes in our ""Black Murders Of White Cops""...","""Black Murders Of White Cops""",6.0,"(2458, 2487)",2458,2487,Loaded_Language,this goes in our black murders of white cops c...,black murders of white cops


In [35]:
class BERTClassifier(nn.Module):
    def __init__(self, transformer):
        super().__init__()
        self.transformer = transformer
        self.linear = nn.Linear(768, 14)
        self.drop = nn.Dropout(p=0.3)

    def forward(self, in_ids, type_ids, attn_mask):
        op = self.transformer(input_ids=in_ids, attention_mask=attn_mask, 
                              token_type_ids=type_ids)

        return  self.linear(self.drop(op["pooler_output"]))

In [36]:
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
transformer = BertModel.from_pretrained("bert-base-uncased")
model2 = BERTClassifier(transformer).to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [37]:
model2.load_state_dict(torch.load("model_least_loss.pt"))
model2.eval()

BERTClassifier(
  (transformer): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_aff

In [38]:
def predict(df):
    sentences=df['preprocessed_sent'].values
    spans = df['preprocessed_span'].values
    result=[]
    for i in range(len(sentences)):
        testf_data=[(sentences[i],spans[i])]
        testf_enc = tokenizer.batch_encode_plus(testf_data, padding="max_length", truncation=True, max_length=100, return_tensors="pt")

        input_ids = testf_enc["input_ids"].to(device)
        attn_mask = testf_enc["attention_mask"].to(device)
        type_ids = testf_enc["token_type_ids"].to(device)

        output = model2(input_ids, type_ids, attn_mask)
        res=torch.argmax(output, -1).tolist()[0]

        result.append(res)
    return result

In [39]:
test_result=predict(df_test)
dev_result=predict(df_dev)

In [40]:
df_test['result']=test_result
df_dev['result']=dev_result

In [41]:
label_dict_rev={}
for k, v in enumerate(label_dict):
    label_dict_rev[k]=v

In [42]:
label_dict_rev

{0: 'Doubt',
 1: 'Appeal_to_Authority',
 2: 'Repetition',
 3: 'Appeal_to_fear-prejudice',
 4: 'Slogans',
 5: 'Black-and-White_Fallacy',
 6: 'Loaded_Language',
 7: 'Flag-Waving',
 8: 'Name_Calling,Labeling',
 9: 'Causal_Oversimplification',
 10: 'Whataboutism,Straw_Men,Red_Herring',
 11: 'Exaggeration,Minimisation',
 12: 'Bandwagon,Reductio_ad_hitlerum',
 13: 'Thought-terminating_Cliches'}

In [43]:
df_test['label_pred_value'] = df_test['result'].replace(label_dict_rev)
df_test.head()

Unnamed: 0,Id,Sentence,Span_text,ratio,Span,Span_start,Span_end,preprocessed_sent,preprocessed_span,result,label_pred_value
0,813452859,: I guess her only chance is if Labour decides...,to dishonour democracy,7.666667,"(717, 739)",717,739,i guess her only chance is if labour decides t...,to dishonour democracy,6,Loaded_Language
1,813452859,: I guess her only chance is if Labour decides...,I guess her only chance is if Labour decides t...,1.045455,"(657, 773)",657,773,i guess her only chance is if labour decides t...,i guess her only chance is if labour decides t...,3,Appeal_to_fear-prejudice
2,813452859,There is a chance; as unfortunately there are ...,dead in the water,21.5,"(1293, 1310)",1293,1310,there is a chance as unfortunately there are m...,dead in the water,6,Loaded_Language
3,813452859,There is a chance; as unfortunately there are ...,bitterly against it,28.666667,"(1128, 1147)",1128,1147,there is a chance as unfortunately there are m...,bitterly against it,6,Loaded_Language
4,813452859,There is a chance; as unfortunately there are ...,remoaner MPs,43.0,"(1256, 1268)",1256,1268,there is a chance as unfortunately there are m...,remoaner mps,8,"Name_Calling,Labeling"


In [44]:
df_dev['label_pred_value'] = df_dev['result'].replace(label_dict_rev)
df_dev.head()

Unnamed: 0,Id,Sentence,Span_text,ratio,Span,Span_start,Span_end,class,preprocessed_sent,preprocessed_span,result,label_pred_value
0,730093263,"Earlier, I blogged that the police had release...",white,25.0,"(123, 128)",123,128,"Whataboutism,Straw_Men,Red_Herring",earlier i blogged that the police had released...,white,2,Repetition
1,730093263,( Most people named Quentin Lamar Smith are bl...,black,12.0,"(352, 357)",352,357,"Whataboutism,Straw_Men,Red_Herring",most people named quentin lamar smith are blac...,black,2,Repetition
2,730093263,He called them “true American heroes.”,“true American heroes.”,2.0,"(1370, 1393)",1370,1393,Flag-Waving,he called them true american heroes,true american heroes,8,"Name_Calling,Labeling"
3,730093263,"it is the same guy, and he is black.\n",black,9.0,"(2434, 2439)",2434,2439,"Whataboutism,Straw_Men,Red_Herring",it is the same guy and he is black,black,6,Loaded_Language
4,730093263,"This goes in our ""Black Murders Of White Cops""...","""Black Murders Of White Cops""",6.0,"(2458, 2487)",2458,2487,Loaded_Language,this goes in our black murders of white cops c...,black murders of white cops,8,"Name_Calling,Labeling"


In [45]:
df_res_test = df_test[['Id','label_pred_value','Span_start','Span_end']]
df_res_test.head()

Unnamed: 0,Id,label_pred_value,Span_start,Span_end
0,813452859,Loaded_Language,717,739
1,813452859,Appeal_to_fear-prejudice,657,773
2,813452859,Loaded_Language,1293,1310
3,813452859,Loaded_Language,1128,1147
4,813452859,"Name_Calling,Labeling",1256,1268


In [46]:
df_res_dev = df_dev[['Id','label_pred_value','Span_start','Span_end']]
df_res_dev.head()

Unnamed: 0,Id,label_pred_value,Span_start,Span_end
0,730093263,Repetition,123,128
1,730093263,Repetition,352,357
2,730093263,"Name_Calling,Labeling",1370,1393
3,730093263,Loaded_Language,2434,2439
4,730093263,"Name_Calling,Labeling",2458,2487


In [47]:
df_res_test.to_csv('/content/gdrive/MyDrive/titans_milestone3/codebase/Result-TC/tc_res_final_test.tsv',index=False,header=False,sep="\t")

In [48]:
df_res_dev.to_csv('/content/gdrive/MyDrive/titans_milestone3/codebase/Result-TC/tc_res_final_dev.tsv',index=False,header=False,sep="\t")