In [1]:
import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import pickle
import os
import pandas as pd
import re
import nltk.data

In [2]:
from transformers import BertTokenizer

In [3]:
train_bodies_data = pd.read_csv('fnc-1/train_bodies.csv')
train_stances_data = pd.read_csv('fnc-1/train_stances.csv')

In [4]:
train_bodies_data.head()

Unnamed: 0,Body ID,articleBody
0,0,A small meteorite crashed into a wooded area i...
1,4,Last week we hinted at what was to come as Ebo...
2,5,(NEWSER) – Wonder how long a Quarter Pounder w...
3,6,"Posting photos of a gun-toting child online, I..."
4,7,At least 25 suspected Boko Haram insurgents we...


In [5]:
train_stances_data.head()

Unnamed: 0,Headline,Body ID,Stance
0,Police find mass graves with at least '15 bodi...,712,unrelated
1,Hundreds of Palestinians flee floods in Gaza a...,158,agree
2,"Christian Bale passes on role of Steve Jobs, a...",137,unrelated
3,HBO and Apple in Talks for $15/Month Apple TV ...,1034,unrelated
4,Spider burrowed through tourist's stomach and ...,1923,disagree


In [6]:
df_all = pd.merge(train_bodies_data, train_stances_data, left_on = 'Body ID', right_on = 'Body ID')
df_all

Unnamed: 0,Body ID,articleBody,Headline,Stance
0,0,A small meteorite crashed into a wooded area i...,"Soldier shot, Parliament locked down after gun...",unrelated
1,0,A small meteorite crashed into a wooded area i...,Tourist dubbed ‘Spider Man’ after spider burro...,unrelated
2,0,A small meteorite crashed into a wooded area i...,Luke Somers 'killed in failed rescue attempt i...,unrelated
3,0,A small meteorite crashed into a wooded area i...,BREAKING: Soldier shot at War Memorial in Ottawa,unrelated
4,0,A small meteorite crashed into a wooded area i...,Giant 8ft 9in catfish weighing 19 stone caught...,unrelated
...,...,...,...,...
49967,2532,"ANN ARBOR, Mich. – A pizza delivery man in Mic...","Pizza delivery man gets tipped more than $2,00...",agree
49968,2532,"ANN ARBOR, Mich. – A pizza delivery man in Mic...","Pizza delivery man gets $2,000 tip",agree
49969,2532,"ANN ARBOR, Mich. – A pizza delivery man in Mic...","Luckiest Pizza Delivery Guy Ever Gets $2,000 Tip",agree
49970,2532,"ANN ARBOR, Mich. – A pizza delivery man in Mic...",Ann Arbor pizza delivery driver surprised with...,agree


In [7]:
alphabets= "([A-Za-z])"
prefixes = "(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = "(Inc|Ltd|Jr|Sr|Co)"
starters = "(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = "[.](com|net|org|io|gov)"

def split_into_sentences(text):
    text = " " + text + "  "
    text = text.replace("\n"," ")
    text = re.sub(prefixes,"\\1<prd>",text)
    text = re.sub(websites,"<prd>\\1",text)
    if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>")
    text = re.sub("\s" + alphabets + "[.] "," \\1<prd> ",text)
    text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>\\3<prd>",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>",text)
    text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text)
    text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text)
    text = re.sub(" " + alphabets + "[.]"," \\1<prd>",text)
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    text = text.replace("<prd>",".")
    sentences = text.split("<stop>")
    sentences = sentences[:-1]
    sentences = [s.strip() for s in sentences]
    return sentences

In [8]:
related_df = df_all[df_all.Stance != 'unrelated']
related_df

Unnamed: 0,Body ID,articleBody,Headline,Stance
24,0,A small meteorite crashed into a wooded area i...,Small Meteorite Strikes in Nicaragua's Capital...,agree
36,4,Last week we hinted at what was to come as Ebo...,It Begins: HazMat-Wearing Passenger Spotted At...,discuss
69,5,(NEWSER) – Wonder how long a Quarter Pounder w...,20-Year-Old Quarter Pounder Looks About the Same,discuss
88,5,(NEWSER) – Wonder how long a Quarter Pounder w...,Two blokes dared to eat 20-year-old burger for...,discuss
93,6,"Posting photos of a gun-toting child online, I...",‘The cub of Baghdadi': ISIS reports its younge...,discuss
...,...,...,...,...
49967,2532,"ANN ARBOR, Mich. – A pizza delivery man in Mic...","Pizza delivery man gets tipped more than $2,00...",agree
49968,2532,"ANN ARBOR, Mich. – A pizza delivery man in Mic...","Pizza delivery man gets $2,000 tip",agree
49969,2532,"ANN ARBOR, Mich. – A pizza delivery man in Mic...","Luckiest Pizza Delivery Guy Ever Gets $2,000 Tip",agree
49970,2532,"ANN ARBOR, Mich. – A pizza delivery man in Mic...",Ann Arbor pizza delivery driver surprised with...,agree


In [9]:
train_df = pd.DataFrame(columns = ['Sentence', 'Headline', 'Stance'])
val_df = pd.DataFrame(columns = ['Sentence', 'Headline', 'Stance'])

In [10]:
related_df.reset_index()

i = 0
k = 0
t = 1
for index, row in related_df.iterrows():
    sentences = split_into_sentences(row['articleBody'])    

    if(len(sentences) != 0):
        sentence = sentences[0]
        j = 1
        while(len(sentence.split()) < 7):
            if(j >= len(sentences)):
                break
            sentence += sentences[j]
            j += 1
    else:
        sentence = row['articleBody']

    if(t < 5):
        train_df.loc[i] = [sentence, row['Headline'], row['Stance']]
        t += 1
        i += 1
    else:
        val_df.loc[k] = [sentence, row['Headline'], row['Stance']]
        t = 1
        k += 1

In [11]:
train_df

Unnamed: 0,Sentence,Headline,Stance
0,A small meteorite crashed into a wooded area i...,Small Meteorite Strikes in Nicaragua's Capital...,agree
1,Last week we hinted at what was to come as Ebo...,It Begins: HazMat-Wearing Passenger Spotted At...,discuss
2,(NEWSER) – Wonder how long a Quarter Pounder w...,20-Year-Old Quarter Pounder Looks About the Same,discuss
3,(NEWSER) – Wonder how long a Quarter Pounder w...,Two blokes dared to eat 20-year-old burger for...,discuss
4,At least 25 suspected Boko Haram insurgents we...,Insurgents killed in Nigeria despite alleged t...,discuss
...,...,...,...
10737,"ANN ARBOR, Mich.– A pizza delivery man in Mich...","Pizza delivery driver surprised with $2,000 tip",agree
10738,"ANN ARBOR, Mich.– A pizza delivery man in Mich...","Pizza delivery man gets tipped more than $2,00...",agree
10739,"ANN ARBOR, Mich.– A pizza delivery man in Mich...","Pizza delivery man gets $2,000 tip",agree
10740,"ANN ARBOR, Mich.– A pizza delivery man in Mich...",Ann Arbor pizza delivery driver surprised with...,agree


In [12]:
val_df

Unnamed: 0,Sentence,Headline,Stance
0,"Posting photos of a gun-toting child online, I...",‘The cub of Baghdadi': ISIS reports its younge...,discuss
1,"(CNN) -- A meteorite crashed down in Managua, ...",Small Meteorite Strikes in Nicaragua's Capital...,agree
2,mboxCreate('FoxNews-Politics-Autoplay-Videos-I...,US probing claims ISIS fighters seized airdrop...,discuss
3,HBO's subscription streaming service will be c...,HBO streaming service could launch in April fo...,discuss
4,"Eran Cicurel, an editor at Voice of Israel, ha...",YPG Confirms: Gill Rosenberg Not Captured in K...,agree
...,...,...,...
2680,More than 200 schoolgirls were kidnapped in Ap...,Nigeria claims deal with Boko Haram on ceasefi...,discuss
2681,More than 200 schoolgirls were kidnapped in Ap...,"Boko Haram claims to have German hostage, deni...",discuss
2682,A Guantanamo Bay prisoner released last year a...,Senator: Detainees swapped for Bergdahl have c...,discuss
2683,A Guantanamo Bay prisoner released last year a...,Official: Gitmo prisoner traded for Bergdahl r...,discuss


In [13]:
class MNLIDataBert(Dataset):

  def __init__(self, train_df, val_df):
    self.label_dict = {'disagree': 0, 'discuss': 1, 'agree': 2}

    self.train_df = train_df
    self.val_df = val_df

    self.base_path = '/content/'
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True) # Using a pre-trained BERT tokenizer to encode sentences
    self.train_data = None
    self.val_data = None
    self.init_data()

  def init_data(self):
    self.train_data = self.load_data(self.train_df)
    self.val_data = self.load_data(self.val_df)

  def load_data(self, df):
    MAX_LEN = 512
    token_ids = []
    mask_ids = []
    seg_ids = []
    y = []

    premise_list = df['Sentence'].to_list()
    hypothesis_list = df['Headline'].to_list()
    label_list = df['Stance'].to_list()

    for (premise, hypothesis, label) in zip(premise_list, hypothesis_list, label_list):
      premise_id = self.tokenizer.encode(premise, add_special_tokens = False)
      hypothesis_id = self.tokenizer.encode(hypothesis, add_special_tokens = False)
      pair_token_ids = [self.tokenizer.cls_token_id] + premise_id + [self.tokenizer.sep_token_id] + hypothesis_id + [self.tokenizer.sep_token_id]
      premise_len = len(premise_id)
      hypothesis_len = len(hypothesis_id)

      segment_ids = torch.tensor([0] * (premise_len + 2) + [1] * (hypothesis_len + 1))  # sentence 0 and sentence 1
      attention_mask_ids = torch.tensor([1] * (premise_len + hypothesis_len + 3))  # mask padded values

      token_ids.append(torch.tensor(pair_token_ids))
      seg_ids.append(segment_ids)
      mask_ids.append(attention_mask_ids)
      y.append(self.label_dict[label])
    
    token_ids = pad_sequence(token_ids, batch_first=True)
    mask_ids = pad_sequence(mask_ids, batch_first=True)
    seg_ids = pad_sequence(seg_ids, batch_first=True)
    y = torch.tensor(y)
    dataset = TensorDataset(token_ids, mask_ids, seg_ids, y)
    print(len(dataset))
    return dataset

  def get_data_loaders(self, batch_size=25, shuffle=True):
    train_loader = DataLoader(
      self.train_data,
      shuffle=shuffle,
      batch_size=batch_size
    )

    val_loader = DataLoader(
      self.val_data,
      shuffle=shuffle,
      batch_size=batch_size
    )

    return train_loader, val_loader
  
mnli_dataset = MNLIDataBert(train_df, val_df)

10742
2685


In [14]:
train_loader, val_loader = mnli_dataset.get_data_loaders(batch_size=12)

In [15]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [16]:
from transformers import BertForSequenceClassification, AdamW

model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [17]:
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.0}
]

In [18]:
# This variable contains all of the hyperparemeter information our training loop needs
optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5, correct_bias=False)



In [19]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 109,484,547 trainable parameters


In [20]:
def multi_acc(y_pred, y_test):
  acc = (torch.log_softmax(y_pred, dim=1).argmax(dim=1) == y_test).sum().float() / float(y_test.size(0))
  return acc

In [21]:
import time

In [22]:
EPOCHS = 5

def train(model, train_loader, val_loader, optimizer):  
  total_step = len(train_loader)

  for epoch in range(EPOCHS):
    start = time.time()
    model.train()
    total_train_loss = 0
    total_train_acc  = 0
    for batch_idx, (pair_token_ids, mask_ids, seg_ids, y) in enumerate(train_loader):
      optimizer.zero_grad()
      pair_token_ids = pair_token_ids.to(device)
      mask_ids = mask_ids.to(device)
      seg_ids = seg_ids.to(device)
      labels = y.to(device)
      # prediction = model(pair_token_ids, mask_ids, seg_ids)
      loss, prediction = model(pair_token_ids, 
                             token_type_ids=seg_ids, 
                             attention_mask=mask_ids, 
                             labels=labels).values()

      # loss = criterion(prediction, labels)
      acc = multi_acc(prediction, labels)

      loss.backward()
      optimizer.step()
      
      total_train_loss += loss.item()
      total_train_acc  += acc.item()

    train_acc  = total_train_acc/len(train_loader)
    train_loss = total_train_loss/len(train_loader)
    model.eval()
    total_val_acc  = 0
    total_val_loss = 0
    with torch.no_grad():
      for batch_idx, (pair_token_ids, mask_ids, seg_ids, y) in enumerate(val_loader):
        optimizer.zero_grad()
        pair_token_ids = pair_token_ids.to(device)
        mask_ids = mask_ids.to(device)
        seg_ids = seg_ids.to(device)
        labels = y.to(device)

        # prediction = model(pair_token_ids, mask_ids, seg_ids)
        loss, prediction = model(pair_token_ids, 
                             token_type_ids=seg_ids, 
                             attention_mask=mask_ids, 
                             labels=labels).values()
        
        # loss = criterion(prediction, labels)
        acc = multi_acc(prediction, labels)

        total_val_loss += loss.item()
        total_val_acc  += acc.item()

    val_acc  = total_val_acc/len(val_loader)
    val_loss = total_val_loss/len(val_loader)
    end = time.time()
    hours, rem = divmod(end-start, 3600)
    minutes, seconds = divmod(rem, 60)

    print(f'Epoch {epoch+1}: train_loss: {train_loss:.4f} train_acc: {train_acc:.4f} | val_loss: {val_loss:.4f} val_acc: {val_acc:.4f}')
    print("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

In [23]:
train(model, train_loader, val_loader, optimizer)

Epoch 1: train_loss: 0.4919 train_acc: 0.8075 | val_loss: 0.2652 val_acc: 0.9103
00:12:02.28
Epoch 2: train_loss: 0.1518 train_acc: 0.9485 | val_loss: 0.1394 val_acc: 0.9567
00:13:30.68
Epoch 3: train_loss: 0.0550 train_acc: 0.9834 | val_loss: 0.1011 val_acc: 0.9748
00:17:41.53
Epoch 4: train_loss: 0.0395 train_acc: 0.9873 | val_loss: 0.1134 val_acc: 0.9709
00:22:31.75
Epoch 5: train_loss: 0.0213 train_acc: 0.9936 | val_loss: 0.0935 val_acc: 0.9809
00:19:44.19


In [27]:
torch.save(model,"mymodel.pth")

# ----------------------

In [38]:
from transformers import TextClassificationPipeline

In [35]:
comp_bodies = pd.read_csv('fnc-1/competition_test_bodies.csv')
comp_stances = pd.read_csv('fnc-1/competition_test_stances.csv')

In [36]:
comp_merged = pd.merge(comp_bodies, comp_stances, left_on = 'Body ID', right_on = 'Body ID')
comp_merged

Unnamed: 0,Body ID,articleBody,Headline,Stance
0,1,Al-Sisi has denied Israeli reports stating tha...,Apple installing safes in-store to protect gol...,unrelated
1,1,Al-Sisi has denied Israeli reports stating tha...,El-Sisi denies claims he'll give Sinai land to...,agree
2,1,Al-Sisi has denied Israeli reports stating tha...,Apple to keep gold Watch Editions in special i...,unrelated
3,1,Al-Sisi has denied Israeli reports stating tha...,Apple Stores to Keep Gold “Edition” Apple Watc...,unrelated
4,1,Al-Sisi has denied Israeli reports stating tha...,South Korean woman's hair 'eaten' by robot vac...,unrelated
...,...,...,...,...
25408,2586,Remember how much Republicans wanted to repeal...,A Sign That Obamacare Exchanges Are Failing,disagree
25409,2586,Remember how much Republicans wanted to repeal...,Republicans call Obamacare a 'failure.' These ...,agree
25410,2586,Remember how much Republicans wanted to repeal...,CBO’s Alternate Facts Show Obamacare is Unsust...,disagree
25411,2586,Remember how much Republicans wanted to repeal...,Why Obamacare failed,disagree


In [127]:
comp_merged_related = comp_merged[comp_merged.Stance != 'unrelated']
comp_merged_related

Unnamed: 0,Body ID,articleBody,Headline,Stance
1,1,Al-Sisi has denied Israeli reports stating tha...,El-Sisi denies claims he'll give Sinai land to...,agree
34,2,A bereaved Afghan mother took revenge on the T...,Afghan Mother Kills 25 Taliban Fighters In Sev...,agree
107,3,CNBC is reporting Tesla has chosen Nevada as t...,REPORT: Tesla Chooses Nevada For Site Of Its M...,discuss
201,12,A 4-inch version of the iPhone 6 is said to be...,4-inch iPhone 6 reported to be in development,discuss
208,19,GR editor’s Note\n\nThere are no reports in th...,Iraqi Army Downs Two British Planes Carrying W...,discuss
...,...,...,...,...
25408,2586,Remember how much Republicans wanted to repeal...,A Sign That Obamacare Exchanges Are Failing,disagree
25409,2586,Remember how much Republicans wanted to repeal...,Republicans call Obamacare a 'failure.' These ...,agree
25410,2586,Remember how much Republicans wanted to repeal...,CBO’s Alternate Facts Show Obamacare is Unsust...,disagree
25411,2586,Remember how much Republicans wanted to repeal...,Why Obamacare failed,disagree


In [78]:
pipe = TextClassificationPipeline(model=model, 
                                  tokenizer=BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True),
                                  device=0)

In [134]:
guess_df = pd.DataFrame(columns = ['Guess'])

In [135]:
comp_merged_related.reset_index()
guess_df.reset_index()

tok = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

i = 0
for index, row in comp_merged_related.iterrows():
    
    sentences = split_into_sentences(row['articleBody'])    

    if(len(sentences) != 0):
        sentence = sentences[0]
        j = 1
        while(len(sentence.split()) < 7):
            if(j >= len(sentences)):
                break
            sentence += sentences[j]
            j += 1
    else:
        sentence = row['articleBody']
        
        
    sentence_id = tok.encode(sentence, add_special_tokens = False)
    headline_id = tok.encode(row['Headline'], add_special_tokens = False)
    merged_sentence = "[CLS] " + sentence + " [SEP] " + row['Headline'] + " [SEP]"
    
    t = (pipe(merged_sentence)[0]['label'])
    if (t=='LABEL_0'):
        guess_df.loc[i] = 'disagree'
    elif (t == 'LABEL_1'):
        guess_df.loc[i] = 'discuss'
    else:
        guess_df.loc[i] = 'agree'
    
    i += 1



In [139]:
comp_clean = pd.DataFrame(columns = ['Body ID', 'articleBody', 'Headline', 'Stance'])

In [142]:
comp_merged_related.reset_index()

i = 0

for index, row in comp_merged_related.iterrows():
    comp_clean.loc[i] = [row['Body ID'], row['articleBody'], row['Headline'], row['Stance']]
    i += 1

In [143]:
comp_clean

Unnamed: 0,Body ID,articleBody,Headline,Stance
0,1,Al-Sisi has denied Israeli reports stating tha...,El-Sisi denies claims he'll give Sinai land to...,agree
1,2,A bereaved Afghan mother took revenge on the T...,Afghan Mother Kills 25 Taliban Fighters In Sev...,agree
2,3,CNBC is reporting Tesla has chosen Nevada as t...,REPORT: Tesla Chooses Nevada For Site Of Its M...,discuss
3,12,A 4-inch version of the iPhone 6 is said to be...,4-inch iPhone 6 reported to be in development,discuss
4,19,GR editor’s Note\n\nThere are no reports in th...,Iraqi Army Downs Two British Planes Carrying W...,discuss
...,...,...,...,...
7059,2586,Remember how much Republicans wanted to repeal...,A Sign That Obamacare Exchanges Are Failing,disagree
7060,2586,Remember how much Republicans wanted to repeal...,Republicans call Obamacare a 'failure.' These ...,agree
7061,2586,Remember how much Republicans wanted to repeal...,CBO’s Alternate Facts Show Obamacare is Unsust...,disagree
7062,2586,Remember how much Republicans wanted to repeal...,Why Obamacare failed,disagree


In [138]:
guess_df

Unnamed: 0,Guess
0,disagree
1,agree
2,discuss
3,discuss
4,discuss
...,...
7059,agree
7060,disagree
7061,agree
7062,agree


In [144]:
comp_clean = comp_clean.join(guess_df)

In [145]:
comp_clean

Unnamed: 0,Body ID,articleBody,Headline,Stance,Guess
0,1,Al-Sisi has denied Israeli reports stating tha...,El-Sisi denies claims he'll give Sinai land to...,agree,disagree
1,2,A bereaved Afghan mother took revenge on the T...,Afghan Mother Kills 25 Taliban Fighters In Sev...,agree,agree
2,3,CNBC is reporting Tesla has chosen Nevada as t...,REPORT: Tesla Chooses Nevada For Site Of Its M...,discuss,discuss
3,12,A 4-inch version of the iPhone 6 is said to be...,4-inch iPhone 6 reported to be in development,discuss,discuss
4,19,GR editor’s Note\n\nThere are no reports in th...,Iraqi Army Downs Two British Planes Carrying W...,discuss,discuss
...,...,...,...,...,...
7059,2586,Remember how much Republicans wanted to repeal...,A Sign That Obamacare Exchanges Are Failing,disagree,agree
7060,2586,Remember how much Republicans wanted to repeal...,Republicans call Obamacare a 'failure.' These ...,agree,disagree
7061,2586,Remember how much Republicans wanted to repeal...,CBO’s Alternate Facts Show Obamacare is Unsust...,disagree,agree
7062,2586,Remember how much Republicans wanted to repeal...,Why Obamacare failed,disagree,agree


In [147]:
comp_clean.reset_index()

goodagree = 0
badagree = 0
gooddiscuss = 0
baddiscuss = 0
gooddisagree = 0
baddisagree = 0

i = 0
for index, row in comp_clean.iterrows():
    if(row['Stance'] == row['Guess']):
        if(row['Stance'] == 'agree'):
            goodagree += 1
        elif(row['Stance'] == 'discuss'):
            gooddiscuss += 1
        elif(row['Stance'] == 'disagree'):
            gooddisagree += 1
    else:
        if(row['Stance'] == 'agree'):
            badagree += 1
        elif(row['Stance'] == 'discuss'):
            baddiscuss += 1
        elif(row['Stance'] == 'disagree'):
            baddisagree += 1

In [148]:
print("Agree: ", goodagree, " - ", badagree)
print("Discuss: ", gooddiscuss, " - ", baddiscuss)
print("Disagree: ", gooddisagree, " - ", baddisagree)

Agree:  969  -  934
Discuss:  3755  -  709
Disagree:  262  -  435
