In [52]:
from transformers import pipeline
import pandas as pd
from tqdm import tqdm

In [53]:
# Zero-shot classifier backed by the BART-large MNLI checkpoint; reused by the cells below.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

In [54]:
# Load the full tweet export, then print a column/dtype summary.
data = pd.read_csv(filepath_or_buffer="data/Russia_invade.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 170835 entries, 0 to 170834
Data columns (total 29 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   _type             170835 non-null  object 
 1   url               170835 non-null  object 
 2   date              170835 non-null  object 
 3   content           170835 non-null  object 
 4   renderedContent   170835 non-null  object 
 5   id                170835 non-null  int64  
 6   user              170835 non-null  object 
 7   replyCount        170835 non-null  int64  
 8   retweetCount      170835 non-null  int64  
 9   likeCount         170835 non-null  int64  
 10  quoteCount        170835 non-null  int64  
 11  conversationId    170835 non-null  int64  
 12  lang              170835 non-null  object 
 13  source            170835 non-null  object 
 14  sourceUrl         170835 non-null  object 
 15  sourceLabel       170835 non-null  object 
 16  outlinks          39

In [55]:
# Draw a fixed-seed 50-row sample so the slow classification loop stays quick to re-run.
cropped_data = data.sample(n=50, random_state=42)
cropped_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 50 entries, 4523 to 35648
Data columns (total 29 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   _type             50 non-null     object 
 1   url               50 non-null     object 
 2   date              50 non-null     object 
 3   content           50 non-null     object 
 4   renderedContent   50 non-null     object 
 5   id                50 non-null     int64  
 6   user              50 non-null     object 
 7   replyCount        50 non-null     int64  
 8   retweetCount      50 non-null     int64  
 9   likeCount         50 non-null     int64  
 10  quoteCount        50 non-null     int64  
 11  conversationId    50 non-null     int64  
 12  lang              50 non-null     object 
 13  source            50 non-null     object 
 14  sourceUrl         50 non-null     object 
 15  sourceLabel       50 non-null     object 
 16  outlinks          8 non-null      object

In [42]:
# Hand-written probe tweets used to sanity-check the candidate labels.
tests = [
    'POTUS is the one that has been antagonising Russia. They are the ones who started this war, now we will be the ones to finish it!',
    'America started the war, and now theyre the ones that are calling Russia evil! Hypocrites!!',
    'CIA meddling in Ukraine since WWII, cia paramilitary, Zelensky being an installed actor.',
]
for tweet in tests:
    candidate_labels = [
        'USA started the war',
        'Joe Biden is a bad President',
        'USA are evil',
        'POTUS is bad',
        'CIA influenced the war',
    ]
    # multi_label=True: every label gets its own score rather than sharing one distribution.
    result = classifier(tweet, candidate_labels, multi_label=True)
    print(tweet)
    # Print one indented "label: score" row per candidate label, in the order returned.
    for name, prob in zip(result['labels'], result['scores']):
        print(f"\t{name}: {round(prob, 5)}")

POTUS is the one that has been antagonising Russia. They are the ones who started this war, now we will be the ones to finish it!
	POTUS is bad: 0.99321
	USA started the war: 0.86881
	USA are evil: 0.77508
	CIA influenced the war: 0.00528
	Joe Biden is a bad President: 0.0001
America started the war, and now theyre the ones that are calling Russia evil! Hypocrites!!
	USA started the war: 0.99815
	USA are evil: 0.7821
	POTUS is bad: 0.10558
	CIA influenced the war: 0.03277
	Joe Biden is a bad President: 0.00658
CIA meddling in Ukraine since WWII, cia paramilitary, Zelensky being an installed actor.
	CIA influenced the war: 0.99558
	USA are evil: 0.23234
	POTUS is bad: 0.01662
	Joe Biden is a bad President: 0.00123
	USA started the war: 0.00106


In [56]:
import torch  # kept: other cells in this notebook reference `torch`

# The label set is the same for every tweet, so build it once instead of per iteration.
candidate_labels = ['USA started the war', 'Joe Biden is a bad President', 'USA are evil', 'POTUS is bad']

# Maps tweet text -> {label: score} for each sampled tweet where at least one
# label scores above 0.75.
tweets_blaming_america = {}
for tweet in tqdm(cropped_data["renderedContent"]):
    # `torch_dtype` is an option for building the pipeline, not for calling it, so
    # passing it here did not change the precision used. It has been dropped; to run
    # in half precision, pass it to `pipeline(...)` on hardware that supports it.
    result = classifier(tweet, candidate_labels, multi_label=True)
    if any(val > 0.75 for val in result['scores']):
        tweets_blaming_america[tweet] = dict(zip(result['labels'], result['scores']))


100%|██████████| 50/50 [01:43<00:00,  2.06s/it]


In [45]:
# Report every flagged tweet on a single line, followed by its per-label scores.
for tweet, scores in tweets_blaming_america.items():
    # Flatten embedded newlines so each tweet occupies exactly one output line.
    single_line = " ".join(tweet.split('\n'))
    print(single_line)
    for label in scores:
        print(f"\t{label}: {round(scores[label], 5)}")

@FoxNews Russia IS NOT going to invade Ukraine; they are playing a game, and the US is falling for it. The US seems to be more concerned than Ukraine is. The US wants an invasion, and they are doing everything they can to provoke one, the US needs to shut the f@#k up about it.
	USA are evil: 0.78356
	POTUS is bad: 0.1809
	Joe Biden is a bad President: 0.01268
	USA started the war: 0.01215
@joshrogin @Russia @UN ANTICIPATED-12/6/17-“The totality of evidence at hand states that…Nazis got into collusion with Donald John Trump,Vladimir Vladimirovich  Putin &amp; Xi JinPing to fix the 2016 US election,fix the 2018 mid-term elections &amp; colluded for all three of them to invade.”-API🌅
	POTUS is bad: 0.76422
	USA are evil: 0.50593
	USA started the war: 0.00288
	Joe Biden is a bad President: 0.00029


In [61]:
from transformers import pipeline
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

class TweetDataset(Dataset):
    """Map-style dataset that classifies one tweet per item.

    Indexing the dataset runs the classifier on the tweet at that position and
    returns whatever the classifier returns.

    Args:
        tweets: Indexable sequence of tweet texts.
        candidate_labels: Labels scored against every tweet.
        classifier: Optional callable invoked as
            ``classifier(tweet, candidate_labels, multi_label=True)``. When
            omitted, a ``facebook/bart-large-mnli`` zero-shot pipeline is
            created, which matches the previous behaviour. Passing an existing
            pipeline avoids loading the model a second time.
    """

    def __init__(self, tweets, candidate_labels, classifier=None):
        self.tweets = tweets
        self.candidate_labels = candidate_labels
        if classifier is None:
            classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        self.classifier = classifier

    def __len__(self):
        """Return the number of tweets available."""
        return len(self.tweets)

    def __getitem__(self, idx):
        """Classify the tweet at position ``idx`` and return the classifier's result."""
        tweet = self.tweets[idx]
        # `torch_dtype` is a pipeline-construction option, not a call-time one, so it is
        # no longer passed here; this also removes the dependency on `torch` being
        # imported by an earlier cell.
        return self.classifier(tweet, self.candidate_labels, multi_label=True)

def _keep_results(batch):
    """Collate function that returns the list of per-tweet result dicts unchanged."""
    return batch


print("Reading data")
data = pd.read_csv("data/Russia_invade.csv")
tweets = data["renderedContent"].tolist()[:50]
candidate_labels = ['USA started the war', 'Joe Biden is a bad President', 'USA are evil', 'POTUS is bad']

print("Creating dataloader")
tweet_dataset = TweetDataset(tweets, candidate_labels)
# The default collate function would merge the result dicts of a batch into a single
# dict, so iterating a batch would yield key names instead of per-tweet results.
# A pass-through collate function keeps each batch as a list of result dicts.
tweet_dataloader = DataLoader(tweet_dataset, batch_size=16, collate_fn=_keep_results)

print("Starting analysis")
# Maps tweet text -> {label: score} for tweets where at least one label scores above 0.75.
tweets_blaming_america = {}
for batch in tqdm(tweet_dataloader):
    for result in batch:
        if any(val > 0.75 for val in result['scores']):
            # Key on the text stored in the result itself. Zipping each batch with
            # `tweets` restarted at the first tweets on every batch and so paired
            # results with the wrong text.
            tweets_blaming_america[result['sequence']] = dict(zip(result['labels'], result['scores']))


Reading data


KeyboardInterrupt: 

In [60]:
# Check whether PyTorch can see a CUDA GPU. The result could be used to pick a device,
# e.g. torch.device("cuda" if torch.cuda.is_available() else "cpu").
torch.cuda.is_available()

False