In [None]:
from transformers import pipeline
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [None]:
class TweetDataset(Dataset):
    def __init__(self, tweets, candidate_labels):
        self.tweets = tweets
        self.candidate_labels = candidate_labels
        self.classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    
    def __len__(self):
        return len(self.tweets)
    
    def __getitem__(self, idx):
        tweet = self.tweets[idx]
        result = self.classifier(tweet, self.candidate_labels, multi_label=True, torch_dtype=torch.float16)
        return result

In [None]:
print("Reading data")
data = pd.read_csv("data/Russia_invade.csv")

In [None]:
print("Creating dataloader")
tweets = data["renderedContent"].tolist()[:256]
candidate_labels = ['USA started the war',
                    'POTUS started the war',
                    'Joe Biden started the war',
                    'CIA started the war',
                    'USA influenced the war',
                    'POTUS influenced the war',
                    'Joe Biden influenced the war',
                    'CIA influenced the war']
tweet_dataset = TweetDataset(tweets, candidate_labels)

In [None]:
print("Starting analysis")
tweets_blaming_america = {}
for batch in tqdm(tweet_dataloader):
    results = {}
    for i, tweet in enumerate(batch['sequence']):
        labels = [l[i] for l in batch['labels']]
        scores = [s[i] for s in batch['scores']]
        results[tweet] = {label: score for label, score in zip(labels, scores)}
    
    for tweet, result in results.items():
        if any(val > 0.75 for val in result.values()):
            tweets_blaming_america[tweet] = result

In [None]:
for tweet, result in tweets_blaming_america.items():
    print(tweet.replace("\n", " "))
    for label, score in result.items():
        print(f"\t{label}: {score}")

In [None]:
def benchmark(dataloader, batch_size):
    print(f"Starting analysis, batch size={batch_size}")
    tweets_blaming_america = {}
    for batch in tqdm(dataloader):
        results = {}
        for i, tweet in enumerate(batch['sequence']):
            labels = [l[i] for l in batch['labels']]
            scores = [s[i] for s in batch['scores']]
            results[tweet] = {label: score for label, score in zip(labels, scores)}
        
        for tweet, result in results.items():
            if any(val > 0.75 for val in result.values()):
                tweets_blaming_america[tweet] = result

In [None]:
tweet_dataloader_64 = DataLoader(tweet_dataset, batch_size=64)
tweet_dataloader_128 = DataLoader(tweet_dataset, batch_size=128)

In [None]:
benchmark(tweet_dataloader_64, 64)
benchmark(tweet_dataloader_128, 128)

In [None]:
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", torch_dtype=torch.float32)

In [3]:
import os
import deepspeed
import torch
from transformers import pipeline

local_rank = int(os.getenv('LOCAL_RANK', '0'))
world_size = int(os.getenv('WORLD_SIZE', '1'))
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

classifier.model = deepspeed.init_inference(classifier.model,
                                            mp_size=world_size,
                                            dtype=torch.float,
                                            replace_method='auto')

[2023-04-13 16:20:46,274] [INFO] [logging.py:93:log_dist] [Rank -1] DeepSpeed info: version=0.8.3, git-hash=unknown, git-branch=unknown
[2023-04-13 16:20:46,277] [INFO] [logging.py:93:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1
AutoTP:  [(<class 'transformers.models.bart.modeling_bart.BartEncoderLayer'>, ['.fc2', 'self_attn.out_proj']), (<class 'transformers.models.bart.modeling_bart.BartDecoderLayer'>, ['.fc2', 'encoder_attn.out_proj', 'self_attn.out_proj'])]


RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [None]:
tests = ['POTUS is the one that has been antagonising Russia. They are the ones who started this war, now we will be the ones to finish it!',
         'America started the war, and now theyre the ones that are calling Russia evil! Hypocrites!!',
         'CIA meddling in Ukraine since WWII, cia paramilitary, Zelensky being an installed actor.']
for tweet in tests:
    candidate_labels = ['USA started the war', 'Joe Biden is a bad President',
                        'USA are evil', 'POTUS is bad', 'CIA influenced the war']
    result = classifier(tweet, candidate_labels, multi_label=True)
    print(tweet)
    for label, score in zip(result['labels'], result['scores']):
        print(f"\t{label}: {score}")