In [46]:
import re
import torch
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from torch.nn.functional import softmax

In [47]:
# Load pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [41]:
# Initialize Zero-Shot Classification pipeline
zero_shot_pipeline = pipeline("zero-shot-classification")

No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [48]:
def preprocess_tweet(tweet):
    # Remove punctuation, symbols, @mentions, #hashtags, and links
    tweet = re.sub(r'[^\w\s#@]', '', tweet)
    tweet = re.sub(r'http\S+', '', tweet)
    return tweet.strip()

In [57]:
def combined_sentiment(bert_sentiment, zero_shot_sentiment, bert_prob, zero_shot_prob):
    if bert_sentiment == 'Positive' and zero_shot_sentiment == 'positive':
        return 'positive'
    elif bert_sentiment == 'Negative' and zero_shot_sentiment == 'negative':
        return 'negative'
    else:
        # Use a weighted average of probabilities
        combined_score = (bert_prob + zero_shot_prob) / 2
        combined_sentiment_label = "positive" if combined_score > 0.5 else "negative"
        return combined_sentiment_label

In [58]:
def analyze_tweet(tweet):
    # Preprocess tweet
    processed_tweet = preprocess_tweet(tweet)

    # Use BERT for sentiment analysis
    inputs = tokenizer(processed_tweet, return_tensors='pt', truncation=True)
    outputs = model(**inputs)
    probs = softmax(outputs.logits, dim=1).detach().numpy()[0]
    sentiment_labels = ['Negative', 'Neutral', 'Positive']
    bert_sentiment = sentiment_labels[probs.argmax()]
    bert_prob = probs.max()

    # Use Zero-Shot Classification
    zero_shot_classification = zero_shot_pipeline(processed_tweet, candidate_labels=["positive", "negative", "neutral"])
    zero_shot_sentiment = zero_shot_classification['labels'][0]
    zero_shot_prob = zero_shot_classification['scores'][0]

    # Determine combined sentiment
    combined_sentiment_label = combined_sentiment(bert_sentiment, zero_shot_sentiment, bert_prob, zero_shot_prob)

    return {
        "tweet": tweet,
        "bert_sentiment": bert_sentiment,
        "zero_shot_classification": zero_shot_sentiment,
        "combined_sentiment": combined_sentiment_label
    }


In [59]:
# Example usage
tweet = "@mini_razdan10 Is this brave true pakistani muzlim issuing fatwa against fake arab muslim. ? Cat calling d kettle ? Stone age mein hi rahega aur bakiyon ko bhi rakhega. Matter of joke for Arabs. Koi nahi sunta in bikhariyon ki. Bhool gaya yeh.. Ki 15 din baad katora lekar wahi jana hai ?"
analysis_result = analyze_tweet(tweet)
print(analysis_result)

{'tweet': '@mini_razdan10 Is this brave true pakistani muzlim issuing fatwa against fake arab muslim. ? Cat calling d kettle ? Stone age mein hi rahega aur bakiyon ko bhi rakhega. Matter of joke for Arabs. Koi nahi sunta in bikhariyon ki. Bhool gaya yeh.. Ki 15 din baad katora lekar wahi jana hai ?', 'bert_sentiment': 'Negative', 'zero_shot_classification': 'negative', 'combined_sentiment': 'negative'}
