# Manual Model Testing with Single Tweet Input

In [4]:
import torch
import torch.nn as nn
from transformers import XLMRobertaModel, XLMRobertaTokenizer

In [5]:
class FauxHateDetector(nn.Module):
    """Multi-task classifier with one shared encoder and two linear heads.

    A single XLM-RoBERTa encoder produces the representation of the first
    token (the CLS slot); that vector is fed to two independent linear
    layers, one for the 'faux' task and one for the 'hate' task.

    Args:
        model_name: Identifier passed to ``XLMRobertaModel.from_pretrained``.
        num_labels_task1: Output width of the 'faux' head.
        num_labels_task2: Output width of the 'hate' head.
    """

    def __init__(self, model_name='xlm-roberta-base', num_labels_task1=2, num_labels_task2=2):
        super().__init__()

        # Shared encoder used by both tasks.
        self.model = XLMRobertaModel.from_pretrained(model_name)
        hidden_dim = self.model.config.hidden_size

        # One linear head per task, both reading the same encoder output.
        self.classifier_faux = nn.Linear(hidden_dim, num_labels_task1)
        self.classifier_hate = nn.Linear(hidden_dim, num_labels_task2)

    def forward(self, input_ids, attention_mask):
        """Return ``(faux_logits, hate_logits)`` for a batch of token ids."""
        encoded = self.model(input_ids=input_ids, attention_mask=attention_mask)

        # Position 0 of the sequence axis holds the CLS token embedding.
        first_token = encoded.last_hidden_state[:, 0]

        return self.classifier_faux(first_token), self.classifier_hate(first_token)

In [6]:
import torch
from transformers import XLMRobertaTokenizer


# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoint whose contents are fed to ``load_state_dict`` below.
BEST_MODEL_PATH = "/content/drive/MyDrive/Icon Conference/multitaskXLMRoBERTa/best_model.pth"

# Load the tokenizer and the best model
tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
model = FauxHateDetector().to(device)

# torch.load unpickles the file; weights_only=True limits unpickling to
# tensors and plain containers, so a tampered checkpoint cannot execute
# arbitrary code. The result is passed straight to load_state_dict so no
# second copy of the weights stays referenced in the notebook namespace.
model.load_state_dict(
    torch.load(BEST_MODEL_PATH, map_location=device, weights_only=True)
)

# Inference mode for layers such as dropout.
model.eval()

def predict_single_tweet(model, tokenizer, tweet, max_len=128):
    """Classify a single tweet with both heads of the multi-task model.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns ``(faux_logits, hate_logits)``. This function does not
            change the model's train/eval mode; the caller is expected to
            have put it in eval mode already.
        tokenizer: Hugging Face-style tokenizer. It is called directly
            (``tokenizer(text, ...)``) and must return a mapping with
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        tweet: Text to classify.
        max_len: Length the token sequence is truncated / padded to.

    Returns:
        ``(faux_label, hate_label)`` where ``faux_label`` is ``"Fake"`` or
        ``"Not Fake"`` and ``hate_label`` is ``"Hate"`` or ``"Not Hate"``.
    """
    # Tokenize the tweet. Calling the tokenizer directly replaces the
    # deprecated ``encode_plus`` entry point and accepts the same arguments.
    encoding = tokenizer(
        tweet,
        max_length=max_len,
        truncation=True,
        padding='max_length',
        add_special_tokens=True,
        return_tensors='pt'
    )

    # Place the inputs wherever the model's weights live instead of relying
    # on a notebook-level ``device`` variable, so the function works with any
    # model passed in and cannot end up on a different device than the model.
    try:
        model_device = next(model.parameters()).device
    except (StopIteration, AttributeError):
        # Parameter-free or non-Module callables: stay on the CPU.
        model_device = torch.device("cpu")

    input_ids = encoding['input_ids'].to(model_device)
    attention_mask = encoding['attention_mask'].to(model_device)

    # Get predictions from the model without tracking gradients
    with torch.no_grad():
        faux_logits, hate_logits = model(input_ids, attention_mask)

    # Get the predicted class (0 or 1) for both faux and hate tasks
    faux_prediction = torch.argmax(faux_logits, dim=1).item()
    hate_prediction = torch.argmax(hate_logits, dim=1).item()

    # Convert predictions to readable labels (class 1 is the positive label)
    faux_label = "Fake" if faux_prediction == 1 else "Not Fake"
    hate_label = "Hate" if hate_prediction == 1 else "Not Hate"

    return faux_label, hate_label


  model.load_state_dict(torch.load("/content/drive/MyDrive/Icon Conference/multitaskXLMRoBERTa/best_model.pth", map_location=torch.device(device)))


In [8]:
import re
def clean_text(text):
    """Normalise a tweet into lowercase ASCII alphanumerics and single spaces.

    Non-string input yields ``''``. For strings the steps are, in order:
    lowercase; replace ``<br>`` with a space; drop URLs; drop ``@mentions``
    and ``#`` characters; replace runs of non-ASCII characters with a space;
    drop every remaining character that is not a letter, digit or
    whitespace; collapse whitespace runs; strip both ends.
    """
    if not isinstance(text, str):
        return ''

    # (pattern, replacement) pairs, applied strictly in this order.
    substitutions = (
        (r'<br>', ' '),
        (r'http\S+|www\S+|https\S+', ''),
        (r'@\w+|#+', ''),
        (r'[^\x00-\x7F]+', ' '),
        (r'[^a-zA-Z0-9\s]', ''),
        (r'\s+', ' '),
    )

    cleaned = str(text).lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()

# Example Usage of Model

In [14]:
# Example input, kept verbatim; it is normalised with clean_text before inference.
raw_tweet = "@SwetaSinghAT Madam ap 2000 ke note me nano GPS chip dhundo,chai me cheeni koi aur dhund lega.yad h na yh iconic episode jab ap g huzoori me fake news chala rhi thi.Waise Galwan me hamare jawan shaheed hue na kyu hue us par kabhi minister ya PM se sawal k"
tweet = clean_text(raw_tweet)

# Run both classification heads on the cleaned text.
faux_label, hate_label = predict_single_tweet(model, tokenizer, tweet)

# One line per task, in the same order as the returned tuple.
print(
    f"Faux Prediction: {faux_label}",
    f"Hate Prediction: {hate_label}",
    sep="\n",
)

Faux Prediction: Not Fake
Hate Prediction: Not Hate
