In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the BERT tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)

# Define the possible stance labels
stance_labels = ["FAVOR", "AGAINST", "NEITHER"]

# Function to predict the stance of a given tweet and target
def predict_stance(tweet, target):
    # Tokenize the tweet and target
    inputs = tokenizer.encode_plus(tweet, target, padding=True, truncation=True, max_length=128, return_tensors="pt")

    # Pass the input through the BERT model to get the predicted stance
    outputs = model(inputs["input_ids"], attention_mask=inputs["attention_mask"])
    predicted_stance = torch.argmax(outputs.logits, dim=1)

    # Map the predicted stance to the corresponding label
    predicted_label = stance_labels[predicted_stance]

    return predicted_label


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification

In [2]:
tweet = "Abortion is a choice. It should be legalized"
target = "Legalization of Abortion"
predicted_stance = predict_stance(tweet, target)
print(predicted_stance) # Output: 'support'


NEITHER


# GPT

In [33]:
import openai
openai.api_key = "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"

def predict_stance(tweet, target):
    prompt = f"Tweet: {tweet}\nTarget: {target}\nStance:"
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        max_tokens=10,
        n=1,
        stop=None,
        temperature=0.8,
    )
    stance = response.choices[0].text.strip()
    return stance

In [34]:
tweet = "I do not believe in god."
target = "Athiesm"
stance = predict_stance(tweet, target)
print(stance)

Disagree


# BERT

In [None]:
from sklearn.model_selection import train_test_split
import torch
import transformers
import pandas as pd
import numpy as np


In [None]:
# Load the pre-trained BERT model and tokenizer
tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased')
model = transformers.BertModel.from_pretrained('bert-base-uncased')

In [None]:
# Define the neural network layer for stance detection
class StanceClassifier(torch.nn.Module):
    def __init__(self):
        super(StanceClassifier, self).__init__()
        self.dense = torch.nn.Linear(768, 3)
        
    def forward(self, tokens):
        _, pooled_output = model(tokens)
        logits = self.dense(pooled_output)
        return logits

In [None]:
# Load the stance detection dataset

data = pd.read_csv("../Datasets/restructured_data.csv")

train, test = train_test_split(data, test_size=0.2)

In [None]:
# Tokenize the tweet and target using the BERT tokenizer
train_tokens = tokenizer.batch_encode_plus(
    list(train['Data']), list(train['Stance']),
    max_length=128, pad_to_max_length=True, truncation=True,
    return_tensors='pt'
)

test_tokens = tokenizer.batch_encode_plus(
    list(test['Data']), list(test['Stance']),
    max_length=128, pad_to_max_length=True, truncation=True,
    return_tensors='pt'
)

In [None]:
# Convert the labels to integers
train_labels = np.array(list(train['Stance'].map({'FAVOR': 0, 'AGAINST': 1, 'NONE': 2})))
test_labels = np.array(list(test['Stance'].map({'FAVOR': 0, 'AGAINST': 1, 'NONE': 2})))

In [None]:
# Create PyTorch DataLoader objects for training and testing
train_dataset = torch.utils.data.TensorDataset(train_tokens['input_ids'], train_tokens['attention_mask'], torch.tensor(train_labels))
test_dataset = torch.utils.data.TensorDataset(test_tokens['input_ids'], test_tokens['attention_mask'], torch.tensor(test_labels))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [3]:
# Train the model

import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = StanceClassifier().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

for epoch in range(1):
    running_loss = 0.0
    for inputs, masks, labels in train_loader:
        inputs, masks, labels = inputs.to(device), masks.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}: loss={running_loss/len(train_loader)}')

  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'StanceClassifier' is not defined

In [None]:
# Evaluate the model on the test set
with torch.no_grad():
    model.eval()
    correct = 0
    total = 0
    for inputs, masks, labels in test_loader:
        inputs, masks, labels = inputs.to(device), masks.to(device), labels.to(device)
        logits = model(inputs)
        predictions = torch.argmax(logits, dim=1)
        correct += torch.sum(predictions == labels)
        total += len(labels)
    accuracy = correct/total
    print(f'Test accuracy: {accuracy}')