In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, TensorDataset
import torch

In [4]:
# Load the dataset
df = pd.read_csv('data.csv')
print(df.head())

                                Unnamed: 0 Unnamed: 1
0                                     Text    Empathy
1  I'm sorry to hear about your situation.          1
2               The weather is nice today.          0
3        You must be feeling really happy!          1
4     Can't believe you made that mistake.          0


In [5]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

In [None]:
# Tokenize and encode the text data
train_encodings = tokenizer(list(train_data['Text']), truncation=True, padding=True, max_length=128, return_tensors='pt')
test_encodings = tokenizer(list(test_data['Text']), truncation=True, padding=True, max_length=128, return_tensors='pt')

In [None]:
# Create PyTorch DataLoader
train_dataset = TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], torch.tensor(list(train_data['Empathy'])))
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)


In [None]:
# Define optimizer and loss function
optimizer = AdamW(model.parameters(), lr=1e-5)
criterion = torch.nn.CrossEntropyLoss()

In [None]:

# Train the model
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    for batch in train_loader:
        input_ids, attention_mask, labels = batch
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

In [None]:
# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    test_outputs = model(test_encodings['input_ids'], attention_mask=test_encodings['attention_mask'])
    predictions = torch.argmax(test_outputs.logits, dim=1).numpy()

In [None]:
# Display accuracy and classification report
print(f'Accuracy: {accuracy_score(test_data["Empathy"], predictions)}')
print(classification_report(test_data['Empathy'], predictions))