Import libraries

In [3]:
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification
import torch

Load BERT model and tokenizer

In [4]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Function to preprocess text

In [21]:
def preprocess_text_list(texts):
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512, is_split_into_words=True)
    return inputs

Load dataset (replace 'your_dataset.csv' with your actual dataset)

In [22]:
df = pd.read_csv('reviews.csv')

Preprocess the data

In [23]:
df['text'] = df['text'].astype(str)

In [24]:
inputs = preprocess_text_list(df['text'].tolist())

Create DataLoader

In [25]:
class TextDataset(Dataset):
    def __init__(self, inputs):
        self.inputs = inputs

    def __len__(self):
        return len(self.inputs['input_ids'])

    def __getitem__(self, idx):
        return {'input_ids': self.inputs['input_ids'][idx],
                'attention_mask': self.inputs['attention_mask'][idx]}

dataset = TextDataset(inputs)
dataloader = DataLoader(dataset, batch_size=8, shuffle=False)

Training loop (you may need to adjust this based on your dataset size and requirements)

In [38]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    for batch in dataloader:
        inputs = {key: batch[key].to(device) for key in batch}

        # Check for NaN or infinite values in the inputs
        if any(torch.isnan(inputs[key]).any() or torch.isinf(inputs[key]).any() for key in inputs):
            print("NaN or infinite values detected in inputs. Skipping this batch.")
            continue

        # Ensure model outputs 'loss' during training
        model_outputs = model(**inputs)

        # Use the loss directly from model_outputs
        loss = model_outputs.loss if hasattr(model_outputs, 'loss') else None

        # Check if loss is valid (not None, NaN, or infinite)
        if loss is None or torch.isnan(loss) or torch.isinf(loss):
            print("Invalid loss value. Skipping this batch.")
            continue

        loss.backward()

        # Clip gradients to prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        optimizer.zero_grad()

Invalid loss value. Skipping this batch.
Invalid loss value. Skipping this batch.
Invalid loss value. Skipping this batch.


Example usage for prediction

In [39]:
def predict_sentiment(text):
    model.eval()
    with torch.no_grad():
        inputs = preprocess_text_list(text)
        inputs = {key: inputs[key].to(device) for key in inputs}
        outputs = model(**inputs)
        predicted_label = torch.argmax(outputs.logits).item()
    return predicted_label

Example usage for prediction

In [40]:
text_to_predict = "Bad Food is not upto the mark. AC is not working. Overall no cooling. Pao for paobhaji is soggy. Chat they r serving doesn’t seems chat even. Dal makhani is yuckk. Fried rice & Manchurian both r tasteless. The only think u can eat is gobhi kulcha."
predicted_sentiment = predict_sentiment(text_to_predict)

In [41]:
sentiment_mapping = {0: 'Negative', 1: 'Positive'}
print(f'Text: "{text_to_predict}"')
print(f'Predicted Sentiment: {sentiment_mapping[predicted_sentiment]}')
print(predicted_sentiment)

Text: "Bad Food is not upto the mark. AC is not working. Overall no cooling. Pao for paobhaji is soggy. Chat they r serving doesn’t seems chat even. Dal makhani is yuckk. Fried rice & Manchurian both r tasteless. The only think u can eat is gobhi kulcha."
Predicted Sentiment: Positive
1
