<a href="https://colab.research.google.com/github/Diksh-aaa/Sentiment-Analysis/blob/main/User_Feedback_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset, DataLoader

In [2]:
class FeedbackDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = str(self.texts[item])
        label = self.labels[item]

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

In [3]:
import pandas as pd

# Define the data
data = {
    "text": [
        "I love the new events page! It's so easy to use",
        "The events page is confusing and hard to navigate",
        "The new design is okay, but I preferred the old one"
    ],
    "sentiment": [
        "positive",
        "negative",
        "neutral"
    ]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Save the DataFrame to a CSV file
df.to_csv('feedback.csv', index=False)

print("CSV file created successfully!")

CSV file created successfully!


In [4]:
# Load and preprocess data
df = pd.read_csv('feedback.csv')
df['label'] = df['sentiment'].map({'positive': 0, 'negative': 1, 'neutral': 2})

In [5]:
# Split data
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'].values, df['label'].values, test_size=0.2, random_state=42)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_dataset = FeedbackDataset(train_texts, train_labels, tokenizer, max_len=128)
val_dataset = FeedbackDataset(val_texts, val_labels, tokenizer, max_len=128)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [6]:
# Step 2: Model Selection and Setup
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
!pip install accelerate -U
!pip install transformers[torch]

from transformers import TrainingArguments, Trainer



In [8]:
!pip install --upgrade accelerate transformers[torch]



In [9]:
# Step 3: Model Training
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

trainer.train()

Step,Training Loss


TrainOutput(global_step=3, training_loss=0.9780092239379883, metrics={'train_runtime': 21.0891, 'train_samples_per_second': 0.285, 'train_steps_per_second': 0.142, 'total_flos': 394670126592.0, 'train_loss': 0.9780092239379883, 'epoch': 3.0})

In [10]:
# Step 4: Model Evaluation
eval_result = trainer.evaluate()

In [11]:
# Step 5: Sentiment Analysis and Summary Generation
test_dataset = FeedbackDataset(df['text'].values, df['label'].values, tokenizer, max_len=128)
predictions = trainer.predict(test_dataset)

# Extract predicted labels (assuming predictions.predictions contains logits)
import numpy as np
pred_labels = np.argmax(predictions.predictions, axis=1)

In [12]:
# Function to generate summary (needs to be implemented based on your specific requirements)
def generate_summary(texts, labels):
    # Example implementation (pseudo-code)
    positive_feedback = [texts[i] for i in range(len(labels)) if labels[i] == 0]
    negative_feedback = [texts[i] for i in range(len(labels)) if labels[i] == 1]
    neutral_feedback = [texts[i] for i in range(len(labels)) if labels[i] == 2]

    summary = f"Overall, users have a {'positive' if len(positive_feedback) > len(negative_feedback) else 'negative'} perception of the Events Page. "
    if positive_feedback:
        summary += f"Positive aspects mentioned include: {', '.join(positive_feedback[:3])}. "
    if negative_feedback:
        summary += f"Negative aspects mentioned include: {', '.join(negative_feedback[:3])}. "
    if neutral_feedback:
        summary += f"Neutral feedback includes: {', '.join(neutral_feedback[:3])}."

    return summary

# Generate and print summary
summary = generate_summary(df['text'].values, pred_labels) # Use the defined pred_labels variable
print(summary)

Overall, users have a negative perception of the Events Page. Negative aspects mentioned include: I love the new events page! It's so easy to use, The events page is confusing and hard to navigate, The new design is okay, but I preferred the old one. 


In [13]:
# Define a function to get predictions
def f(x):
    inputs = tokenizer(list(x), padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        logits = wrapped_model(inputs['input_ids'], inputs['attention_mask'])
    return logits.numpy()