<a href="https://colab.research.google.com/github/NehaPunetha/Negation-Unsupervised-STATITICAL/blob/main/Compsarisio_1_with_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# -----------------------------
# COMMON SETUP (run this first)
# -----------------------------
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load dataset
df = pd.read_csv("/content/Amazon_Negation_Reviews_Dataset_500 (2).csv")

print(df['True_Label'].value_counts())
print(df.head())
print(df.columns)

# üëâ Correct column names for YOUR dataset
TEXT_COL  = 'Review'
LABEL_COL = 'True_Label'

# Keep only required columns and drop NaNs
df = df[[TEXT_COL, LABEL_COL]].dropna()

# Encode labels (Positive / Negative / Neutral) ‚Üí 0,1,2
label_encoder = LabelEncoder()
df['label_id'] = label_encoder.fit_transform(df[LABEL_COL])

print("\nLabel mapping:")
for cls, idx in zip(label_encoder.classes_, range(len(label_encoder.classes_))):
    print(f"{cls} -> {idx}")

# Train‚Äìtest split
X_train, X_test, y_train, y_test = train_test_split(
    df[TEXT_COL].values,
    df['label_id'].values,
    test_size=0.2,
    random_state=42,
    stratify=df['label_id']
)

print("\nTrain size:", len(X_train))
print("Test size:", len(X_test))


True_Label
Neutral     237
Positive    132
Negative    131
Name: count, dtype: int64
   ID                                  Review  Has_Negation Negation_Target  \
0   1                      The shoes is slow.             0             NaN   
1   2          I found the shoes quite worst.             0             NaN   
2   3  This shoes turned out to be not worst.             1           worst   
3   4       This phone turned out to be slow.             0             NaN   
4   5                  The pillow is amazing.             0             NaN   

  True_Label  
0   Negative  
1   Negative  
2    Neutral  
3   Negative  
4   Positive  
Index(['ID', 'Review', 'Has_Negation', 'Negation_Target', 'True_Label'], dtype='object')

Label mapping:
Negative -> 0
Neutral -> 1
Positive -> 2

Train size: 400
Test size: 100


1Ô∏è‚É£ Logistic Regression + TF-IDF

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# TF-IDF vectorizer
tfidf = TfidfVectorizer(
    max_features=5000,
    ngram_range=(1, 2),
    lowercase=True
)

X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf  = tfidf.transform(X_test)

# Logistic Regression model
log_reg = LogisticRegression(max_iter=1000, n_jobs=-1)

log_reg.fit(X_train_tfidf, y_train)
y_pred_lr = log_reg.predict(X_test_tfidf)

print("üîπ Logistic Regression + TF-IDF")
print("Accuracy:", accuracy_score(y_test, y_pred_lr))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_lr))
print("Classification Report:\n", classification_report(y_test, y_pred_lr, target_names=label_encoder.classes_))


üîπ Logistic Regression + TF-IDF
Accuracy: 0.82
Confusion Matrix:
 [[20  5  1]
 [ 6 39  3]
 [ 1  2 23]]
Classification Report:
               precision    recall  f1-score   support

    Negative       0.74      0.77      0.75        26
     Neutral       0.85      0.81      0.83        48
    Positive       0.85      0.88      0.87        26

    accuracy                           0.82       100
   macro avg       0.81      0.82      0.82       100
weighted avg       0.82      0.82      0.82       100



2Ô∏è‚É£ SVM + TF-IDF

In [5]:
from sklearn.svm import LinearSVC

# Reuse X_train_tfidf, X_test_tfidf from above

svm_clf = LinearSVC()

svm_clf.fit(X_train_tfidf, y_train)
y_pred_svm = svm_clf.predict(X_test_tfidf)

print("üîπ SVM + TF-IDF")
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm))
print("Classification Report:\n", classification_report(y_test, y_pred_svm, target_names=label_encoder.classes_))


üîπ SVM + TF-IDF
Accuracy: 0.84
Confusion Matrix:
 [[21  5  0]
 [ 6 39  3]
 [ 1  1 24]]
Classification Report:
               precision    recall  f1-score   support

    Negative       0.75      0.81      0.78        26
     Neutral       0.87      0.81      0.84        48
    Positive       0.89      0.92      0.91        26

    accuracy                           0.84       100
   macro avg       0.84      0.85      0.84       100
weighted avg       0.84      0.84      0.84       100



3Ô∏è‚É£ VADER (Rule-based, with Negation Handling)

In [6]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Download once
nltk.download('vader_lexicon')

sia = SentimentIntensityAnalyzer()

# Get the numeric id for "Positive" / "Negative"
# (Assumes labels are exactly 'Positive' and 'Negative')
pos_id = label_encoder.transform(['Positive'])[0]
neg_id = label_encoder.transform(['Negative'])[0]

def vader_predict(texts, threshold=0.0):
    preds = []
    for t in texts:
        scores = sia.polarity_scores(str(t))
        compound = scores['compound']
        if compound >= threshold:
            preds.append(pos_id)
        else:
            preds.append(neg_id)
    return np.array(preds)

y_pred_vader = vader_predict(X_test)

print("üîπ VADER")
print("Accuracy:", accuracy_score(y_test, y_pred_vader))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_vader))
print("Classification Report:\n", classification_report(y_test, y_pred_vader, target_names=label_encoder.classes_))


üîπ VADER
Accuracy: 0.48
Confusion Matrix:
 [[22  0  4]
 [21  0 27]
 [ 0  0 26]]
Classification Report:
               precision    recall  f1-score   support

    Negative       0.51      0.85      0.64        26
     Neutral       0.00      0.00      0.00        48
    Positive       0.46      1.00      0.63        26

    accuracy                           0.48       100
   macro avg       0.32      0.62      0.42       100
weighted avg       0.25      0.48      0.33       100



[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


BiLSTM (Keras)

In [7]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout

# -----------------------------
# Tokenization & Padding
# -----------------------------
max_words = 10000
max_len = 50

tokenizer_keras = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer_keras.fit_on_texts(X_train)

X_train_seq = tokenizer_keras.texts_to_sequences(X_train)
X_test_seq  = tokenizer_keras.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_pad  = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

# -----------------------------
# BiLSTM Model
# -----------------------------
embedding_dim = 128

bilstm_model = Sequential([
    Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len),
    Bidirectional(LSTM(64, return_sequences=False)),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary output
])

bilstm_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

bilstm_model.summary()

# -----------------------------
# Training
# -----------------------------
history = bilstm_model.fit(
    X_train_pad,
    y_train,
    epochs=5,           # you can tune
    batch_size=16,      # small dataset, small batch
    validation_split=0.1,
    verbose=1
)

# -----------------------------
# Evaluation
# -----------------------------
y_pred_prob = bilstm_model.predict(X_test_pad)
y_pred_bilstm = (y_pred_prob >= 0.5).astype(int).reshape(-1)

print("üîπ BiLSTM (Keras)")
print("Accuracy:", accuracy_score(y_test, y_pred_bilstm))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_bilstm))
print("Classification Report:\n", classification_report(y_test, y_pred_bilstm, target_names=label_encoder.classes_))




Epoch 1/5
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m7s[0m 78ms/step - accuracy: 0.4201 - loss: 0.4733 - val_accuracy: 0.4250 - val_loss: 0.3619
Epoch 2/5
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 58ms/step - accuracy: 0.4509 - loss: -0.0783 - val_accuracy: 0.4250 - val_loss: 0.4443
Epoch 3/5
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 59ms/step - accuracy: 0.4973 - loss: 0.2237 - val_accuracy: 0.4250 - val_loss: 0.4922
Epoch 4/5
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 58ms/step - accuracy: 0.4708 - loss: -0.2632 - val_accuracy: 0.4250 - val_loss: 0.6360
Epoch 5/5
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 57ms/step - accuracy: 0.4754 - loss: -0.2503 - val_accuracy: 0.4250 - val_loss: 0.56

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


5Ô∏è‚É£ Shared PyTorch Utilities for Transformer Models

We‚Äôll reuse these for BERT-base, RoBERTa-base, NegBERT.

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

class ReviewsDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = list(texts)
        self.labels = list(labels)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text  = str(self.texts[idx])
        label = int(self.labels[idx])

        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            truncation=True,
            max_length=self.max_len,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

def train_epoch(model, data_loader, optimizer, scheduler, device):
    model.train()
    total_loss = 0

    for batch in data_loader:
        optimizer.zero_grad()

        input_ids      = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels         = batch['labels'].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    return total_loss / len(data_loader)

def eval_model(model, data_loader, device):
    model.eval()
    preds = []
    true_labels = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids      = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels         = batch['labels'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask
            )

            logits = outputs.logits
            predictions = torch.argmax(logits, dim=-1)

            preds.extend(predictions.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(true_labels, preds)
    cm  = confusion_matrix(true_labels, preds)
    cr  = classification_report(true_labels, preds, target_names=label_encoder.classes_)

    return acc, cm, cr


Using device: cpu


**6Ô∏è‚É£ BERT-base (uncased)**

In [5]:
from transformers import BertTokenizerFast, BertForSequenceClassification

MODEL_NAME = "bert-base-uncased"
max_length = 128
batch_size = 8
epochs = 4
lr = 2e-5

tokenizer_bert = BertTokenizerFast.from_pretrained(MODEL_NAME)

train_dataset_bert = ReviewsDataset(X_train, y_train, tokenizer_bert, max_length)
test_dataset_bert  = ReviewsDataset(X_test, y_test, tokenizer_bert, max_length)

train_loader_bert = DataLoader(train_dataset_bert, batch_size=batch_size, shuffle=True)
test_loader_bert  = DataLoader(test_dataset_bert, batch_size=batch_size, shuffle=False)

num_labels = len(label_encoder.classes_)
model_bert = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=num_labels).to(device)

optimizer_bert = AdamW(model_bert.parameters(), lr=lr)
total_steps_bert = len(train_loader_bert) * epochs

scheduler_bert = get_linear_schedule_with_warmup(
    optimizer_bert,
    num_warmup_steps=int(0.1 * total_steps_bert),
    num_training_steps=total_steps_bert
)

for epoch in range(epochs):
    print(f"\n====== BERT Epoch {epoch+1}/{epochs} ======")
    train_loss = train_epoch(model_bert, train_loader_bert, optimizer_bert, scheduler_bert, device)
    print(f"Train loss: {train_loss:.4f}")

    acc, cm, cr = eval_model(model_bert, test_loader_bert, device)
    print("Accuracy:", acc)
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", cr)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Train loss: 0.9588
Accuracy: 0.69
Confusion Matrix:
 [[24  2  0]
 [ 9 39  0]
 [17  3  6]]
Classification Report:
               precision    recall  f1-score   support

    Negative       0.48      0.92      0.63        26
     Neutral       0.89      0.81      0.85        48
    Positive       1.00      0.23      0.38        26

    accuracy                           0.69       100
   macro avg       0.79      0.66      0.62       100
weighted avg       0.81      0.69      0.67       100


Train loss: 0.6160
Accuracy: 0.87
Confusion Matrix:
 [[22  4  0]
 [ 5 39  4]
 [ 0  0 26]]
Classification Report:
               precision    recall  f1-score   support

    Negative       0.81      0.85      0.83        26
     Neutral       0.91      0.81      0.86        48
    Positive       0.87      1.00      0.93        26

    accuracy                           0.87       100
   macro avg       0.86      0.89      0.87       100
weighted avg       0.87      0.87      0.87       100


Train l

**7Ô∏è‚É£ RoBERTa-base**

In [6]:
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification

MODEL_NAME = "roberta-base"
max_length = 128
batch_size = 8
epochs = 4
lr = 2e-5

tokenizer_rb = RobertaTokenizerFast.from_pretrained(MODEL_NAME)

train_dataset_rb = ReviewsDataset(X_train, y_train, tokenizer_rb, max_length)
test_dataset_rb  = ReviewsDataset(X_test, y_test, tokenizer_rb, max_length)

train_loader_rb = DataLoader(train_dataset_rb, batch_size=batch_size, shuffle=True)
test_loader_rb  = DataLoader(test_dataset_rb, batch_size=batch_size, shuffle=False)

num_labels = len(label_encoder.classes_)
model_rb = RobertaForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=num_labels).to(device)

optimizer_rb = AdamW(model_rb.parameters(), lr=lr)
total_steps_rb = len(train_loader_rb) * epochs

scheduler_rb = get_linear_schedule_with_warmup(
    optimizer_rb,
    num_warmup_steps=int(0.1 * total_steps_rb),
    num_training_steps=total_steps_rb
)

for epoch in range(epochs):
    print(f"\n====== RoBERTa Epoch {epoch+1}/{epochs} ======")
    train_loss = train_epoch(model_rb, train_loader_rb, optimizer_rb, scheduler_rb, device)
    print(f"Train loss: {train_loss:.4f}")

    acc, cm, cr = eval_model(model_rb, test_loader_rb, device)
    print("Accuracy:", acc)
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", cr)


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Train loss: 1.0224
Accuracy: 0.61
Confusion Matrix:
 [[ 4 16  6]
 [ 0 48  0]
 [ 0 17  9]]
Classification Report:
               precision    recall  f1-score   support

    Negative       1.00      0.15      0.27        26
     Neutral       0.59      1.00      0.74        48
    Positive       0.60      0.35      0.44        26

    accuracy                           0.61       100
   macro avg       0.73      0.50      0.48       100
weighted avg       0.70      0.61      0.54       100


Train loss: 0.6118
Accuracy: 0.83
Confusion Matrix:
 [[21  5  0]
 [ 6 36  6]
 [ 0  0 26]]
Classification Report:
               precision    recall  f1-score   support

    Negative       0.78      0.81      0.79        26
     Neutral       0.88      0.75      0.81        48
    Positive       0.81      1.00      0.90        26

    accuracy                           0.83       100
   macro avg       0.82      0.85      0.83       100
weighted avg       0.83      0.83      0.83       100


Train l

**8Ô∏è‚É£ NegBERT**

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

NEG_BERT_MODEL = "bvanaken/clinical-assertion-negation-bert"  # example model

max_length = 128
batch_size = 8
epochs = 4
lr = 2e-5

tokenizer_nb = AutoTokenizer.from_pretrained(NEG_BERT_MODEL)

train_dataset_nb = ReviewsDataset(X_train, y_train, tokenizer_nb, max_length)
test_dataset_nb  = ReviewsDataset(X_test, y_test, tokenizer_nb, max_length)

train_loader_nb = DataLoader(train_dataset_nb, batch_size=batch_size, shuffle=True)
test_loader_nb  = DataLoader(test_dataset_nb, batch_size=batch_size, shuffle=False)

num_labels = len(label_encoder.classes_)
model_nb = AutoModelForSequenceClassification.from_pretrained(
    NEG_BERT_MODEL,
    num_labels=num_labels
).to(device)

optimizer_nb = AdamW(model_nb.parameters(), lr=lr)
total_steps_nb = len(train_loader_nb) * epochs

scheduler_nb = get_linear_schedule_with_warmup(
    optimizer_nb,
    num_warmup_steps=int(0.1 * total_steps_nb),
    num_training_steps=total_steps_nb
)

for epoch in range(epochs):
    print(f"\n====== NegBERT Epoch {epoch+1}/{epochs} ======")
    train_loss = train_epoch(model_nb, train_loader_nb, optimizer_nb, scheduler_nb, device)
    print(f"Train loss: {train_loss:.4f}")

    acc, cm, cr = eval_model(model_nb, test_loader_nb, device)
    print("Accuracy:", acc)
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", cr)


model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]


Train loss: 1.3264
Accuracy: 0.8
Confusion Matrix:
 [[22  4  0]
 [ 5 38  5]
 [ 4  2 20]]
Classification Report:
               precision    recall  f1-score   support

    Negative       0.71      0.85      0.77        26
     Neutral       0.86      0.79      0.83        48
    Positive       0.80      0.77      0.78        26

    accuracy                           0.80       100
   macro avg       0.79      0.80      0.79       100
weighted avg       0.81      0.80      0.80       100


Train loss: 0.3866
Accuracy: 0.82
Confusion Matrix:
 [[23  3  0]
 [ 8 34  6]
 [ 0  1 25]]
Classification Report:
               precision    recall  f1-score   support

    Negative       0.74      0.88      0.81        26
     Neutral       0.89      0.71      0.79        48
    Positive       0.81      0.96      0.88        26

    accuracy                           0.82       100
   macro avg       0.81      0.85      0.82       100
weighted avg       0.83      0.82      0.82       100


