In [1]:
import pandas as pd
import numpy as np
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset, load_dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
import pickle
import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('dataset/csv/processed_data.csv')

In [3]:
np.random.seed(42)
df['sentiment'] = df['original_label'].copy()
sentiment_map = {0: 'Negative', 1: 'Positive'}
df['sentiment_label'] = df['sentiment'].map(sentiment_map)

In [4]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['sentiment'])
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=42, stratify=train_df['sentiment'])
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

In [5]:
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=128)

In [6]:
train_dataset = Dataset.from_dict({
    'text': train_df['processed_text'].tolist(),
    'label': train_df['sentiment'].tolist()
})
val_dataset = Dataset.from_dict({
    'text': val_df['processed_text'].tolist(),
    'label': val_df['sentiment'].tolist()
})
test_dataset = Dataset.from_dict({
    'text': test_df['processed_text'].tolist(),
    'label': test_df['sentiment'].tolist()
})

In [7]:
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

Map: 100%|██████████| 1080/1080 [00:00<00:00, 1150.95 examples/s]
Map: 100%|██████████| 120/120 [00:00<00:00, 1250.62 examples/s]
Map: 100%|██████████| 300/300 [00:00<00:00, 1265.46 examples/s]


In [8]:
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=2
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
def compute_metric(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


In [10]:
training_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=100,
    save_strategy="epoch",
    eval_strategy="epoch",  
    load_best_model_at_end=True,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metric,
)
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.675076,0.641667,0.591998,0.792729,0.641667
2,0.673900,0.440777,0.783333,0.780771,0.794467,0.783333


TrainOutput(global_step=136, training_loss=0.6332800248089958, metrics={'train_runtime': 963.1028, 'train_samples_per_second': 2.243, 'train_steps_per_second': 0.141, 'total_flos': 71532395274240.0, 'train_loss': 0.6332800248089958, 'epoch': 2.0})

In [11]:
predictions = trainer.predict(test_dataset)
preds = predictions.predictions.argmax(-1)
labels = predictions.label_ids
accuracy = accuracy_score(labels, preds)
precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')

In [12]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.8400
Precision: 0.8536
Recall: 0.8400
F1 Score: 0.8381


In [13]:
print(classification_report(labels, preds, target_names=['Negative', 'Positive']))

              precision    recall  f1-score   support

    Negative       0.79      0.94      0.86       153
    Positive       0.92      0.73      0.82       147

    accuracy                           0.84       300
   macro avg       0.85      0.84      0.84       300
weighted avg       0.85      0.84      0.84       300



In [14]:
metrics_df = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1]
})
metrics_df.to_csv('dataset/csv/evaluation_metrics.csv', index=False)

In [15]:
model.save_pretrained('sentiment_model')
tokenizer.save_pretrained('sentiment_model')

('sentiment_model/tokenizer_config.json',
 'sentiment_model/special_tokens_map.json',
 'sentiment_model/vocab.txt',
 'sentiment_model/added_tokens.json')

In [16]:
model_info = {
    'model_path': 'sentiment_model',
    'tokenizer': tokenizer,
    'label_map': {0: 'Negative', 1: 'Positive'},
    'metrics': {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }
}

with open('sentiment_model.pkl', 'wb') as f:
    pickle.dump(model_info, f)

In [17]:
def predict_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=1).item()
    label_map = {0: 'Negative', 1: 'Positive'}
    return label_map[prediction]

In [18]:
test_texts = [
    "This product is amazing! I love it!",
    "Terrible quality, waste of money.",
    "It's okay, nothing special."
]
for text in test_texts:
    sentiment = predict_sentiment(text)
    print(f"Predicted Sentiment: {sentiment}")

Predicted Sentiment: Positive
Predicted Sentiment: Negative
Predicted Sentiment: Positive
