# Customer_Satisfaction_Transformer

üéØ Purpose: Predict satisfaction rating (1‚Äì5) using DistilBERT fine-tuning.

‚öôÔ∏è Run on Google Colab GPU or a local GPU environment.

# Install dependencies

In [1]:
!pip install torch scikit-learn --quiet
!pip install -U transformers



In [2]:
import transformers
print(transformers.__version__)

4.57.1


# Load dataset

In [3]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

In [4]:
import warnings
warnings.filterwarnings('ignore')

import os
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

In [5]:
# Load data

df = pd.read_csv("customer_support_tickets.csv")

In [6]:
# Keep only rows with valid satisfaction rating
df = df[df['Customer Satisfaction Rating'].notna()]
df['label'] = df['Customer Satisfaction Rating'].astype(int) - 1

In [7]:
# Combine text fields
df['text'] = df['Ticket Subject'].fillna('') + ' ' + df['Ticket Description'].fillna('')

In [8]:
print("Dataset shape:", df.shape)
df[['text', 'label']].head()

Dataset shape: (2769, 19)


Unnamed: 0,text,label
2,Network problem I'm facing a problem with my {...,2
3,Account access I'm having an issue with the {p...,2
4,Data loss I'm having an issue with the {produc...,0
10,Data loss I'm having an issue with the {produc...,0
11,Software bug I'm having an issue with the {pro...,0


# Tokenization

In [9]:
# Split dataset
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'], df['label'], test_size=0.2, stratify=df['label'], random_state=42
)

In [10]:
# Load tokenizer
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')


In [11]:
# Tokenize
train_encodings = tokenizer(train_texts.tolist(), truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(val_texts.tolist(), truncation=True, padding=True, max_length=128)

# Prepare PyTorch Dataset

In [12]:
class CSATDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels.iloc[idx])
        return item
    def __len__(self):
        return len(self.labels)

train_dataset = CSATDataset(train_encodings, train_labels)
val_dataset = CSATDataset(val_encodings, val_labels)

# Load model and train

In [2]:
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

model = AutoModelForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=5
)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=2,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    load_best_model_at_end=True,
    evaluation_strategy="epoch",   # or "steps"
    logging_strategy="steps",
    logging_steps=100,
    save_strategy="epoch"          # ensures checkpoints are saved
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

trainer.train()

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TypeError: TrainingArguments.__init__() got an unexpected keyword argument 'evaluation_strategy'

# Evaluate & Save Model

In [None]:
# Evaluation
predictions = trainer.predict(val_dataset)
y_pred = predictions.predictions.argmax(axis=1)

print(classification_report(val_labels, y_pred))

In [None]:
# Save model
model.save_pretrained('/mnt/data/csat_distilbert_model/')
tokenizer.save_pretrained('/mnt/data/csat_distilbert_model/')
print("‚úÖ Model saved to /mnt/data/csat_distilbert_model/")

In [None]:
print(transformers.__version__