In [4]:
import torch
import torch.nn as nn
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from torch.optim import AdamW
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from torch.amp import GradScaler, autocast  
import numpy as np
import time

# Set device for CUDA (GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load DistilBERT tokenizer and model
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased', num_labels=3
).to(device)


input_texts = [
    "How to inquire about Product A?", "Tell me more about Product B.", "I need information on Product C.",
    "Product A is interesting.", "How do I get Product B?", "I want to know more about Product C.",
    "Inquiry about Product A please.", "Can you provide Product B details?", "Information on Product C?",
    "Product A details requested."
]

# Additional Dataset for Better Learning
additional_texts = [
    "Can you share specifications of Product A?", "I’m interested in Product B, tell me more.", "Give me insights on Product C.",
    "How much does Product A cost?", "What’s the price of Product B?", "Tell me the pricing details for Product C.",
    "Do you have any promotions on Product A?", "Any discounts available for Product B?", "Can I get a special deal on Product C?",
    "Where can I buy Product A?", "Is Product B available in my region?", "How do I place an order for Product C?",
    "Can you compare Product A and C?", "Which is better: Product B or C?", "What are the unique benefits of Product A?",
    "Explain the advantages of using Product B.", "What are the key features of Product C?", "Do you offer free trials for Product A?",
    "How can I request a demo of Product B?", "Is there a demo version of Product C?", "Can I return Product A if I don’t like it?",
    "What’s the refund policy for Product B?", "Is there a return policy for Product C?", "Does Product A come with a warranty?",
    "What’s the warranty coverage for Product B?", "How long is Product C’s warranty period?", "How do I install Product A?",
    "Can you guide me through setting up Product B?", "What’s the installation process for Product C?",
    "Where can I find the user manual for Product A?", "Can you send me the instruction guide for Product B?", "Is there a setup guide for Product C?",
    "How do I update Product A?", "Are there any software updates for Product B?", "How frequently does Product C receive updates?",
    "What customer support options are available for Product A?", "How do I contact technical support for Product B?", "Where can I get help with Product C?",
    "What are the payment options for Product A?", "Can I pay in installments for Product B?", "Is there a subscription plan for Product C?",
    "How secure is Product A?", "Does Product B have data protection features?", "What security measures does Product C include?",
    "Is Product A compatible with other platforms?", "Can I integrate Product B with my existing tools?", "Does Product C work with third-party apps?",
    "How do I deactivate my Product A account?", "Can I pause my subscription to Product B?", "How do I cancel my Product C service?",
    "How to inquire about Product A?", "Tell me more about Product B.", "I need information on Product C.",
    "Product A is interesting.", "How do I get Product B?", "I want to know more about Product C.",
    "Inquiry about Product A please.", "Can you provide Product B details?", "Information on Product C?",
    "Product A details requested.", "Can I learn more about Product A?", "Details about Product B are required.",
    "Help me with Product C.", "What's the best way to contact for Product A?", "How can I receive more information on Product B?",
    "I need some information on Product C right now.", "Inquire about Product A", "Provide me Product B details.",
    "How do I purchase Product A?", "I want to ask about Product B's features.", "Tell me about Product C features.",
    "Is there any special offer for Product A?", "Product B is the best option, right?", "I want to compare Product A with Product B.",
    "How do I contact customer support for Product C?", "Give me details of Product A specs.", "What makes Product B special?",
    "Where can I find reviews for Product C?", "Product A and B seem similar, what’s the difference?", "Can I get a discount on Product C?",
    "How do I subscribe to Product A updates?", "Send me the brochure for Product B.", "Explain the warranty policy for Product C."
]
input_texts.extend(additional_texts)
labels = [0, 1, 2] * (len(input_texts) // 3)  # Cycle labels evenly

# Tokenization
def tokenize_data(texts):
    return tokenizer(texts, padding=True, truncation=True, return_tensors='pt', max_length=128)

encoded_data = tokenize_data(input_texts)
input_ids = encoded_data['input_ids']
attention_mask = encoded_data['attention_mask']
labels = torch.tensor(labels)

# Check tensor sizes
assert input_ids.size(0) == attention_mask.size(0) == labels.size(0), "Mismatch in tensor sizes!"

# Create dataset and DataLoader
dataset = TensorDataset(input_ids, attention_mask, labels)
batch_size = 8
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize optimizer & scheduler with weight decay
optimizer = AdamW(model.parameters(), lr=5e-5, weight_decay=0.01)  
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=2, verbose=True)

# Training function
def train(model, train_loader, optimizer):
    model.train()
    total_loss = 0
    scaler = GradScaler()  
    
    for batch in train_loader:
        input_ids, attention_mask, labels = [x.to(device) for x in batch]
        optimizer.zero_grad()

        with autocast('cuda'):  
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()

    return total_loss / len(train_loader)

# Validation function
def validate(model, val_loader):
    model.eval()
    preds, true_labels = [], []
    
    with torch.no_grad():
        for batch in val_loader:
            input_ids, attention_mask, labels = [x.to(device) for x in batch]
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds.extend(torch.argmax(outputs.logits, dim=-1).cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    return accuracy_score(true_labels, preds)

# Cross-validation
def cross_validate():
    kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    
    for fold, (train_idx, val_idx) in enumerate(kfold.split(input_ids, labels)):
        print(f"\nTraining Fold {fold + 1}")

        train_loader = DataLoader(torch.utils.data.Subset(dataset, train_idx), batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(torch.utils.data.Subset(dataset, val_idx), batch_size=batch_size)

        best_fold_val_accuracy = 0
        epochs_without_improvement = 0

        for epoch in range(5):
            print(f"Epoch {epoch + 1}")
            avg_train_loss = train(model, train_loader, optimizer)
            print(f"Avg Training Loss: {avg_train_loss:.4f}")

            val_accuracy = validate(model, val_loader)
            print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

            if val_accuracy > best_fold_val_accuracy:
                best_fold_val_accuracy = val_accuracy
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1

            if epochs_without_improvement >= 3:
                print("Early stopping...")
                break

            scheduler.step(val_accuracy)

cross_validate()


Using device: cuda


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 1
Epoch 1
Avg Training Loss: 1.1168
Validation Accuracy: 35.48%
Epoch 2
Avg Training Loss: 1.0095
Validation Accuracy: 64.52%
Epoch 3
Avg Training Loss: 0.7703
Validation Accuracy: 51.61%
Epoch 4
Avg Training Loss: 0.5269
Validation Accuracy: 70.97%
Epoch 5
Avg Training Loss: 0.3529
Validation Accuracy: 64.52%

Training Fold 2
Epoch 1
Avg Training Loss: 0.5834
Validation Accuracy: 96.77%
Epoch 2
Avg Training Loss: 0.4318
Validation Accuracy: 96.77%
Epoch 3
Avg Training Loss: 0.2952
Validation Accuracy: 90.32%
Epoch 4
Avg Training Loss: 0.2516
Validation Accuracy: 83.87%
Early stopping...

Training Fold 3
Epoch 1
Avg Training Loss: 0.2691
Validation Accuracy: 100.00%
Epoch 2
Avg Training Loss: 0.2286
Validation Accuracy: 96.77%
Epoch 3
Avg Training Loss: 0.0607
Validation Accuracy: 93.55%
Epoch 4
Avg Training Loss: 0.1928
Validation Accuracy: 96.77%
Early stopping...


In [7]:
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm import tqdm  # For progress bar

# Define the training parameters
epochs = 3
batch_size = 8
learning_rate = 2e-5

# Set the model to training mode
model.train()

# Use AdamW optimizer for fine-tuning
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Create a DataLoader for the training set
train_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Training loop
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    running_loss = 0.0
    for batch in tqdm(train_loader):
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['label']

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        logits = outputs.logits

        # Backward pass
        loss.backward()

        # Optimize
        optimizer.step()

        running_loss += loss.item()

    print(f"Loss after epoch {epoch + 1}: {running_loss / len(train_loader)}")


Epoch 1/3


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.80s/it]


Loss after epoch 1: 1.2560291290283203
Epoch 2/3


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.70s/it]


Loss after epoch 2: 1.058111548423767
Epoch 3/3


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.57s/it]

Loss after epoch 3: 1.001037359237671





In [8]:
# Evaluation after training
model.eval()  # Set model to evaluation mode
predictions, true_labels = [], []

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['label']
        
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        
        # Get the predicted class
        preds = torch.argmax(logits, dim=1)
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

# Evaluate the model performance
accuracy = accuracy_score(true_labels, predictions)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(true_labels, predictions))


Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1
           2       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

