In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, Dataset
import torch
from tqdm import tqdm

# Set up device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed torch so that the freshly initialised classifier head and the
# DataLoader shuffling are repeatable after Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Load dataset
file_path = 'train.csv'
data = pd.read_csv(file_path)

# Data Preprocessing
X = data['sentence'].tolist()

# Encode labels once and build both lookup tables from the SAME factorize
# call. factorize() assigns -1 to missing labels and leaves them out of its
# uniques, whereas Series.unique() keeps NaN as an entry, so deriving the
# mapping from unique() can shift every id after the first missing label.
y, label_values = pd.factorize(data['label'])
if (y < 0).any():
    raise ValueError(
        f"{int((y < 0).sum())} row(s) in {file_path!r} have a missing 'label'; "
        "fix or drop them before training."
    )
label_to_index = {label: i for i, label in enumerate(label_values)}
index_to_label = {i: label for label, i in label_to_index.items()}

# Split data (80/20, fixed seed)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Dataset class
class IntentDataset(Dataset):
    """Map-style dataset pairing each sentence with its integer label.

    Items are tokenised on access. Every item is a dict with the keys
    ``'input_ids'``, ``'attention_mask'`` and ``'label'``.
    """

    def __init__(self, sentences, labels, tokenizer, max_len=128):
        """Keep references to the inputs; nothing is tokenised up front.

        Args:
            sentences: indexable collection of texts.
            labels: indexable collection of class ids, aligned with ``sentences``.
            tokenizer: callable invoked as ``tokenizer(text, max_length=...,
                padding=..., truncation=..., return_tensors="pt")``.
            max_len: value forwarded as ``max_length`` to the tokenizer.
        """
        self.sentences, self.labels = sentences, labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Number of items, taken from the sentence collection."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Tokenise sentence ``idx`` and return it together with its label."""
        text, target = self.sentences[idx], self.labels[idx]
        encoded = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors="pt",
        )
        # The tokenizer output carries a leading axis; squeeze it away so the
        # DataLoader can stack items into a batch itself.
        item = {field: encoded[field].squeeze(0) for field in ('input_ids', 'attention_mask')}
        item['label'] = torch.tensor(target, dtype=torch.long)
        return item

# Prepare datasets and loaders
train_dataset = IntentDataset(X_train, y_train, tokenizer)
val_dataset = IntentDataset(X_val, y_val, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

# Load pre-trained BERT model.
# The label names are stored in the model config so that the checkpoint
# written by save_pretrained() carries its own id <-> intent mapping instead
# of depending on train.csv being re-read in the same row order later.
# Keys/values are coerced to plain int/str so the config stays JSON-serialisable.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(label_to_index),
    id2label={int(i): str(label) for i, label in index_to_label.items()},
    label2id={str(label): int(i) for label, i in label_to_index.items()},
)
model.to(device)

# Optimizer
# transformers.AdamW is deprecated in favour of the PyTorch implementation.
# eps and weight_decay are spelled out to keep the values the transformers
# class used by default (1e-6 and 0.0) rather than torch's own defaults.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-6, weight_decay=0.0)

# Training and Evaluation Loop
def train_epoch(model, data_loader, optimizer, device):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=...,
            labels=...)`` whose result exposes ``.loss`` and ``.logits``.
        data_loader: yields dict batches with the keys ``'input_ids'``,
            ``'attention_mask'`` and ``'label'``; must also expose ``.dataset``.
        optimizer: optimizer stepped once per batch.
        device: device every batch tensor is moved to.

    Returns:
        ``(mean_loss, accuracy)``: the sum of per-batch losses divided by the
        number of batches (a Python float), and the number of correct
        predictions divided by ``len(data_loader.dataset)`` (a 0-dim double
        tensor). Predictions are taken from the logits computed before that
        batch's parameter update.
    """
    model.train()
    running_loss = 0
    n_correct = 0

    for batch in tqdm(data_loader, desc="Training"):
        ids, mask, targets = (
            batch[field].to(device) for field in ('input_ids', 'attention_mask', 'label')
        )

        optimizer.zero_grad()
        result = model(input_ids=ids, attention_mask=mask, labels=targets)
        batch_loss, scores = result.loss, result.logits
        running_loss += batch_loss.item()

        # Index of the highest logit per row is the predicted class.
        guessed = torch.max(scores, dim=1)[1]
        n_correct += (guessed == targets).sum()

        batch_loss.backward()
        optimizer.step()

    mean_loss = running_loss / len(data_loader)
    accuracy = n_correct.double() / len(data_loader.dataset)
    return mean_loss, accuracy

def evaluate_epoch(model, data_loader, device):
    """Score ``model`` on ``data_loader`` without computing gradients.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=...,
            labels=...)`` whose result exposes ``.loss`` and ``.logits``.
        data_loader: yields dict batches with the keys ``'input_ids'``,
            ``'attention_mask'`` and ``'label'``; must also expose ``.dataset``.
        device: device every batch tensor is moved to.

    Returns:
        ``(mean_loss, accuracy, predictions, true_labels)``: the sum of
        per-batch losses divided by the number of batches, the number of
        correct predictions divided by ``len(data_loader.dataset)`` (a 0-dim
        double tensor), and two flat lists with the predicted and reference
        class ids in iteration order.
    """
    model.eval()
    loss_sum = 0
    hits = 0
    predictions = []
    true_labels = []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluation"):
            ids = batch['input_ids'].to(device)
            mask = batch['attention_mask'].to(device)
            gold = batch['label'].to(device)

            result = model(input_ids=ids, attention_mask=mask, labels=gold)
            batch_loss, scores = result.loss, result.logits
            loss_sum += batch_loss.item()

            # Index of the highest logit per row is the predicted class.
            guessed = torch.max(scores, dim=1)[1]
            hits += (guessed == gold).sum()

            # Move to host memory before collecting for the report.
            predictions.extend(guessed.cpu().numpy())
            true_labels.extend(gold.cpu().numpy())

    mean_loss = loss_sum / len(data_loader)
    accuracy = hits.double() / len(data_loader.dataset)
    return mean_loss, accuracy, predictions, true_labels

# Train the model
EPOCHS = 10
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch + 1}/{EPOCHS}")
    train_loss, train_acc = train_epoch(model, train_loader, optimizer, device)
    val_loss, val_acc, val_preds, val_labels = evaluate_epoch(model, val_loader, device)

    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")

# Classification Report (predictions from the last epoch's validation pass)
# `labels` is passed explicitly: the split is not stratified, so a class can be
# missing from both val_labels and val_preds. Without `labels`, the report
# infers the class list from the data and it no longer lines up with
# `target_names`. zero_division=0 keeps the scores of such empty classes at 0
# without emitting a warning per class.
print("\nClassification Report:")
print(classification_report(
    val_labels,
    val_preds,
    labels=list(label_to_index.values()),
    target_names=list(label_to_index.keys()),
    zero_division=0,
))

# Save Model (weights/config and tokenizer files go to the same directory)
model.save_pretrained('./bert_intent_model')
tokenizer.save_pretrained('./bert_intent_model')


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Epoch 1/10


Training: 100%|██████████| 17/17 [00:03<00:00,  5.38it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 19.99it/s]


Train Loss: 3.0317, Train Accuracy: 0.0916
Validation Loss: 2.8661, Validation Accuracy: 0.2121

Epoch 2/10


Training: 100%|██████████| 17/17 [00:03<00:00,  5.49it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 20.12it/s]


Train Loss: 2.6406, Train Accuracy: 0.2863
Validation Loss: 2.5226, Validation Accuracy: 0.3788

Epoch 3/10


Training: 100%|██████████| 17/17 [00:03<00:00,  5.48it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 19.20it/s]


Train Loss: 2.0390, Train Accuracy: 0.5954
Validation Loss: 1.8787, Validation Accuracy: 0.5303

Epoch 4/10


Training: 100%|██████████| 17/17 [00:03<00:00,  5.41it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 20.01it/s]


Train Loss: 1.4465, Train Accuracy: 0.8053
Validation Loss: 1.4197, Validation Accuracy: 0.6364

Epoch 5/10


Training: 100%|██████████| 17/17 [00:03<00:00,  5.46it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 20.31it/s]


Train Loss: 0.9917, Train Accuracy: 0.8969
Validation Loss: 1.1520, Validation Accuracy: 0.6515

Epoch 6/10


Training: 100%|██████████| 17/17 [00:03<00:00,  4.98it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 18.57it/s]


Train Loss: 0.6582, Train Accuracy: 0.9618
Validation Loss: 0.8742, Validation Accuracy: 0.8485

Epoch 7/10


Training: 100%|██████████| 17/17 [00:03<00:00,  5.31it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 19.53it/s]


Train Loss: 0.4408, Train Accuracy: 0.9809
Validation Loss: 0.7213, Validation Accuracy: 0.8636

Epoch 8/10


Training: 100%|██████████| 17/17 [00:03<00:00,  5.47it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 20.32it/s]


Train Loss: 0.2936, Train Accuracy: 0.9924
Validation Loss: 0.5944, Validation Accuracy: 0.8939

Epoch 9/10


Training: 100%|██████████| 17/17 [00:03<00:00,  5.35it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 19.25it/s]


Train Loss: 0.2079, Train Accuracy: 1.0000
Validation Loss: 0.5262, Validation Accuracy: 0.8939

Epoch 10/10


Training: 100%|██████████| 17/17 [00:03<00:00,  5.13it/s]
Evaluation: 100%|██████████| 5/5 [00:00<00:00, 19.50it/s]


Train Loss: 0.1532, Train Accuracy: 1.0000
Validation Loss: 0.4947, Validation Accuracy: 0.8939

Classification Report:
                       precision    recall  f1-score   support

                  EMI       1.00      1.00      1.00         5
                  COD       1.00      1.00      1.00         2
       ORTHO_FEATURES       1.00      1.00      1.00         3
        ERGO_FEATURES       0.80      1.00      0.89         4
           COMPARISON       1.00      1.00      1.00         1
             WARRANTY       1.00      1.00      1.00         5
100_NIGHT_TRIAL_OFFER       1.00      0.50      0.67         4
   SIZE_CUSTOMIZATION       0.50      1.00      0.67         1
   WHAT_SIZE_TO_ORDER       1.00      0.75      0.86         4
             LEAD_GEN       1.00      0.75      0.86         4
        CHECK_PINCODE       1.00      1.00      1.00         1
         DISTRIBUTORS       0.88      0.88      0.88         8
        MATTRESS_COST       1.00      1.00      1.00        

('./bert_intent_model\\tokenizer_config.json',
 './bert_intent_model\\special_tokens_map.json',
 './bert_intent_model\\vocab.txt',
 './bert_intent_model\\added_tokens.json')

In [5]:
# Load dataset
file_path = 'train.csv'
data = pd.read_csv(file_path)

# Generate label mappings
# Use the uniques reported by pd.factorize, i.e. the same call that produces
# the integer targets used for training, so the ids printed here always match
# the ids the model was trained on. Series.unique() would also list NaN as a
# label and shift every id that follows it.
label_values = pd.factorize(data['label'])[1]
label_to_index = {label: i for i, label in enumerate(label_values)}
index_to_label = {i: label for label, i in label_to_index.items()}

print("Label to Index Mapping:", label_to_index)
print("Index to Label Mapping:", index_to_label)


Label to Index Mapping: {'EMI': 0, 'COD': 1, 'ORTHO_FEATURES': 2, 'ERGO_FEATURES': 3, 'COMPARISON': 4, 'WARRANTY': 5, '100_NIGHT_TRIAL_OFFER': 6, 'SIZE_CUSTOMIZATION': 7, 'WHAT_SIZE_TO_ORDER': 8, 'LEAD_GEN': 9, 'CHECK_PINCODE': 10, 'DISTRIBUTORS': 11, 'MATTRESS_COST': 12, 'PRODUCT_VARIANTS': 13, 'ABOUT_SOF_MATTRESS': 14, 'DELAY_IN_DELIVERY': 15, 'ORDER_STATUS': 16, 'RETURN_EXCHANGE': 17, 'CANCEL_ORDER': 18, 'PILLOWS': 19, 'OFFERS': 20}
Index to Label Mapping: {0: 'EMI', 1: 'COD', 2: 'ORTHO_FEATURES', 3: 'ERGO_FEATURES', 4: 'COMPARISON', 5: 'WARRANTY', 6: '100_NIGHT_TRIAL_OFFER', 7: 'SIZE_CUSTOMIZATION', 8: 'WHAT_SIZE_TO_ORDER', 9: 'LEAD_GEN', 10: 'CHECK_PINCODE', 11: 'DISTRIBUTORS', 12: 'MATTRESS_COST', 13: 'PRODUCT_VARIANTS', 14: 'ABOUT_SOF_MATTRESS', 15: 'DELAY_IN_DELIVERY', 16: 'ORDER_STATUS', 17: 'RETURN_EXCHANGE', 18: 'CANCEL_ORDER', 19: 'PILLOWS', 20: 'OFFERS'}


In [9]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Use the GPU when torch reports one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Restore tokenizer and classifier from the local save directory
model_path = './bert_intent_model'
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForSequenceClassification.from_pretrained(model_path).to(device)
model.eval()  # evaluation mode for the predictions below

# Function to predict intent
def predict_intent(sentence):
    """Return the intent label predicted for a single sentence.

    Reads the module-level ``tokenizer``, ``model``, ``device`` and
    ``index_to_label``. NOTE(review): ``index_to_label`` is not defined in
    this cell, so it must already exist in the kernel when this is called.

    Args:
        sentence: text to classify.

    Returns:
        The ``index_to_label`` entry for the highest-scoring class.
    """
    # Same tokenisation settings as the training dataset: pad/truncate to 128.
    encoded = tokenizer(
        sentence,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors="pt",
    )
    ids = encoded['input_ids'].to(device)
    mask = encoded['attention_mask'].to(device)

    # Forward pass only; no gradients are needed for prediction.
    with torch.no_grad():
        scores = model(input_ids=ids, attention_mask=mask).logits
        best = torch.argmax(scores, dim=1).item()

    return index_to_label[best]

# Smoke-test the classifier on one sample query
test_sentence = "I want to buy pillows"
predicted_intent = predict_intent(test_sentence)

# Echo the query and the predicted label on separate lines
print(
    f"Input Sentence: {test_sentence}",
    f"Predicted Intent: {predicted_intent}",
    sep="\n",
)


Input Sentence: I want to buy pillows
Predicted Intent: PILLOWS
