In [1]:
!pip install transformers datasets scikit-learn



In [2]:
!pip install torch

Collecting torch
  Using cached torch-2.9.0-cp313-cp313-win_amd64.whl.metadata (30 kB)
Downloading torch-2.9.0-cp313-cp313-win_amd64.whl (109.3 MB)
   ---------------------------------------- 0.0/109.3 MB ? eta -:--:--
   ---------------------------------------- 0.0/109.3 MB ? eta -:--:--
   ---------------------------------------- 0.3/109.3 MB ? eta -:--:--
   ---------------------------------------- 0.3/109.3 MB ? eta -:--:--
   ---------------------------------------- 0.3/109.3 MB ? eta -:--:--
   ---------------------------------------- 0.5/109.3 MB 478.5 kB/s eta 0:03:48
   ---------------------------------------- 1.0/109.3 MB 920.4 kB/s eta 0:01:58
    --------------------------------------- 1.6/109.3 MB 1.2 MB/s eta 0:01:29
    --------------------------------------- 2.6/109.3 MB 1.8 MB/s eta 0:01:00
   - -------------------------------------- 3.4/109.3 MB 2.1 MB/s eta 0:00:50
   - -------------------------------------- 4.5/109.3 MB 2.4 MB/s eta 0:00:44
   - --------------------

In [17]:
!pip install accelerate>=0.26.0

In [6]:
import pandas as pd
from datasets import Dataset
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
import torch


In [7]:
# Load the intent dataset, then preview the first rows followed by the column index.
df = pd.read_csv('Intent.csv')

print(df.head(), df.columns, sep="\n")

                                        Text             Intent
0             I want to apply for BTech 2025  admission_process
1  What is the eligibility for MTech course?        eligibility
2             How much are the fees for MBA?          fees_info
3    When will admission forms be available?    admission_dates
4                    My name is Rahul Sharma          user_info
Index(['Text', 'Intent'], dtype='object')


In [8]:
# Give every distinct intent an integer id, numbered in order of first appearance.
intent_labels = {name: idx for idx, name in enumerate(pd.unique(df['Intent']))}

# Attach the integer id of each row's intent as a new `label` column.
df['label'] = df['Intent'].map(intent_labels)

print(df.head())


                                        Text             Intent  label
0             I want to apply for BTech 2025  admission_process      0
1  What is the eligibility for MTech course?        eligibility      1
2             How much are the fees for MBA?          fees_info      2
3    When will admission forms be available?    admission_dates      3
4                    My name is Rahul Sharma          user_info      4


In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
# NOTE(review): the split is not stratified, so a rare intent could end up on
# only one side — confirm this is acceptable for this dataset.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Text'].to_list(), df['label'].to_list(), random_state=42, test_size=0.2
)

# Wrap each split in a Hugging Face Dataset with `text` and `labels` columns.
train_dataset, test_dataset = (
    Dataset.from_dict({"text": texts, "labels": labels})
    for texts, labels in ((train_texts, train_labels), (test_texts, test_labels))
)

print(train_dataset)


Dataset({
    features: ['text', 'labels'],
    num_rows: 124
})


In [10]:
model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)


def tokenize(batch):
    """Tokenize the ``text`` entries of a batch with the module-level tokenizer.

    Truncation and padding are both enabled, so every sequence returned for
    one call has been padded/truncated by the tokenizer.
    """
    return tokenizer(batch['text'], truncation=True, padding=True)


# Tokenize the training split first, then the test split, in batched mode.
train_dataset, test_dataset = (
    split.map(tokenize, batched=True) for split in (train_dataset, test_dataset)
)

# Expose only the model inputs and the labels, as torch tensors.
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])


Map:   0%|          | 0/124 [00:00<?, ? examples/s]

Map:   0%|          | 0/32 [00:00<?, ? examples/s]

In [11]:
# One output class per distinct intent in the label mapping.
num_labels = len(intent_labels)

# Load the pretrained DistilBERT checkpoint with a classification head of that size.
model = DistilBertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
from transformers import TrainingArguments

# Training configuration: 15 epochs on CPU with a per-device batch size of 2;
# the loss is logged every 10 steps, checkpoints go to ./results and logs to ./logs.
training_args = TrainingArguments(
    use_cpu=True,
    num_train_epochs=15,
    per_device_train_batch_size=2,
    output_dir='./results',
    logging_steps=10,
    logging_dir='./logs',
)


In [13]:
# Bind the model, the training configuration and both dataset splits together.
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=test_dataset,
    train_dataset=train_dataset,
)


In [14]:
# Run fine-tuning with the Trainer configured above; as the last expression in
# the cell, the returned TrainOutput (global step, training loss, metrics) is displayed.
trainer.train()


Step,Training Loss
10,2.4885
20,2.3368
30,2.2902
40,2.2279
50,2.2486
60,2.14
70,2.1635
80,1.8307
90,1.6787
100,2.1513


TrainOutput(global_step=930, training_loss=0.45825855641034985, metrics={'train_runtime': 599.6823, 'train_samples_per_second': 3.102, 'train_steps_per_second': 1.551, 'total_flos': 29360217566880.0, 'train_loss': 0.45825855641034985, 'epoch': 15.0})

In [15]:
# Write the fine-tuned weights and the tokenizer files into the same directory
# so the checkpoint can later be loaded from that single path.
model.save_pretrained(save_directory="./intent_model")
tokenizer.save_pretrained(save_directory="./intent_model")


('./intent_model\\tokenizer_config.json',
 './intent_model\\special_tokens_map.json',
 './intent_model\\vocab.txt',
 './intent_model\\added_tokens.json',
 './intent_model\\tokenizer.json')

In [16]:
# Rebuild the intent -> id mapping (ids follow order of first appearance) ...
intent_labels = {name: idx for idx, name in enumerate(df['Intent'].drop_duplicates())}

# ... and invert it: the ids are 0..n-1 in insertion order, so enumerating the
# keys yields exactly the id -> intent pairs.
id2label = dict(enumerate(intent_labels))

print(id2label)

{0: 'admission_process', 1: 'eligibility', 2: 'fees_info', 3: 'admission_dates', 4: 'user_info', 5: 'document_requirements', 6: 'evaluation_process', 7: 'contact_info', 8: 'scholarship_info', 9: 'technical_support', 10: 'academic_details', 11: 'campus_life'}


In [26]:
from transformers import pipeline

# Load the fine-tuned checkpoint and its tokenizer from the directory they were saved to.
intent_classifier = pipeline("text-classification", model="./intent_model", tokenizer="./intent_model")

examples = [
    "I want to know about admission process",
    "What are the fees for MBA?",
    "Eligibility criteria for BTech?",
    ""
]


def _resolve_intent(raw_label, names):
    """Translate a label reported by the pipeline into an intent name.

    Args:
        raw_label: Label string from the pipeline result, e.g. ``"LABEL_3"``.
        names: Mapping from integer class id to intent name.

    Returns:
        The intent name for a ``LABEL_<n>`` label whose id is in ``names``;
        otherwise ``raw_label`` unchanged (for example when the label is
        already a readable name or the id is unknown).
    """
    prefix, _, suffix = raw_label.rpartition("_")
    if prefix == "LABEL" and suffix.isdigit():
        return names.get(int(suffix), raw_label)
    return raw_label


for text in examples:
    # A blank query carries no information to classify; skip it instead of
    # printing a meaningless prediction.
    if not text.strip():
        continue
    result = intent_classifier(text)
    intent_name = _resolve_intent(result[0]['label'], id2label)
    print(f"Query: {text}")
    print(f"Predicted Intent: {intent_name}\n")


Device set to use cpu


Query: I want to know about admission process
Predicted Intent: evaluation_process

Query: What are the fees for MBA?
Predicted Intent: fees_info

Query: Eligibility criteria for BTech?
Predicted Intent: admission_process

Query: how can i contact perticulaar college
Predicted Intent: evaluation_process

