In [2]:
# Install the Hugging Face stack (transformers, datasets, accelerate) and PyTorch
# into the running kernel's environment via the %pip magic.
# NOTE(review): no version is pinned, so re-running later may resolve different
# releases. transformers, datasets and accelerate are not imported by any cell
# visible in this notebook -- confirm they are still required.
%pip install transformers datasets torch accelerate


Defaulting to user installation because normal site-packages is not writeable
Collecting transformers
  Downloading transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
Collecting datasets
  Downloading datasets-4.4.2-py3-none-any.whl.metadata (19 kB)
Collecting torch
  Downloading torch-2.9.1-cp313-cp313-win_amd64.whl.metadata (30 kB)
Collecting accelerate
  Downloading accelerate-1.12.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.2-cp39-abi3-win_amd64.whl.metadata (7.4 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.7.0-cp38-abi3-win_amd64.whl.metadata (4.2 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.6.0-cp313-cp313-win_amd64.whl.metadata (13 kB)
Collecting multiprocess<0.70.19 (from datasets)
  Downloading multiprocess-0.70.1

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spyder 6.0.7 requires ipython!=8.17.1,<9.0.0,>=8.13.0; python_version > "3.8", but you have ipython 9.4.0 which is incompatible.


In [4]:
# Install the packages the cells below actually import: torch, nltk and numpy.
# NOTE(review): versions are unpinned, and torch is already requested by the
# earlier install cell -- consider merging the two install cells.
%pip install torch nltk numpy



Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [5]:
##2. Import Libraries
import json
import random
import pickle
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import nltk
from nltk.stem import WordNetLemmatizer

# Tokenizer data used by nltk.word_tokenize. Older NLTK releases read "punkt";
# NLTK >= 3.9 reads "punkt_tab" instead and raises LookupError without it, so
# both are requested. On a release that does not know "punkt_tab" the call just
# reports the unknown id and returns False rather than raising.
nltk.download("punkt")
nltk.download("punkt_tab")
# WordNet corpus backing WordNetLemmatizer. Kept last so the cell still
# displays this call's return value.
nltk.download("wordnet")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\maury\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\maury\AppData\Roaming\nltk_data...


True

In [7]:
##3. Load intents.json (FIXED CODE)
# Read the intent definitions. The encoding is given explicitly because the
# default for open() is the platform locale (a legacy code page on Windows),
# which would mis-decode or reject UTF-8 JSON containing non-ASCII characters.
with open("dataset/intents.json", "r", encoding="utf-8") as f:
    intents = json.load(f)


In [8]:
##4. Preprocessing
lemmatizer = WordNetLemmatizer()

# One (tokens, tag) pair per example pattern; everything else is derived from it.
documents = []
for intent in intents["intents"]:
    tag = intent["tag"]
    for pattern in intent["patterns"]:
        tokens = nltk.word_tokenize(pattern)
        documents.append((tokens, tag))

# Vocabulary: unique, lower-cased, lemmatized tokens. Tokens that are not purely
# alphanumeric (punctuation and the like) are dropped before normalisation.
vocabulary = {
    lemmatizer.lemmatize(token.lower())
    for tokens, _ in documents
    for token in tokens
    if token.isalnum()
}
words = sorted(vocabulary)

# Class labels: every tag that contributed at least one pattern, sorted.
classes = sorted({tag for _, tag in documents})


In [9]:
##5. Training Data
# Build one (bag-of-words vector, one-hot label) pair per document.
training = []
output_empty = [0] * len(classes)
# Tag -> column index, computed once instead of a linear classes.index() per document.
class_index = {tag: idx for idx, tag in enumerate(classes)}

for doc in documents:
    tokens, tag = doc
    # A set makes each vocabulary membership test O(1) instead of a list scan.
    word_patterns = {lemmatizer.lemmatize(w.lower()) for w in tokens}
    bag = [1 if w in word_patterns else 0 for w in words]

    output = output_empty[:]
    output[class_index[tag]] = 1
    training.append((bag, output))

random.shuffle(training)

# Split features and labels straight from the shuffled pairs. Going through an
# intermediate dtype=object array is avoided on purpose: when len(words) happens
# to equal len(classes), NumPy stacks the pairs into a 3-D object array and X / y
# come out with object dtype, which cannot be converted to a tensor.
# `training` therefore stays a plain list of (bag, output) pairs.
X = np.array([bag for bag, _ in training])
y = np.array([output for _, output in training])


In [11]:
##6. Save Metadata (IMPORTANT)
import os
import pickle

# Make sure the output folder exists before anything is written into it.
os.makedirs("saved_model", exist_ok=True)
print("saved_model folder ready ✅")

# Persist the vocabulary and the class labels. Context managers close (and
# therefore flush) each file deterministically instead of leaving the handle
# open until garbage collection.
with open("saved_model/words.pkl", "wb") as words_file:
    pickle.dump(words, words_file)
with open("saved_model/classes.pkl", "wb") as classes_file:
    pickle.dump(classes, classes_file)

print("Metadata saved successfully ✅")





saved_model folder ready ✅
Metadata saved successfully ✅


In [12]:
##7. PyTorch Model (OPTIMIZED)
class ChatBotModel(nn.Module):
    """Feed-forward classifier with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # The attribute names fc1 / fc2 are the prefixes of the state_dict keys,
        # so they must stay stable for saved weights to load.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the raw (un-normalised) output scores for `x`."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        logits = self.fc2(activated)
        return logits


In [13]:
##8. Train Model
# Fix the RNG so weight initialisation (and therefore the loss curve) is
# reproducible across kernel restarts.
torch.manual_seed(42)

model = ChatBotModel(len(words), 128, len(classes))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# The data never changes between epochs, so convert it once up front rather
# than rebuilding both tensors on every iteration.
inputs = torch.tensor(X, dtype=torch.float32)
# CrossEntropyLoss takes class indices, so collapse the one-hot rows of y.
targets = torch.tensor(y).argmax(dim=1)

# Full-batch training: every step uses the whole dataset.
for epoch in range(300):
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 50 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")


Epoch 0, Loss: 4.3935
Epoch 50, Loss: 3.2516
Epoch 100, Loss: 1.2585
Epoch 150, Loss: 0.3539
Epoch 200, Loss: 0.1450
Epoch 250, Loss: 0.0814


In [14]:
##9. Save Model
# Store only the learned parameters (the state_dict), not the module object.
trained_weights = model.state_dict()
torch.save(trained_weights, "saved_model/model.pth")
print("Model trained and saved ✅")


Model trained and saved ✅
