In [8]:
import nltk
from nltk.stem.porter import PorterStemmer
import numpy as np
nltk.download('punkt') # Pre-trained Punkt tokenizer data (used by nltk.word_tokenize)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\aseba\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [9]:
# Single Porter stemmer instance shared by every call to stem().
stemmer = PorterStemmer()

In [10]:
def tokenize(sentence):
    """Split a sentence into tokens.

    Thin wrapper around ``nltk.word_tokenize``.

    Args:
        sentence: Raw text to split.

    Returns:
        The tokens returned by ``nltk.word_tokenize`` for ``sentence``.
    """
    tokens = nltk.word_tokenize(sentence)
    return tokens

def stem(word):
    """Return the stem of a word, ignoring case.

    Args:
        word: A single token.

    Returns:
        The result of ``stemmer.stem`` applied to the lower-cased word.
    """
    lowered = word.lower()
    return stemmer.stem(lowered)

def bag_of_words(tokenized_sentence, all_words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: Iterable of raw tokens; each one is stemmed here
            before being compared with the vocabulary.
        all_words: Sequence of vocabulary words (already stemmed). Position
            ``i`` of the result corresponds to ``all_words[i]``.

    Returns:
        A ``float32`` NumPy array of length ``len(all_words)`` holding 1.0
        where the vocabulary word occurs in the sentence and 0.0 elsewhere.
    """
    # Stem once and keep the stems in a set: each vocabulary lookup is then a
    # hash probe instead of a scan over the whole sentence.
    sentence_stems = {stem(w) for w in tokenized_sentence}
    return np.array(
        [1.0 if w in sentence_stems else 0.0 for w in all_words],
        dtype=np.float32,
    )

In [11]:
import json

In [12]:
# JSON text is UTF-8 by specification. Naming the encoding keeps non-ASCII
# patterns/responses from being decoded with the platform's locale encoding.
with open('intents.json', 'r', encoding='utf-8') as f:
    intents = json.load(f)

all_words = []  # every token of every pattern (stemmed and de-duplicated below)
tags = []       # one entry per intent
xy = []         # (tokenized pattern, tag) pairs
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))  # each tokenized pattern together with its tag

ignore_words = ['?', '!', '.', ',']
# Stem only real words; the punctuation tokens listed above are dropped.
all_words = [stem(w) for w in all_words if w not in ignore_words]
all_words = sorted(set(all_words))  # remove duplicates, fixed order
tags = sorted(set(tags))

In [13]:
# One bag-of-words vector per tokenized pattern.
X_train = np.array(
    [bag_of_words(pattern_sentence, all_words) for pattern_sentence, _ in xy]
)
# Targets are the position of each tag in `tags` (integer class ids, the
# target format used with CrossEntropyLoss).
y_train = np.array([tags.index(tag) for _, tag in xy])

In [14]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [15]:
class ChatDataset(Dataset):
    """Dataset pairing each feature row with its class label."""

    def __init__(self, X_train, y_train):
        """Keep references to the features and labels.

        Args:
            X_train: Indexable, sized collection of feature vectors.
            y_train: Indexable collection of labels, aligned with ``X_train``.
        """
        self.x_data = X_train
        self.y_data = y_train
        self.n_samples = len(X_train)

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx``.

        The features are returned as stored; the label is wrapped in a
        ``torch.long`` tensor.
        """
        features = self.x_data[idx]
        label = torch.tensor(self.y_data[idx], dtype=torch.long)
        return features, label

In [16]:
batch_size = 8  # samples per mini-batch

# Wrap the encoded training arrays and serve them in shuffled mini-batches.
dataset = ChatDataset(X_train, y_train)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [17]:
# Pull one mini-batch from the shuffled loader to inspect what it yields.
next(iter(train_loader))

[tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1.],
         [0.

In [18]:
class ChatModel(nn.Module):
    """Feed-forward network: three linear layers with ReLU between them."""

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        Args:
            input_size: Number of input features.
            hidden_size: Width of both hidden layers.
            num_classes: Number of output values.
        """
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the network.

        No activation is applied after the last linear layer.
        """
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        return self.l3(hidden)

In [19]:
hidden_size = 8  # units in each hidden layer
output_size = len(tags)  # one output per intent tag
input_size = len(X_train[0])  # length of one bag-of-words vector
learning_rate = 0.001
num_epochs = 2000

# Use a torch.device in both branches so `device` has one type whether or not
# CUDA is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ChatModel(input_size, hidden_size, output_size).to(device)
model

ChatModel(
  (l1): Linear(in_features=54, out_features=8, bias=True)
  (l2): Linear(in_features=8, out_features=8, bias=True)
  (l3): Linear(in_features=8, out_features=7, bias=True)
  (relu): ReLU()
)

In [20]:
# Adam updates every parameter of the model; cross-entropy scores the model
# outputs against the integer class labels.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [21]:
for epoch in range(num_epochs):
    for words, labels in train_loader:  # one mini-batch per iteration
        words, labels = words.to(device), labels.to(device)

        # Clear the gradients left from the previous batch.
        optimizer.zero_grad()

        # Forward pass and loss.
        outputs = model(words)
        loss = criterion(outputs, labels)

        # Backward pass, then update the parameters.
        loss.backward()
        optimizer.step()

    # Every 100 epochs, report the loss of the last batch of that epoch.
    if (epoch + 1) % 100 == 0:
        print(f'epoch {epoch + 1}/{num_epochs}, loss={loss.item():.4f}')

print(f'final loss, loss = {loss.item():.4f}')

# Save the weights together with the layer sizes, vocabulary and tags.
FILE = "data.pth"
data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "output_size": output_size,
    "hidden_size": hidden_size,
    "all_words": all_words,
    "tags": tags
}
torch.save(data, FILE)

print(f'training complete, file saved to {FILE}')

epoch 100/2000, loss=0.5699
epoch 200/2000, loss=0.1730
epoch 300/2000, loss=0.0332
epoch 400/2000, loss=0.0084
epoch 500/2000, loss=0.0022
epoch 600/2000, loss=0.0077
epoch 700/2000, loss=0.0009
epoch 800/2000, loss=0.0003
epoch 900/2000, loss=0.0002
epoch 1000/2000, loss=0.0004
epoch 1100/2000, loss=0.0004
epoch 1200/2000, loss=0.0002
epoch 1300/2000, loss=0.0002
epoch 1400/2000, loss=0.0004
epoch 1500/2000, loss=0.0001
epoch 1600/2000, loss=0.0000
epoch 1700/2000, loss=0.0001
epoch 1800/2000, loss=0.0000
epoch 1900/2000, loss=0.0000
epoch 2000/2000, loss=0.0000
final loss, loss = 0.0000
training complete, file saved to data.pth


In [22]:
import random

# NOTE: torch.load relies on pickle, so only load checkpoint files you trust.
# map_location puts the saved tensors on the current device even when the
# checkpoint was written from a different one (e.g. saved on GPU, loaded on CPU).
data = torch.load(FILE, map_location=device)

input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data["all_words"]
tags = data["tags"]
model_state = data["model_state"]

model.load_state_dict(model_state)  # restore the saved parameters
model.eval()  # switch to evaluation mode

ChatModel(
  (l1): Linear(in_features=54, out_features=8, bias=True)
  (l2): Linear(in_features=8, out_features=8, bias=True)
  (l3): Linear(in_features=8, out_features=7, bias=True)
  (relu): ReLU()
)

In [25]:
bot_name = "Andrés"
print("Comencemos a hablar! Puedes escribir 'quit' para salir")
while True:
    sentence = input('Tu: ')
    if sentence == 'quit':
        break

    sentence = tokenize(sentence)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])  # one row, one column per vocabulary word
    # The model was moved to `device`; the input must be on the same device or
    # the forward pass fails when CUDA is in use.
    X = torch.from_numpy(X).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(X)
        probs = torch.softmax(output, dim=1)
    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]
    prob = probs[0][predicted.item()]

    # Answer only when the predicted class is confident enough.
    if prob.item() > 0.75:
        for intent in intents["intents"]:
            if tag == intent["tag"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
    else:
        print(f"{bot_name}: No entiendo lo que me dices")

Comencemos a hablar! Puedes escribir 'quit' para salir
Tu: Hello
Andrés: Hi there, how can I help?
Tu: What products do you sell?
Andrés: We sell coffee and tea
Tu: Thanks!
Andrés: My pleasure
Tu: quit
