#### Libraries importeren en random seed

In [104]:
import json
import os
import random

import nltk
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from nltk.stem.porter import PorterStemmer
from torch.utils.data import DataLoader, Dataset

In [93]:
# Seed every random number generator the notebook draws from, so a fresh
# "Restart & Run All" reproduces the same results:
#   - random: random.choice picks the bot response in chat()
#   - numpy:  kept for any NumPy-based randomness
#   - torch:  layer initialisation and DataLoader shuffling
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x143f0984810>

#### Data inladen en naar dataframe omzetten

In [94]:
# Read the intents file through a context manager so the file handle is closed
# deterministically, and decode it explicitly as UTF-8 instead of relying on the
# platform default encoding.
with open('Data/Intent.json', encoding='utf-8') as intents_file:
    data = json.load(intents_file)

# One row per intent; keep only the columns used in this notebook.
df = pd.DataFrame(data['intents'])
df = df[['intent', 'text', 'responses']]
df.head()

Unnamed: 0,intent,text,responses
0,Greeting,"[Hi, Hi there, Hola, Hello, Hello there, Hya, ...","[Hi human, please tell me your GeniSys user, H..."
1,GreetingResponse,"[My user is Adam, This is Adam, I am Adam, It ...","[Great! Hi <HUMAN>! How can I help?, Good! Hi ..."
2,CourtesyGreeting,"[How are you?, Hi how are you?, Hello how are ...","[Hello, I am great, how are you? Please tell m..."
3,CourtesyGreetingResponse,"[Good thanks! My user is Adam, Good thanks! Th...","[Great! Hi <HUMAN>! How can I help?, Good! Hi ..."
4,CurrentHumanQuery,"[What is my name?, What do you call me?, Who d...","[You are <HUMAN>! How can I help?, Your name i..."


#### Functies maken voor tokenizen

In [95]:
# Download the 'punkt_tab' NLTK resource (presumably the tokenizer data that
# nltk.word_tokenize in tokenize() relies on -- confirm against the NLTK version in use).
nltk.download('punkt_tab')
# Single shared Porter stemmer instance, used by stem().
stemmer = PorterStemmer()

def tokenize(sentence):
    """Lower-case ``sentence`` and split it into tokens with ``nltk.word_tokenize``.

    Args:
        sentence: The text to tokenize.

    Returns:
        The tokens produced by ``nltk.word_tokenize`` for the lower-cased text.
    """
    lowered = sentence.lower()
    return nltk.word_tokenize(lowered)

def stem(word):
    """Return the Porter stem of ``word``, lower-casing it first.

    Args:
        word: A single token.

    Returns:
        The result of the module-level ``stemmer`` applied to the lower-cased token.
    """
    lowered = word.lower()
    return stemmer.stem(lowered)

def bag_of_words(tokenized_sentence, words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: Iterable of tokens; each one is passed through ``stem``.
        words: Vocabulary of stemmed words; position ``i`` of the result
            corresponds to ``words[i]``.

    Returns:
        A 1-D ``np.float32`` array of length ``len(words)`` holding 1.0 where the
        vocabulary word occurs (after stemming) in the sentence and 0.0 elsewhere.
    """
    # A set gives constant-time membership tests; the original list lookup made
    # the encoding cost grow with len(words) * len(tokenized_sentence).
    stemmed_tokens = {stem(token) for token in tokenized_sentence}
    return np.array(
        [1.0 if word in stemmed_tokens else 0.0 for word in words],
        dtype=np.float32,
    )

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\boris\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


#### Woordenlijst maken

In [117]:
# Walk every intent once and collect:
#   xy        -> (token list, intent name) pair for each example sentence
#   tags      -> intent names
#   all_words -> every token seen in the example sentences
xy = []
tags = []
all_words = []

for entry in data["intents"]:
    label = entry["intent"]
    tags.append(label)

    for sentence in entry["text"]:
        tokens = tokenize(sentence)
        all_words += tokens
        xy.append((tokens, label))

# Vocabulary: drop the three punctuation tokens, stem what is left, then
# de-duplicate and sort so every word has a stable index.
punctuation = ["?", ".", "!"]
vocabulary = {stem(token) for token in all_words if token not in punctuation}
all_words = sorted(vocabulary)
tags = sorted(set(tags))

#### Traindata samenstellen

In [118]:
# Position of each intent name in ``tags``; used as the integer class label.
tag_to_index = {name: position for position, name in enumerate(tags)}

# One bag-of-words row per example sentence, paired with its class index.
X_train = np.array([bag_of_words(tokens, all_words) for tokens, _ in xy])
Y_train = np.array([tag_to_index[name] for _, name in xy])
len(all_words)

114

In [119]:
class ChatDataset(Dataset):
    """Dataset of (bag-of-words vector, intent index) pairs.

    Args:
        x_data: Indexable collection of feature vectors. Defaults to the
            notebook-level ``X_train`` when omitted.
        y_data: Indexable collection of integer class labels, one per feature
            vector. Defaults to the notebook-level ``Y_train`` when omitted.

    Raises:
        ValueError: If ``x_data`` and ``y_data`` differ in length.
    """

    def __init__(self, x_data=None, y_data=None):
        # Fall back to the notebook globals so the existing ``ChatDataset()``
        # call keeps working, while allowing other data to be passed in.
        self.x_data = X_train if x_data is None else x_data
        self.y_data = Y_train if y_data is None else y_data
        if len(self.x_data) != len(self.y_data):
            raise ValueError(
                f"x_data and y_data differ in length: "
                f"{len(self.x_data)} != {len(self.y_data)}"
            )
        self.n_samples = len(self.x_data)

    def __getitem__(self, index):
        """Return sample ``index`` as a (float32 tensor, int64 tensor) pair."""
        features = torch.tensor(self.x_data[index], dtype=torch.float32)
        label = torch.tensor(self.y_data[index], dtype=torch.long)
        return features, label

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples
    
# Wrap the training arrays and iterate over them in shuffled mini-batches of 8,
# loading in the main process (num_workers=0).
dataset = ChatDataset()
train_loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0,
)

#### Intent Classifier netwerk bouwen

In [120]:
# Feed-forward intent classifier: one input per vocabulary word, a hidden layer
# of 50 units with ReLU, and one output score per intent.
n_inputs = len(X_train[0])
n_outputs = len(tags)

model = nn.Sequential(
    nn.Linear(n_inputs, 50),
    nn.ReLU(),
    nn.Linear(50, n_outputs),
)
print(n_inputs)
print(n_outputs)

114
22


#### Loss en optimizer declareren en controleren op CUDA

In [121]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
component = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the device *before* building the optimizer, so the optimizer
# is constructed from the parameters as they live on the target device.
model = model.to(component)

# Cross-entropy over the raw output scores; Adam with a learning rate of 0.001.
criterium = nn.CrossEntropyLoss()
optimalisator = optim.Adam(model.parameters(), lr=0.001)
print(component)

cpu


In [122]:
num_epochs = 500
weights_path = "Getrainde_modellen/NLP_Model_gewichten.pth"

# Create the output directory up front: without it torch.save would fail only
# after the whole training run has finished, and the trained weights would be lost.
os.makedirs(os.path.dirname(weights_path), exist_ok=True)

for epoch in range(num_epochs):
    model.train()
    for words, labels in train_loader:
        # Batches must live on the same device as the model.
        words = words.to(component)
        labels = labels.to(component)

        outputs = model(words)
        loss = criterium(outputs, labels)

        # Standard step: clear old gradients, backpropagate, update the weights.
        optimalisator.zero_grad()
        loss.backward()
        optimalisator.step()

    # Progress report every 100 epochs. Note that ``loss`` is the loss of the
    # last mini-batch of the epoch, not an average over the epoch.
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch + 1} / {num_epochs}], Loss: {loss.item():.4f}")

print("Training voltooid! Gewichten opslaan...")
torch.save(model.state_dict(), weights_path)

Epoch [100 / 500], Loss: 0.0090
Epoch [200 / 500], Loss: 0.0017
Epoch [300 / 500], Loss: 0.0006
Epoch [400 / 500], Loss: 0.0001
Epoch [500 / 500], Loss: 0.0001
Training voltooid! Gewichten opslaan...


#### Evaluatie (op de trainingsdata)

In [123]:
# Measure classification accuracy over the batches yielded by train_loader,
# i.e. on the same samples the model was trained on.
model.eval()
correct_voorspeld = 0
totaal_voorspeld = 0

with torch.no_grad():
    for words, labels in train_loader:
        words = words.to(component)
        labels = labels.to(component)
        test_output = model(words)
        # Predicted class = index of the highest output score in each row.
        predicted = test_output.max(dim=1).indices
        correct_voorspeld += int((predicted == labels).sum())
        totaal_voorspeld += len(labels)

accuratie = correct_voorspeld / totaal_voorspeld * 100

print(f"Accuratie na training: {accuratie:.2f}%")

Accuratie na training: 100.00%


#### Chat implementatie

In [127]:
def chat():
    """Run an interactive console chat loop backed by the trained intent model.

    Each turn reads a line from the user, encodes it as a bag-of-words vector
    over ``all_words``, lets ``model`` pick the highest-scoring intent and prints
    a randomly chosen response belonging to that intent. Typing 'stop'
    (case-insensitive, surrounding whitespace ignored) ends the loop.

    Reads the notebook-level ``model``, ``component``, ``all_words``, ``tags``
    and ``data``; returns nothing.
    """
    print("NLP model chat functie. Type 'stop' om chat te beëindigen.")

    # Make sure the model is in inference mode for the whole session.
    model.eval()

    while True:
        sentence = input("Jouw zin: ")

        if sentence.strip().lower() == "stop":
            print("Tot ziens!")
            break

        # Blank input would encode to an all-zero vector and yield a meaningless
        # prediction, so just prompt again.
        if not sentence.strip():
            continue

        tokenized_sentence = tokenize(sentence)
        bag = bag_of_words(tokenized_sentence, all_words)

        # The input must be on the same device as the model; without this the
        # forward pass fails whenever the model was moved to CUDA.
        input_data = torch.tensor(bag, dtype=torch.float32).to(component)

        # No gradients are needed for inference.
        with torch.no_grad():
            output = model(input_data)

        _, predicted = torch.max(output, dim=0)
        predicted_tag = tags[predicted.item()]

        for intent in data["intents"]:
            if intent["intent"] == predicted_tag:
                response = random.choice(intent["responses"])
                print(f"Gebruiker: {sentence}")
                print(f"Bot antwoord: {response}")

chat()  # Starts the chat

NLP model chat functie. Type 'stop' om chat te beëindigen.
Gebruiker: Hello
Bot antwoord: Hi human, please tell me your GeniSys user
Gebruiker: This user is Boris
Bot antwoord: Great! Hi <HUMAN>! How can I help?
Gebruiker: How are you?
Bot antwoord: Hello, how are you? I am great thanks! Please tell me your GeniSys user
Gebruiker: Its Boris
Bot antwoord: Cool! Hello <HUMAN>, what can I do for you?
Gebruiker: Bye
Bot antwoord: See you later
Tot ziens!
