In [1]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/ASP Grp 3 Panda 冠军队 Team/ASP AIML Model Training

/content/drive/MyDrive/ASP Grp 3 Panda 冠军队 Team/ASP AIML Model Training


In [3]:
%pwd

'/content/drive/MyDrive/ASP Grp 3 Panda 冠军队 Team/ASP AIML Model Training'

In [4]:
import nltk
from nltk.stem.porter import PorterStemmer
import json
import numpy as np

import torch 
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import random

nltk.download("punkt")
stemmer = PorterStemmer()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [5]:
def tokenize(sentence):
  return nltk.word_tokenize(sentence)

In [6]:
def stem(word):
  return stemmer.stem(word.lower())

In [7]:
def bag_of_words(tokenized_sentence, all_words):
  """
  sentence = ["hello", "how", "are", "you"]
  words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
  bag = [0, 1, 0, 1, 0, 0, 0]
  """
  tokenized_sentence = [stem(w) for w in tokenized_sentence]

  bag = np.zeros(len(all_words), dtype = np.float32)
  for idx, w in enumerate(all_words):
    if w in tokenized_sentence:
      bag[idx] = 1.0
  
  return bag

In [8]:
sentence = ["hello", "how", "are", "you"]
words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
bag = bag_of_words(sentence, words)
print(bag)

[0. 1. 0. 1. 0. 0. 0.]


In [9]:
words = ["Organize", "organizes", "organizing"]
stemmed_words = [stem(x) for x in words]
print(stemmed_words)

['organ', 'organ', 'organ']


In [10]:
with open("intents.json", "r") as f:
  intents = json.load(f)

print(intents)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'Hey', 'How are you?', 'Hi Chatbot', 'Hello', 'Good morning', 'Good afternoon', 'Good evening', 'Howdy chatbot', 'Are you the Question and Answer chatbot I am looking for?'], 'responses': ['Hi there, is there any question you have that I can answer?', 'Hello, I am here to answer to answer any questions you possibly have. Do not hesistate to ask me any as it is my duty to help you/']}, {'tag': 'farewell', 'patterns': ['Bye', 'See you later', 'Goodbye', 'See you next time', 'I will be back later', 'Hope to see you again', 'Good night'], 'responses': ['Thank you. Do not hesistate to come back to ask me anymore questions', 'Thank you. Please ask me more questions later if you have more to ask.']}, {'tag': 'about', 'patterns': ['What is Chronic Kidney Disease?', 'Is Chronic Kidney Disease the same as CKD?', 'Can you explain to me what is CKD', 'Is CKD a sudden disease like heart attack?', 'How does CKD affect the kidneys?'], 'responses': [

In [11]:
all_words = []
tags = []
xy = []
for intent in intents["intents"]:
  tag = intent["tag"]
  tags.append(tag)
  for pattern in intent["patterns"]:
    w = tokenize(pattern)
    all_words.extend(w)
    xy.append((w, tag))

In [12]:
print(all_words)

['Hi', 'Hey', 'How', 'are', 'you', '?', 'Hi', 'Chatbot', 'Hello', 'Good', 'morning', 'Good', 'afternoon', 'Good', 'evening', 'Howdy', 'chatbot', 'Are', 'you', 'the', 'Question', 'and', 'Answer', 'chatbot', 'I', 'am', 'looking', 'for', '?', 'Bye', 'See', 'you', 'later', 'Goodbye', 'See', 'you', 'next', 'time', 'I', 'will', 'be', 'back', 'later', 'Hope', 'to', 'see', 'you', 'again', 'Good', 'night', 'What', 'is', 'Chronic', 'Kidney', 'Disease', '?', 'Is', 'Chronic', 'Kidney', 'Disease', 'the', 'same', 'as', 'CKD', '?', 'Can', 'you', 'explain', 'to', 'me', 'what', 'is', 'CKD', 'Is', 'CKD', 'a', 'sudden', 'disease', 'like', 'heart', 'attack', '?', 'How', 'does', 'CKD', 'affect', 'the', 'kidneys', '?', 'What', 'are', 'the', 'causes', 'of', 'CKD', '?', 'Is', 'diabetes', 'one', 'of', 'the', 'causes', 'of', 'CKD', '?', 'Is', 'high', 'blood', 'pressure', 'one', 'of', 'the', 'causes', 'of', 'CKD', '?', 'Are', 'there', 'any', 'symptops', 'of', 'CKD', '?', 'What', 'are', 'the', 'symptoms', 'of', '

In [13]:
ignore_words = ['?', '!', '.', ',']
all_words = [stem(w) for w in all_words if w not in ignore_words]
print(all_words)

['hi', 'hey', 'how', 'are', 'you', 'hi', 'chatbot', 'hello', 'good', 'morn', 'good', 'afternoon', 'good', 'even', 'howdi', 'chatbot', 'are', 'you', 'the', 'question', 'and', 'answer', 'chatbot', 'i', 'am', 'look', 'for', 'bye', 'see', 'you', 'later', 'goodby', 'see', 'you', 'next', 'time', 'i', 'will', 'be', 'back', 'later', 'hope', 'to', 'see', 'you', 'again', 'good', 'night', 'what', 'is', 'chronic', 'kidney', 'diseas', 'is', 'chronic', 'kidney', 'diseas', 'the', 'same', 'as', 'ckd', 'can', 'you', 'explain', 'to', 'me', 'what', 'is', 'ckd', 'is', 'ckd', 'a', 'sudden', 'diseas', 'like', 'heart', 'attack', 'how', 'doe', 'ckd', 'affect', 'the', 'kidney', 'what', 'are', 'the', 'caus', 'of', 'ckd', 'is', 'diabet', 'one', 'of', 'the', 'caus', 'of', 'ckd', 'is', 'high', 'blood', 'pressur', 'one', 'of', 'the', 'caus', 'of', 'ckd', 'are', 'there', 'ani', 'symptop', 'of', 'ckd', 'what', 'are', 'the', 'symptom', 'of', 'ckd', 'are', 'there', 'ani', 'complic', 'associ', 'with', 'ckd', 'what', 'is

In [14]:
all_words = sorted(set(all_words))
tags = sorted(set(tags))
print(tags)

['about', 'anemia_fatigue_treatment', 'antibotics_treatment', 'causes', 'cure', 'diabetes_CKD', 'diabetes_treatment', 'diagnosis', 'donation', 'experimental_treatments', 'farewell', 'gender_common', 'greeting', 'high_blood_pressure_treatment', 'high_calcium_bone_diease_treatment', 'high_cholesterol_level', 'inheritance', 'lifestyle', 'monitoring', 'nerve_damage_treament', 'outlook', 'poor_nutrition_treatment', 'possibly_unsafe_treatments', 'prevention', 'prevention_family_history', 'race_common', 'reverse', 'skin_problems_treatment', 'special_diet', 'swelling_urine_output_treatment', 'symptoms', 'treatment']


In [15]:
xy

[(['Hi'], 'greeting'),
 (['Hey'], 'greeting'),
 (['How', 'are', 'you', '?'], 'greeting'),
 (['Hi', 'Chatbot'], 'greeting'),
 (['Hello'], 'greeting'),
 (['Good', 'morning'], 'greeting'),
 (['Good', 'afternoon'], 'greeting'),
 (['Good', 'evening'], 'greeting'),
 (['Howdy', 'chatbot'], 'greeting'),
 (['Are',
   'you',
   'the',
   'Question',
   'and',
   'Answer',
   'chatbot',
   'I',
   'am',
   'looking',
   'for',
   '?'],
  'greeting'),
 (['Bye'], 'farewell'),
 (['See', 'you', 'later'], 'farewell'),
 (['Goodbye'], 'farewell'),
 (['See', 'you', 'next', 'time'], 'farewell'),
 (['I', 'will', 'be', 'back', 'later'], 'farewell'),
 (['Hope', 'to', 'see', 'you', 'again'], 'farewell'),
 (['Good', 'night'], 'farewell'),
 (['What', 'is', 'Chronic', 'Kidney', 'Disease', '?'], 'about'),
 (['Is', 'Chronic', 'Kidney', 'Disease', 'the', 'same', 'as', 'CKD', '?'],
  'about'),
 (['Can', 'you', 'explain', 'to', 'me', 'what', 'is', 'CKD'], 'about'),
 (['Is', 'CKD', 'a', 'sudden', 'disease', 'like', 'h

In [16]:
X_train = []
y_train = []
for (pattern_sentence, tag) in xy:
  bag = bag_of_words(pattern_sentence, all_words)
  X_train.append(bag)

  label = tags.index(tag)
  y_train.append(label) #used for CrossEntropyLoss later (not one-hot encoded)

In [17]:
X_train = np.array(X_train)
y_train = np.array(y_train)
X_train.shape, y_train.shape

((163, 199), (163,))

In [18]:
class ChatDataset(Dataset):
  def __init__(self):
    self.n_samples = len(X_train)
    self.x_data = X_train
    self.y_data = y_train
  
  def __getitem__(self, index):
    return self.x_data[index], self.y_data[index]
  
  def __len__(self):
    return self.n_samples

In [19]:
# Hyper-parameters 
num_epochs = 1000
batch_size = 8
learning_rate = 0.001
input_size = len(X_train[0])
hidden_size = 54
output_size = len(tags)
print(input_size, output_size)

199 32


In [20]:
dataset = ChatDataset()
train_loader = DataLoader(dataset = dataset, batch_size = batch_size, shuffle = True, num_workers = 2)

In [21]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size) 
        self.l2 = nn.Linear(hidden_size, hidden_size) 
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        # no activation and no softmax at the end
        return out

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = NeuralNet(input_size, hidden_size, output_size).to(device)

In [23]:
# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [24]:
# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(dtype=torch.long).to(device)
        
        # Forward pass
        outputs = model(words)
        # if y would be one-hot, we must apply
        # labels = torch.max(labels, 1)[1]
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 100 == 0:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


print(f'final loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 0.0036
Epoch [200/1000], Loss: 0.0018
Epoch [300/1000], Loss: 0.0000
Epoch [400/1000], Loss: 0.0001
Epoch [500/1000], Loss: 0.0003
Epoch [600/1000], Loss: 0.2353
Epoch [700/1000], Loss: 0.2420
Epoch [800/1000], Loss: 0.0000
Epoch [900/1000], Loss: 0.0000
Epoch [1000/1000], Loss: 0.0000
final loss: 0.0000


In [25]:
data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "output_size": output_size,
    "hidden_size": hidden_size,
    "all_words": all_words,
    "tags": tags
}

In [26]:
FILE = "data.pth"
torch.save(data, FILE)
print(f"training completed. file saved to {FILE}")

training completed. file saved to data.pth


In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

with open("intents.json", 'r') as f:
  intents = json.load(f)

FILE = "data.pth"
data = torch.load(FILE)

input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data["all_words"]
tags = data["tags"]
model_state = data["model_state"]

model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()

NeuralNet(
  (l1): Linear(in_features=199, out_features=54, bias=True)
  (l2): Linear(in_features=54, out_features=54, bias=True)
  (l3): Linear(in_features=54, out_features=32, bias=True)
  (relu): ReLU()
)

In [28]:
bot_name = "Sam"
print("Let's chat! Type 'q' to exit")
while True:
  sentence = input("You: ")
  if sentence == 'q':
    break
  
  sentence = tokenize(sentence)
  X = bag_of_words(sentence, all_words)
  X = X.reshape(1, X.shape[0])
  X = torch.from_numpy(X)

  output = model(X)
  _, predicted = torch.max(output, dim = 1)
  tag = tags[predicted.item()]

  probs = torch.softmax(output, dim = 1)
  prob = probs[0][predicted.item()]

  if prob.item() > 0.75:
    for intent in intents["intents"]:
      if tag == intent["tag"]:
        responses = intent["responses"]
        print(f"{bot_name}: {random.choice(responses)}")
  else:
    print(f"{bot_name}: I do not understand...")

Let's chat! Type 'q' to exit
You: Hello
Sam: Hello, I am here to answer to answer any questions you possibly have. Do not hesistate to ask me any as it is my duty to help you/
You: May I know what is CKD?
Sam: I do not understand...
You: What is CKD?
Sam: I do not understand...
You: CKD
Sam: I do not understand...
You: Howdy
Sam: Hi there, is there any question you have that I can answer?
You: What are the symptoms of CKD?
Sam: Symptoms of CKD may include swelling in the ankles and feet, fatigue, changes in urine output, anemia (low red blood cell count), bone disease, high blood pressure, infections, nerve damage and poor nutrition. Worst cases include kidney failure.
You: Does medication cure swelling?
Sam: Medications such as diuretics or ACE inhibitors may be used to treat swelling/urine output changes in people with CKD. However, these medications may not be suitable for everyone and may cause side effects.
You: What is Chronic Kidney Disease?
Sam: Chronic Kidney Disease also know

KeyboardInterrupt: ignored