### This is a "self-learning" chatbot using NLTK in Python

In [0]:
pip install nltk



In [0]:
pip install newspaper3k



In [0]:
from newspaper import Article
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import numpy as np
import warnings

In [0]:
#Silence every warning category for the rest of the session
warnings.simplefilter('ignore')

In [0]:
#Download packages from nltk
nltk.download('punkt', quiet=True)
#Recent NLTK releases load the sentence/word tokenizers from 'punkt_tab'
#instead of the pickled 'punkt' models, so fetch it as well; without it
#sent_tokenize / word_tokenize raise LookupError on a fresh environment
nltk.download('punkt_tab', quiet=True)
nltk.download('wordnet', quiet=True)

True

In [0]:
#Source page the chatbot answers questions from
ARTICLE_URL = 'https://www.mayoclinic.org/diseases-conditions/chronic-kidney-disease/symptoms-causes/syc-20354521'

#Download the page, parse it, run newspaper's nlp step, then keep the plain text
article = Article(ARTICLE_URL)
article.download()
article.parse()
article.nlp()
corpus = article.text

print(corpus)

Overview

Chronic kidney disease, also called chronic kidney failure, describes the gradual loss of kidney function. Your kidneys filter wastes and excess fluids from your blood, which are then excreted in your urine. When chronic kidney disease reaches an advanced stage, dangerous levels of fluid, electrolytes and wastes can build up in your body.

In the early stages of chronic kidney disease, you may have few signs or symptoms. Chronic kidney disease may not become apparent until your kidney function is significantly impaired.

Treatment for chronic kidney disease focuses on slowing the progression of the kidney damage, usually by controlling the underlying cause. Chronic kidney disease can progress to end-stage kidney failure, which is fatal without artificial filtering (dialysis) or a kidney transplant.

Chronic kidney disease care at Mayo Clinic

How kidneys work

Symptoms

Signs and symptoms of chronic kidney disease develop over time if kidney damage progresses slowly. Signs an

In [0]:
#Split the article into a list of sentences; `text` is kept as an alias of the full corpus
text = corpus
sent_tokens = nltk.tokenize.sent_tokenize(text)

print(sent_tokens)

['Overview\n\nChronic kidney disease, also called chronic kidney failure, describes the gradual loss of kidney function.', 'Your kidneys filter wastes and excess fluids from your blood, which are then excreted in your urine.', 'When chronic kidney disease reaches an advanced stage, dangerous levels of fluid, electrolytes and wastes can build up in your body.', 'In the early stages of chronic kidney disease, you may have few signs or symptoms.', 'Chronic kidney disease may not become apparent until your kidney function is significantly impaired.', 'Treatment for chronic kidney disease focuses on slowing the progression of the kidney damage, usually by controlling the underlying cause.', 'Chronic kidney disease can progress to end-stage kidney failure, which is fatal without artificial filtering (dialysis) or a kidney transplant.', 'Chronic kidney disease care at Mayo Clinic\n\nHow kidneys work\n\nSymptoms\n\nSigns and symptoms of chronic kidney disease develop over time if kidney damage

In [0]:
#Translation table for str.translate: every punctuation code point maps to None,
#which tells translate() to delete that character
remove_punct_dict = dict.fromkeys(map(ord, string.punctuation))

#Show the punctuation characters that will be stripped
print(string.punctuation)

#Show the resulting code point -> None table
print(remove_punct_dict)

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
{33: None, 34: None, 35: None, 36: None, 37: None, 38: None, 39: None, 40: None, 41: None, 42: None, 43: None, 44: None, 45: None, 46: None, 47: None, 58: None, 59: None, 60: None, 61: None, 62: None, 63: None, 64: None, 91: None, 92: None, 93: None, 94: None, 95: None, 96: None, 123: None, 124: None, 125: None, 126: None}


In [0]:
#Normalize raw text into lower-case word tokens with punctuation removed
def LemNormalize(text):
  """Return the word tokens of `text` after lower-casing and stripping punctuation.

  Punctuation is deleted with the module-level `remove_punct_dict` table and the
  result is split with `nltk.word_tokenize`. Despite the name, no lemmatization
  step is applied here.
  """
  lowered = text.lower()
  without_punct = lowered.translate(remove_punct_dict)
  return nltk.word_tokenize(without_punct)

#Show the tokens produced for the whole article
print(LemNormalize(text))

['overview', 'chronic', 'kidney', 'disease', 'also', 'called', 'chronic', 'kidney', 'failure', 'describes', 'the', 'gradual', 'loss', 'of', 'kidney', 'function', 'your', 'kidneys', 'filter', 'wastes', 'and', 'excess', 'fluids', 'from', 'your', 'blood', 'which', 'are', 'then', 'excreted', 'in', 'your', 'urine', 'when', 'chronic', 'kidney', 'disease', 'reaches', 'an', 'advanced', 'stage', 'dangerous', 'levels', 'of', 'fluid', 'electrolytes', 'and', 'wastes', 'can', 'build', 'up', 'in', 'your', 'body', 'in', 'the', 'early', 'stages', 'of', 'chronic', 'kidney', 'disease', 'you', 'may', 'have', 'few', 'signs', 'or', 'symptoms', 'chronic', 'kidney', 'disease', 'may', 'not', 'become', 'apparent', 'until', 'your', 'kidney', 'function', 'is', 'significantly', 'impaired', 'treatment', 'for', 'chronic', 'kidney', 'disease', 'focuses', 'on', 'slowing', 'the', 'progression', 'of', 'the', 'kidney', 'damage', 'usually', 'by', 'controlling', 'the', 'underlying', 'cause', 'chronic', 'kidney', 'disease'

In [0]:
#Keyword Matching

#Greeting Inputs
GREETING_INPUTS = ["hi", "hello", "hola", "greetings", "wassup", "hey", "hi there", "good morning", "good afternoon"]

#Greeting responses
GREETING_RESPONSES = ["howdy", "Hi", "Hey", "What's good", "Hello", "Hey there"]

#Function to return a random greeting response
def greeting(sentence):
  """Return a random greeting reply if `sentence` contains a greeting, else None.

  The sentence is lower-cased, punctuation is replaced by spaces and runs of
  whitespace are collapsed, so "Hello!" and "Good   morning" are recognised.
  Every entry of GREETING_INPUTS is matched as a whole word or whole phrase,
  which lets the multi-word entries ("hi there", "good morning", ...) match;
  comparing one whitespace-separated word at a time can never match them.

  Parameters:
    sentence: the raw user input string.

  Returns:
    A randomly chosen element of GREETING_RESPONSES, or None when no greeting
    is found.
  """
  #Turn punctuation into spaces rather than deleting it so "hi,there" still splits into two words
  punct_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
  words = sentence.lower().translate(punct_to_space).split()

  #Pad with spaces so the substring test only hits whole words ("hi" must not match "this")
  padded = ' ' + ' '.join(words) + ' '
  for phrase in GREETING_INPUTS:
    if ' ' + phrase + ' ' in padded:
      return random.choice(GREETING_RESPONSES)
  return None

In [0]:
#Generate the response
def response(user_response):
  """Return the corpus sentence most similar to the user's query.

  The query is lower-cased and vectorized together with the sentences in the
  module-level `sent_tokens` list using TF-IDF (tokenized by `LemNormalize`,
  English stop words removed). Cosine similarity is then computed between the
  query and each corpus sentence, and the best-scoring sentence is returned.

  Parameters:
    user_response: the user's query string.

  Returns:
    The most similar sentence from `sent_tokens`, or the apology string
    " I apologise, I don't understand." when the best similarity is 0 or the
    corpus is empty.
  """
  user_response = user_response.lower()

  #Nothing to search in: answer with the apology instead of failing on an empty corpus
  if not sent_tokens:
    return " I apologise, I don't understand."

  #Build a fresh list (corpus + query as the last row) instead of appending to
  #sent_tokens: the shared corpus is never mutated, so an exception cannot leave
  #the query behind and no corpus sentence can be removed by mistake afterwards
  documents = sent_tokens + [user_response]

  #Create a TfidfVectorizer object and convert the text to a matrix of Tf-Idf features
  TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words='english')
  tfidf = TfidfVec.fit_transform(documents)

  #Similarity of the query (last row) against the corpus rows only; leaving the
  #query out of the comparison means it can never be picked as its own best match
  vals = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()

  #Index and score of the most similar corpus sentence
  idx = int(vals.argmax())
  score = vals[idx]

  #If the variable 'score' is 0, then there is no text similar to users response
  if score == 0:
    return " I apologise, I don't understand."
  return sent_tokens[idx]

In [0]:
#Interactive chat loop: read a line, classify it as farewell / thanks / greeting / query, and reply
flag = True
print("DocBot: Hey, I am DocBot. I will answer your queries about Chronic Kidney Diseases. If you want to exit type Bye")
bye = ['bye', 'talk to you later', 'ttyl', 'it was nice talking to you', 'seeya soon', 'i dont need your help', 'enough for now']
bye_response = ['Talk to you later, bye!', 'It was nice talking to you.', 'Have a nice day :)', 'Anytime you need me, I am always here. Bye:)', 'Get well soon, bye.', 'Okay, bye!!']
thanks = ['thanks','thankx','thnkx','thank you', 'thenkx','it was helpful']
#Translation table that deletes punctuation; built once instead of on every turn
remove_punctuation_dict = dict((ord(punct), None) for punct in string.punctuation)
while flag:
  print("You: ")
  user_response = input()
  #str.translate returns a new string, so the result has to be assigned back;
  #otherwise "Bye!" or "I don't need your help" never match the lists above
  user_response = user_response.lower().translate(remove_punctuation_dict).strip()
  if user_response in bye:
    #Farewell: reply and leave the loop
    flag = False
    print("DocBot: " + random.choice(bye_response))
  elif user_response in thanks:
    #Thanks also ends the conversation
    flag = False
    print("DocBot: You are welcome! ")
  else:
    #Call greeting() once and reuse the result rather than calling it a second time just to print
    greeting_reply = greeting(user_response)
    if greeting_reply is not None:
      print("DocBot: " + greeting_reply)
    else:
      print("DocBot: "+ response(user_response))