In [1]:
from newspaper import Article
import nltk
import string
import random
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
nltk.download("punkt",quiet=True)
nltk.download("wordnet", quiet=True)

True

In [3]:
#Get the text from the URL
article = Article("https://www.mayoclinic.org/diseases-conditions/chronic-kidney-disease/symptoms-causes/syc-20354521")
article.download()
article.parse()
article.nlp()
corpus = article.text

print(corpus)

Overview

Chronic kidney disease, also called chronic kidney failure, describes the gradual loss of kidney function. Your kidneys filter wastes and excess fluids from your blood, which are then excreted in your urine. When chronic kidney disease reaches an advanced stage, dangerous levels of fluid, electrolytes and wastes can build up in your body.

In the early stages of chronic kidney disease, you may have few signs or symptoms. Chronic kidney disease may not become apparent until your kidney function is significantly impaired.

Treatment for chronic kidney disease focuses on slowing the progression of the kidney damage, usually by controlling the underlying cause. Chronic kidney disease can progress to end-stage kidney failure, which is fatal without artificial filtering (dialysis) or a kidney transplant.

Chronic kidney disease care at Mayo Clinic

How kidneys work

Symptoms

Signs and symptoms of chronic kidney disease develop over time if kidney damage progresses slowly. Signs an

In [4]:
all_text = corpus
#convert text into a list of sentences
sentences = nltk.sent_tokenize(all_text)

print(sentences[:5])

['Overview\n\nChronic kidney disease, also called chronic kidney failure, describes the gradual loss of kidney function.', 'Your kidneys filter wastes and excess fluids from your blood, which are then excreted in your urine.', 'When chronic kidney disease reaches an advanced stage, dangerous levels of fluid, electrolytes and wastes can build up in your body.', 'In the early stages of chronic kidney disease, you may have few signs or symptoms.', 'Chronic kidney disease may not become apparent until your kidney function is significantly impaired.']


In [5]:
# punctuation list
rem_punc = dict((ord(p),None) for p in string.punctuation)

print(rem_punc)

{33: None, 34: None, 35: None, 36: None, 37: None, 38: None, 39: None, 40: None, 41: None, 42: None, 43: None, 44: None, 45: None, 46: None, 47: None, 58: None, 59: None, 60: None, 61: None, 62: None, 63: None, 64: None, 91: None, 92: None, 93: None, 94: None, 95: None, 96: None, 123: None, 124: None, 125: None, 126: None}


In [6]:
#function to remove punctuation and return lower cased text
def remove_punc (all_text):
    return nltk.word_tokenize(all_text.lower().translate(rem_punc))

In [7]:
print(remove_punc(all_text))

['overview', 'chronic', 'kidney', 'disease', 'also', 'called', 'chronic', 'kidney', 'failure', 'describes', 'the', 'gradual', 'loss', 'of', 'kidney', 'function', 'your', 'kidneys', 'filter', 'wastes', 'and', 'excess', 'fluids', 'from', 'your', 'blood', 'which', 'are', 'then', 'excreted', 'in', 'your', 'urine', 'when', 'chronic', 'kidney', 'disease', 'reaches', 'an', 'advanced', 'stage', 'dangerous', 'levels', 'of', 'fluid', 'electrolytes', 'and', 'wastes', 'can', 'build', 'up', 'in', 'your', 'body', 'in', 'the', 'early', 'stages', 'of', 'chronic', 'kidney', 'disease', 'you', 'may', 'have', 'few', 'signs', 'or', 'symptoms', 'chronic', 'kidney', 'disease', 'may', 'not', 'become', 'apparent', 'until', 'your', 'kidney', 'function', 'is', 'significantly', 'impaired', 'treatment', 'for', 'chronic', 'kidney', 'disease', 'focuses', 'on', 'slowing', 'the', 'progression', 'of', 'the', 'kidney', 'damage', 'usually', 'by', 'controlling', 'the', 'underlying', 'cause', 'chronic', 'kidney', 'disease'

In [8]:
#Greetingd by user
GREETING_INPUTS = ["hi","hello",'hola',"wassup","greetings","hey"]
#Response by bot
GREETING_RESPONSES = ["howdy","hello","hi","hey","hello there"]

In [9]:
#Return a random selected greeting 
def greeting(sent):
    for word in sent.split():
        if word.lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)

In [10]:
#Create tfidf vectorizer object
tf_idf_vect = TfidfVectorizer(tokenizer = remove_punc, stop_words="english")

In [12]:
def response(query):
    #query = "What is Kidney Chronic Disease?"
    query = query.lower().translate(rem_punc)

    #print(query)
    
    #append user's query to the ned of the text
    sentences.append(query)
    #print(sentences)

    #convert the text to a list of tfidf vectors
    tf_idf = tf_idf_vect.fit_transform(sentences)
    
    robo_response = ""

    # get the measure of cosine similarity
    score = cosine_similarity(tf_idf[-1],tf_idf)

    #print the similarity scores
    #print(score)

    #get the highest score
    #Since it is list of lists, get [0]
    idx = score.argsort()[0][-2]

    #reduce the dimensionality
    flat = score.flatten()

    #sort the list
    flat.sort()

    #get the most similar score to the user's query
    s_score = flat[-2]

    print(" Similarity score : ", s_score)
    #if the score is zero, no response found

    if s_score == 0:
        robo_response = robo_response + "I apologize, I don't understand"
    else :
        robo_response= robo_response + sentences[idx]
    
    sentences.remove(query) 
    return robo_response



In [16]:
flag = True
print("DOCBot: I am Doctor bot or DOCBot for short. I will help you answer your query about CKD. Type 'Bye' to esit")

while(flag):
    user_response = input()
    user_response = user_response.lower()
    if(user_response != 'bye'):
        if user_response == 'thanks' or user_response == 'thank you':
            flag = False
            print("DOCBot:", "You are welcome")
        else:
            if(greeting(user_response) != None):
                print("DOCBot:", greeting(user_response))
            else:
                print("DOCBot:"+ response(user_response))
    else:
        flag = False
        print("DOCBot: Chat with you later")

DOCBot: I am Doctor bot or DOCBot for short. I will help you answer your query about CKD. Type 'Bye' to esit
hi
DOCBot: hello there
what is kidney chronic disease 
 Similarity score :  0.5068559627834549
DOCBot:Overview

Chronic kidney disease, also called chronic kidney failure, describes the gradual loss of kidney function.
thanks
DOCBot: You are welcome
