In [None]:
import numpy as np
import nltk
import random
import string
import sklearn
from sklearn.feature_extraction import text
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Fetch the NLTK "punkt" tokenizer data.
# NOTE(review): no cell visible in this notebook calls an NLTK tokenizer —
# confirm this download is still needed.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# Load the diabetes FAQ table; the relative path means the CSV must sit in the
# current working directory.
import pandas
faq = pandas.read_csv("diabetes_questions_answers.csv")
# NOTE(review): the `Unnamed: 0` column in the preview suggests the file was
# saved together with its index. Also, the answers shown in the preview do not
# appear to belong to the questions on the same row — verify the dataset's
# question/answer alignment before trusting the chatbot output.
faq.head()

Unnamed: 0,Question,Answer
0,How does diabetes affect the immune system?,"Yes, children can get diabetes, with Type 1 di..."
1,What is Type 2 diabetes?,"Genetics play a role in diabetes, particularly..."
2,How does smoking affect diabetes?,Diabetes increases the risk of cardiovascular ...
3,Can diabetes cause eye problems?,"Diabetic foot problems include infections, ulc..."
4,Can diabetes affect pregnancy?,"There is a genetic component to diabetes, but ..."


In [None]:
# Pull the two text columns out of the same frame, so position i in `qns`
# and position i in `answers` come from the same CSV row.
qns = faq["Question"]
answers = faq["Answer"]

In [None]:
# Inspect the question column (the notebook elides the middle rows).
qns

Unnamed: 0,Question
0,How does diabetes affect the immune system?
1,What is Type 2 diabetes?
2,How does smoking affect diabetes?
3,Can diabetes cause eye problems?
4,Can diabetes affect pregnancy?
...,...
995,What are the early signs of diabetes?
996,How important is regular medical check-up for ...
997,What is hyperglycemia?
998,What is continuous glucose monitoring?


In [None]:
# Inspect the answer column (the notebook elides the middle rows).
answers

Unnamed: 0,Answer
0,"Yes, children can get diabetes, with Type 1 di..."
1,"Genetics play a role in diabetes, particularly..."
2,Diabetes increases the risk of cardiovascular ...
3,"Diabetic foot problems include infections, ulc..."
4,"There is a genetic component to diabetes, but ..."
...,...
995,Alcohol consumption can affect blood sugar lev...
996,"Diabetes can affect pregnancy, increasing the ..."
997,A diabetic diet focuses on eating nutrient-den...
998,"Currently, there is no cure for diabetes, but ..."


In [None]:
# TF-IDF vectorizer with default settings; `response` reuses this fitted
# object to project user queries into the same feature space.
TfidfVec = TfidfVectorizer()

# Learn the vocabulary from the FAQ questions and vectorize them in one step.
# The resulting matrix has one row per question and one column per term.
tfidf = TfidfVec.fit_transform(qns)
tfidf.shape

(1000, 110)

In [None]:
#the basic chatbot
#prepare some greeting words
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up","hey",)
GREETING_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]
def greeting(sentence):
    """Return a random greeting reply if ``sentence`` contains a greeting.

    Matching is case-insensitive and ignores punctuation attached to the
    edges of a word, so "Hello!" and "hey," are recognised. Multi-word
    entries of ``GREETING_INPUTS`` (e.g. "what's up") are matched as a
    run of consecutive words.

    Args:
        sentence: The raw user input.

    Returns:
        A randomly chosen element of ``GREETING_RESPONSES`` when a greeting
        is found, otherwise ``None``.
    """
    # Strip punctuation only from token edges so inner apostrophes
    # ("what's") survive, then drop tokens that were pure punctuation.
    tokens = [tok.strip(string.punctuation) for tok in sentence.lower().split()]
    # Pad with spaces so that substring search only hits whole words/phrases
    # (" hi " must not match inside " high ").
    padded = " " + " ".join(tok for tok in tokens if tok) + " "
    if any(f" {phrase} " in padded for phrase in GREETING_INPUTS):
        return random.choice(GREETING_RESPONSES)
    return None


# function to match input to the preprocessed sentences
def response(user_response, threshold=0.2):
    """Find the stored FAQ question most similar to ``user_response`` (TF-IDF).

    The query is projected with the already-fitted ``TfidfVec`` and compared
    by cosine similarity against every row of ``tfidf``.

    Args:
        user_response: The user's question as a string.
        threshold: Similarity cutoff; a best score less than or equal to
            this value is treated as "no match". Defaults to 0.2.

    Returns:
        A 4-tuple ``(message, question, score, answer)``. ``question`` and
        ``answer`` are empty strings when no match clears the threshold;
        otherwise ``answer`` is prefixed with ``"Ans: "``.
    """
    # Vectorize the input into the same feature space as the stored questions.
    query_vec = TfidfVec.transform([user_response])
    scores = cosine_similarity(query_vec, tfidf).ravel()

    # argmax returns the first best-scoring row, which makes tie-breaking
    # deterministic (an unstable argsort gives no such guarantee) and matches
    # the first-maximum rule used by response_trf.
    idx = int(np.argmax(scores))
    sim_max = scores[idx]

    if sim_max <= threshold:
        return "I am sorry! I don't have answer for that.", '', sim_max, ''

    # `idx` is a row position in the score array, so look it up positionally
    # rather than by index label.
    matched_question = qns.iloc[idx]
    matched_answer = "Ans: " + answers.iloc[idx]
    return "Similar question found!", matched_question, sim_max, matched_answer

def say(robo_response, Q, score, A):
    """Print a chatbot reply on three lines.

    Args:
        robo_response: Status message, printed first.
        Q: Matched question text; printed followed by the score in parentheses.
        score: Similarity score, rendered with ``str``.
        A: Answer text, printed last.
    """
    print(robo_response)
    score_suffix = f" ({score!s})"
    print(Q + score_suffix)
    print(A)


In [None]:
# Query that is worded exactly like a stored question (TF-IDF matcher).
say(*response("What is diabetes?"))

Similar question found!
What is diabetes? (1.0000000000000002)
Ans: Hypoglycemia, or low blood sugar, can cause symptoms like shakiness, confusion, and fainting.


In [None]:
# Free-form query; the best TF-IDF match is shown only if it clears the
# similarity cutoff inside `response`.
say(*response("Do i have diabetes?"))

Similar question found!
How do blood sugar levels affect health? (0.44421850541273594)
Ans: Yes, children can get diabetes, with Type 1 diabetes being more common in children.


# Chatbot with Transformer Embeddings

In [None]:
!pip install transformers



In [None]:
# Load DistilBERT (uncased) with its tokenizer and wrap both in a
# feature-extraction pipeline.
# NOTE(review): numpy is already imported in the first cell; this re-import
# is redundant.
import numpy as np
from transformers import AutoTokenizer, AutoModel, pipeline

model = AutoModel.from_pretrained('distilbert-base-uncased')
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
# `fe` is the module-level pipeline that vec_trf calls on each sentence.
fe = pipeline('feature-extraction', model=model, tokenizer=tokenizer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Device set to use cpu


In [None]:
def vec_trf(sent):
    """Embed ``sent`` as one vector by averaging the pipeline output.

    Runs the module-level ``fe`` pipeline on ``sent``, removes size-1 axes
    from the result with ``np.squeeze`` and averages along the first
    remaining axis (presumably the token axis — confirm against the
    pipeline's output layout).

    Args:
        sent: Text to embed.

    Returns:
        The averaged feature array.
    """
    features = np.squeeze(fe(sent))
    return features.mean(axis=0)

In [None]:
# Embed every FAQ question once up front (one pipeline call per question) so
# that answering a query later only needs to embed the query itself.
qns_trf = [ vec_trf(s) for s in qns ]

In [None]:
from scipy.spatial.distance import cosine

def response_trf(user_response, threshold=0.7):
    """Find the stored FAQ question most similar to ``user_response`` (embeddings).

    The query is embedded with ``vec_trf`` and compared against each
    precomputed vector in ``qns_trf``.

    Args:
        user_response: The user's question as a string.
        threshold: Similarity cutoff; a best score less than or equal to
            this value is treated as "no match". Defaults to 0.7.

    Returns:
        A 4-tuple ``(message, question, score, answer)``. ``question`` and
        ``answer`` are empty strings when no match clears the threshold;
        otherwise ``answer`` is prefixed with ``"Ans: "``.
    """
    query_vec = vec_trf(user_response)  # creating vector for input
    # scipy's `cosine` is a distance, so 1 - distance is the similarity.
    sims = [1 - cosine(query_vec, stored_vec) for stored_vec in qns_trf]

    # Compute the maximum once; `index` returns its first occurrence.
    sim_max = max(sims)
    idx = sims.index(sim_max)

    if sim_max <= threshold:
        return "I am sorry! I don't have answer for that.", '', sim_max, ''

    # `idx` is a position in `qns_trf`, so look it up positionally rather
    # than by index label.
    matched_question = qns.iloc[idx]
    matched_answer = "Ans: " + answers.iloc[idx]
    return "Similar question found!", matched_question, sim_max, matched_answer

In [None]:
# Near-exact query (no trailing question mark) against the DistilBERT vectors.
say(*response_trf("What is diabetes"))

Similar question found!
What is diabetes? (0.9366883956869142)
Ans: Type 1 diabetes is an autoimmune condition, while Type 2 diabetes is often related to lifestyle factors such as obesity and inactivity.


In [None]:
# Query worded exactly like a stored question.
say(*response_trf("What are the symptoms of diabetes?"))

Similar question found!
What are the symptoms of diabetes? (1.0)
Ans: Diabetes is treated with a combination of lifestyle changes, medications, and sometimes insulin therapy.


In [None]:
# Free-form query with no identically worded stored question.
say(*response_trf("Do i have diabetes?"))

Similar question found!
Is diabetes hereditary? (0.8806823534720662)
Ans: Risk factors for Type 2 diabetes include obesity, physical inactivity, family history, and age.


In [None]:
# Clinical BERT model

In [None]:
# Swap in Bio_ClinicalBERT. This rebinds `model`, `tokenizer` and `fe`, so the
# DistilBERT pipeline is no longer reachable under these names and every later
# vec_trf call uses the clinical model.
model = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
fe = pipeline('feature-extraction', model=model, tokenizer=tokenizer)

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Device set to use cpu


In [None]:
def vec_trf(sent):
    """Embed ``sent`` as one vector by averaging the ``fe`` pipeline output.

    NOTE(review): this repeats the earlier ``vec_trf`` definition. ``fe`` is
    looked up as a global at call time, so the function picks up whichever
    pipeline is currently bound without being redefined.

    Args:
        sent: Text to embed.

    Returns:
        The pipeline output with size-1 axes removed, averaged along its
        first remaining axis.
    """
    return np.squeeze(fe(sent)).mean(axis=0)

In [None]:
# Re-embed every FAQ question with the pipeline currently bound to `fe`
# (Bio_ClinicalBERT at this point), replacing the vectors previously held
# in `qns_trf`.
qns_trf = [ vec_trf(s) for s in qns ]

In [None]:
# Near-exact query (no trailing question mark) against the Clinical BERT vectors.
say(*response_trf("What is diabetes"))

Similar question found!
What is diabetes? (0.9470384352154912)
Ans: Type 1 diabetes is an autoimmune condition, while Type 2 diabetes is often related to lifestyle factors such as obesity and inactivity.


In [None]:
# Query worded exactly like a stored question.
say(*response_trf("What are the symptoms of diabetes?"))

Similar question found!
What are the symptoms of diabetes? (1.0)
Ans: Diabetes is treated with a combination of lifestyle changes, medications, and sometimes insulin therapy.


In [None]:
# Free-form query with no identically worded stored question.
# NOTE(review): the recorded score for the unrelated-looking match below is
# above 0.9, far past the 0.7 cutoff in response_trf — the cutoff may need
# recalibrating for these embeddings.
say(*response_trf("Do i have diabetes?"))

Similar question found!
Can pets get diabetes? (0.9437811176875448)
Ans: Diabetes increases the risk of cardiovascular diseases such as heart attack and stroke.


In [None]:
# Longer query that prefixes the question with a glucose reading.
# NOTE(review): the query text says "mg/L"; glucose is usually quoted in
# mg/dL — confirm the unit is intended.
say(*response_trf("My blood glucose level is 50mg/L. Do i have diabetes? "))

Similar question found!
What are diabetic foot problems? (0.9090435200838004)
Ans: Diabetic foot problems include infections, ulcers, and in severe cases, amputation due to poor circulation and nerve damage.


In [None]:
# Open-ended query about what to do after a diagnosis.
say(*response_trf("What can be done if i have diabetes?"))

Similar question found!
Can pets get diabetes? (0.92611181369209)
Ans: Diabetes increases the risk of cardiovascular diseases such as heart attack and stroke.
