In [74]:
import pandas as pd

SEED = 0
pd.set_option('display.max_colwidth', None)

In [75]:
# Load Data

In [76]:
path = 'data/preprocessed-ai-medical-chatbot.csv'
df = pd.read_csv(path)
df = df.dropna()

In [77]:
df.sample(5, random_state=SEED)

Unnamed: 0,description,question,answer
152573,causes red spots around eyes child,hi 18month old suddenli becom faddi eat ok diet eat shreddi pot yogurt odd bit toast poorli around 15th april sick diarea week diet chang becam better develop littl red spot chin spread around face one disappear anoth appear somewher els prescrib caneston cream didnt realli anyth seem clear eat yogurt anyth moment manag get eat spot appear around eye could eat someth els,i donot think so its related to yogurt she is fond of eating as it does not spread and non itchy do you have mosquito at home apply some soothing cream moisturizer and that s all needed to your baby now
247419,skin colored itchy bumps elbow lower back temporary relief hydro cortisone cream allergy pills history seasonal allergies done,skin color itchi bump elbow lower back itch like crazi notic last spring took long time heal month got spring get season allergi start take allergi pill itch went away rash clear longer season allergi need take alleri pill loratadin 10 mg rash back start take allergi pill help littl itch still bump still itch time bump red scratch like crazi tri hydrocortison cream well help also tri skinceut hydra balm well help littl itch live new mexico dri use oil lotion daili skin dri bump smooth touch blister,hi dear user thanks for choosing hcm u have seasonal allergy called allergic dermatitis so evaluate it consult good dermatologist thanq
71438,vomiting one day pain appendix bladder,vomit sinc yesterday eat lunch mouth esophagu hurt stomach feel full addit sinc morn area appendix bladder hurt could strain muscl due vomit appendix,constipation when was your last bowel movement if you are clogged up it will have no where else to go but up chances are the pain in your belly by your bladder is a strained muscle so is your question about the strained muscle or the vomiting chances are you have a 24 hour flu or you ate something bad for breakfast
201206,liver cirrhosis enlarged spleen veins treated,hi name tammi multipl gallston hep c chorrosi liver enlarg spleen enlarg vein one live want touch want send bigger town better dr also want liver doner list hare get,hi and welcome to healthcaremagic thank you for your query i am dr rommstein i understand your concerns and i will try to help you as much as i can this is serious disease and quitting alcohol is the most important thing that should be done also medications which are hematotoxic should be avoided this is necessary to prevent disease progression which is lethal in most cases at early stages it can be treated with these measures but but in case of progression only liver transplantation may help diet should be balanced and healthy and get regular exercise limit high carb foods such as bread grits rice potatoes and corn and cut down on drinks with lots of sugar like sports drinks and juice if there is viral hepatitis as udnelying cause then antiviral medications are required i hope i have answered you query if you have any further questions you can contact us in every time kindly regards wish you a good health
40162,unable control agitation hallucinations restlessness remedy,51 yr old femal w glioblastoma end stage morphine30mg sq q 4hr atc w btd 15 mg q 1hr prn decadron 4mg sq bid haldol 5mg sq q4hr atc ativan 1 mg q 4 hr atc still unabl control agit hallucin restless suggest,hithanks for using healthcare magici think she has hallucination due to metastasis in that case just carry on with haloperidol or you can try quetiapine or olanzapine organic psychosis is very difficult to treat and some time does not get improved with medication better to try any of these sedative antipsychotic at least with it she would remain calm thanks


# Building and training model

#### Split the data into train, test, and validation

In [78]:
from sklearn.model_selection import train_test_split

X = df[['description', 'question']]
y = df['answer']

X_train_and_val, X_test, y_train_and_val, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

X_train, X_val, y_train, y_val = train_test_split(X_train_and_val, y_train_and_val, test_size=0.1, random_state=42)


In [79]:
print(f"Number of samples in train set: {X_train.shape[0]}")
print(f"Number of samples in validation set: {X_val.shape[0]}")
print(f"Number of samples in test set: {X_test.shape[0]}")

Number of samples in train set: 208097
Number of samples in validation set: 23122
Number of samples in test set: 25691


#### Training Model

In [87]:
import math

def dot_prod(vec1, vec2):
    dot_product = sum(a * b for a, b in zip(vec1, vec2))
    return dot_product

class SimilarityBasedAgent:
    
	def __init__(self, questions, responses):
		self.questions = questions
		self.responses = responses
		self.vocab = None
		self.questions_vectors = None

	def get_vocab(self):
		vocab = set()
		for word in self.questions:
			vocab.add(word)
		return vocab
	
	def bag_of_words(self, question):
		vec = []
		for token in self.vocab:
			vec.append(question.count(token))
		return vec

	def train(self):
		self.vocab = self.get_vocab()

		question_vectors = []
		for question in self.questions:
			question_vectors.append(self.bag_of_words(question))

		self.questions_vectors = question_vectors

	def find_closest_answer(self, query):
		user_query_vector = self.bag_of_words(query)
		
		similarities = []
		for i in range(len(self.questions)):
			sim = dot_prod(user_query_vector, self.questions_vectors[i])
			similarities.append((sim, self.responses[i]))

		similarities.sort(reverse=True)

		return similarities[0]
	
    
training_questions = list(X_train['description'])[:1000]
training_responses = list(y_train)[:1000]

model = SimilarityBasedAgent(training_questions, training_responses)
model.train()

In [89]:
user_query = "causes cough runny nose child"
response = model.find_closest_answer(user_query)
print(response)

(1, 'hi  welcome to hcm  this is most probably lower respiratory tract infection  you need to get the child examined by a doctor as soon as possible  take care ')


In [None]:
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
