### Importar librerías necesarias

In [4]:
# Importar las librerias necesarias
import json 
import spacy 
import numpy as np 
from sklearn.metrics.pairwise import cosine_similarity 


### Carga del modelo de SpaCy y lectura de las respuestas desde el json

In [26]:
# Load spaCy's large English pipeline; its document vectors are used as embeddings below
nlp = spacy.load("en_core_web_lg")

# Read the candidate responses from the JSON file.
# The encoding is passed explicitly because open() otherwise falls back to the
# platform's locale encoding, which can mis-decode or reject non-ASCII text on
# systems where that default is not UTF-8.
with open('respuesta.json', encoding='utf-8') as file:
    data = json.load(file)

# The file is expected to hold the response texts under the "responses" key
responses = data["responses"]

### Transformar respuestas en embeddings

In [12]:
# Turn every response into its spaCy document vector (one row per response).
# nlp.pipe streams the texts through the pipeline in batches instead of
# invoking nlp() separately for each response; documents come back in input order.
responses_embeddings = np.array([doc.vector for doc in nlp.pipe(responses)])

responses_embeddings

array([[ 1.0904796 , -2.3488302 , -0.543687  , ..., -2.584807  ,
        -1.9785779 ,  3.807547  ],
       [ 2.3959067 , -1.38505   ,  0.46039206, ..., -1.0013645 ,
        -2.184252  ,  3.4902706 ],
       [ 2.8996904 , -0.49203098, -1.9728084 , ..., -1.2895554 ,
        -2.4242344 ,  3.456478  ],
       ...,
       [-1.62018   ,  0.3847784 , -2.3261259 , ..., -3.6405833 ,
        -0.6643802 ,  2.6210325 ],
       [-0.24433705,  0.29863614, -1.9623681 , ..., -1.535132  ,
        -2.1197605 ,  0.11507459],
       [-3.3221512 , -0.7779014 , -2.7207854 , ..., -2.972469  ,
        -1.5138054 ,  0.92538685]], dtype=float32)

In [31]:
def preprocess(text):
    """Normalise a text before it is embedded.

    The text is run through the spaCy pipeline ``nlp``; tokens flagged as stop
    words are dropped and the remaining tokens are replaced by their lowercased
    lemma.

    Args:
        text: The raw text to normalise.

    Returns:
        The kept lemmas joined by single spaces (an empty string when no token
        is kept).
    """
    kept_lemmas = []
    for tok in nlp(text):
        if tok.is_stop:
            continue
        kept_lemmas.append(tok.lemma_.lower())
    return ' '.join(kept_lemmas)

### Función para búsqueda de respuestas similares

In [32]:
def find_most_similar_response(question):
    """Return the stored response that is most similar to a question.

    The question is normalised with ``preprocess``, embedded with ``nlp`` and
    compared by cosine similarity against every row of
    ``responses_embeddings``. The entry of ``responses`` at the best-scoring
    position is returned.

    Args:
        question: The user's question as text.

    Returns:
        The element of ``responses`` with the highest cosine similarity to the
        question.
    """
    # Normalise the question
    question_processed = preprocess(question)

    # If preprocessing removed every token there is nothing left to embed and
    # the ranking below would be arbitrary, so fall back to the raw question.
    if not question_processed.strip():
        question_processed = question

    question_embedding = nlp(question_processed).vector

    # NOTE(review): the responses appear to be embedded from their raw text while
    # the question is embedded after preprocessing - confirm this is intended.
    # Cosine similarity between the question (one row) and every response
    similarities = cosine_similarity([question_embedding], responses_embeddings)

    # Only the single best match is returned, so take the argmax directly
    # instead of sorting all scores and slicing a top-10 list.
    best_index = int(np.argmax(similarities[0]))

    return responses[best_index]

In [33]:
# Sample questions used to exercise the matcher
example_questions = [
    "What are the visiting hours?",
    "When can I visit the patients?",
    "How can I schedule a medical appointment?",
    "Is the emergency room open 24 hours?",
    "What is the number to schedule an appointment?",
    "Physical therapy sessions?",
    "Is there free WiFi?",
    "Is there quiet hours at the hospital?",
]

# Run each sample question through the chatbot and show the response it picks
for question in example_questions:
    selected_response = find_most_similar_response(question)
    print("Question: ", question)
    print("Selected response: ", selected_response)
    print("--------------------------------------------------")

Question:  What are the visiting hours?
Selected response:  Patients can have two visitors at a time during visiting hours.
--------------------------------------------------
Question:  When can I visit the patients?
Selected response:  Family members can call the patient's room directly for updates.
--------------------------------------------------
Question:  How can I schedule a medical appointment?
Selected response:  For emergency medical assistance, dial 911 immediately.
--------------------------------------------------
Question:  Is the emergency room open 24 hours?
Selected response:  The emergency room is open 24 hours a day.
--------------------------------------------------
Question:  What is the number to schedule an appointment?
Selected response:  Family members can call the patient's room directly for updates.
--------------------------------------------------
Question:  Physical therapy sessions?
Selected response:  Family members can call the patient's room directly f