### Importar librerías necesarias

In [2]:
# Importar las librerias necesarias
import json 
import spacy 
import numpy as np 
from sklearn.metrics.pairwise import cosine_similarity 


### Carga del modelo de spaCy y lectura de las respuestas desde el JSON

In [3]:
# Load spaCy's large English pipeline; it is used below to embed both the
# stored responses and the incoming questions.
nlp = spacy.load("en_core_web_lg")

# Read the stored responses from the JSON file. The encoding is pinned to
# UTF-8 (the standard JSON encoding) so that decoding does not depend on the
# platform's locale default.
with open('respuesta.json', encoding='utf-8') as file:
    data = json.load(file)

# The file is expected to hold its answers under the "responses" key.
responses = data["responses"]

### Transformar respuestas en embeddings

In [4]:
# Embed every response as one row of a 2-D array. nlp.pipe streams the texts
# through the pipeline in batches, which is the recommended way to process
# many texts instead of calling nlp() once per text.
responsesEmbeddings = np.array([doc.vector for doc in nlp.pipe(responses)])

responsesEmbeddings

array([[ 1.0904796 , -2.3488302 , -0.543687  , ..., -2.584807  ,
        -1.9785779 ,  3.807547  ],
       [ 2.0794666 , -1.1438301 ,  0.46783298, ..., -1.0116231 ,
        -2.111757  ,  3.6217308 ],
       [ 2.8996904 , -0.49203098, -1.9728084 , ..., -1.2895554 ,
        -2.4242344 ,  3.456478  ],
       ...,
       [-1.62018   ,  0.3847784 , -2.3261259 , ..., -3.6405833 ,
        -0.6643802 ,  2.6210325 ],
       [-0.47997463,  0.46028596, -1.7876245 , ..., -1.6465485 ,
        -2.2502735 ,  0.1279058 ],
       [-3.3221512 , -0.7779014 , -2.7207854 , ..., -2.972469  ,
        -1.5138054 ,  0.92538685]], dtype=float32)

In [5]:
# Text normalization helper
def preprocess(text):
    """Normalize a text before it is embedded.

    The text is run through the spaCy pipeline; stop words are discarded and
    each remaining token is replaced by its lowercased lemma.

    Args:
        text: Raw input string.

    Returns:
        The kept lemmas joined by single spaces, as one string.
    """
    lemmas = []
    for token in nlp(text):
        # Stop words ("empty" words) are dropped
        if token.is_stop:
            continue
        lemmas.append(token.lemma_.lower())
    return ' '.join(lemmas)

### Función para la búsqueda de respuestas similares

In [6]:
def findMostSimilarReponses(question):
    """Return the stored response whose embedding is closest to the question.

    The question is normalized with ``preprocess``, embedded with the spaCy
    pipeline, and compared by cosine similarity against every row of
    ``responsesEmbeddings``.

    Args:
        question: The user's question as a string.

    Returns:
        The entry of ``responses`` with the highest cosine similarity to the
        question.
    """
    questionProcessed = preprocess(question)

    # A question made up only of stop words preprocesses to an empty string,
    # which carries no vector information and would make every similarity
    # meaningless. Fall back to the raw question in that case.
    if not questionProcessed.strip():
        questionProcessed = question
    questionEmbedding = nlp(questionProcessed).vector

    # NOTE(review): the responses are embedded from their raw text, whereas the
    # question is lemmatized and stripped of stop words first -- confirm that
    # this asymmetry is intended.
    similarities = cosine_similarity([questionEmbedding], responsesEmbeddings)

    # Only the single best match is returned, so a plain argmax replaces the
    # former full sort plus top-10 slice.
    best_index = int(np.argmax(similarities[0]))

    return responses[best_index]

In [21]:
# Sample questions used to exercise the chatbot
exampleQuestions = [
    "What are the visiting hours?",
    "What are the visiting rules?",
    "How can I get a medical assistance?",
    "Is the emergency room open 24 hours?",
    "Information about Lab results",
    "lab results",
    "Physical therapy sessions?",
    "Is there free WiFi?",
    "Is there a hospital cafeteria?",
]

# Run each sample question through the retriever and print the chosen answer,
# followed by a separator line
for question in exampleQuestions:
    selectedResponse = findMostSimilarReponses(question)
    print("Question: ", question)
    print("Selected response: ", selectedResponse)
    print("--------------------------------------------------")

Question:  What are the visiting hours?
Selected response:  Patients can have two visitors at a time during visiting hours.
--------------------------------------------------
Question:  What are the visiting rules?
Selected response:  Family members can call the patient's room directly for updates.
--------------------------------------------------
Question:  How can I get a medical assistance?
Selected response:  For emergency medical assistance, dial 911 immediately.
--------------------------------------------------
Question:  Is the emergency room open 24 hours?
Selected response:  The emergency room is open 24 hours a day.
--------------------------------------------------
Question:  Information about Lab results
Selected response:  Physical therapy sessions can be scheduled by referral from a doctor.
--------------------------------------------------
Question:  lab results
Selected response:  Lab test results are usually available within 24 to 48 hours.
--------------------------