In [2]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
import numpy as np 
import os
from dotenv import load_dotenv

In [3]:
# Load variables from a local .env file into the process environment so that
# OPENAI_API_KEY can later be read with os.getenv.
load_dotenv()

True

In [9]:

# Build the embedding client with the API key taken from the environment
openai_api_key = os.getenv('OPENAI_API_KEY')
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Labelled example sentences: three complaints followed by three non-complaints.
# The "Complaint:" / "Non-Complaint:" prefix is the only label each example carries.
example_texts = (
    "Complaint: The device malfunctioned during surgery, causing patient discomfort.",
    "Complaint: The catheter was defective and led to an infection.",
    "Complaint: The device failed to operate as specified, resulting in an adverse event.",
    "Non-Complaint: The training manual was helpful in understanding the device operation.",
    "Non-Complaint: The product arrived on time and worked as expected.",
    "Non-Complaint: The customer service team provided excellent support.",
)
documents = [Document(page_content=example) for example in example_texts]

# Embed every example, build the FAISS index from them, and persist it to disk
faiss_index = FAISS.from_documents(documents, embeddings)
faiss_index.save_local("med_tech_faiss_index")


In [10]:
# Reload the index that was persisted to disk by the previous cell.
# NOTE(review): allow_dangerous_deserialization opts in to deserializing the
# stored files; only enable it for index directories created by this notebook.
faiss_index = FAISS.load_local(
    "med_tech_faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

def classify_text(text: str, index=None):
    """
    Classifies a given text as Complaint or Non-Complaint using similarity search.

    The label is taken from the prefix (the part before the first colon) of the
    single most similar indexed document, e.g. "Non-Complaint: ..." yields
    "Non-Complaint".

    Args:
        text (str): The input text to classify.
        index: Optional vector store exposing ``similarity_search(text, k=...)``.
            Defaults to the notebook-level ``faiss_index``.

    Returns:
        str: "Complaint" or "Non-Complaint", or "Unable to classify" when the
        similarity search returns no results.
    """
    store = faiss_index if index is None else index

    # Search for the most similar document
    results = store.similarity_search(text, k=1)
    if not results:
        return "Unable to classify"

    closest_match = results[0].page_content

    # A plain substring test for "Complaint" also matches "Non-Complaint", which
    # made every input classify as a complaint. Compare the exact label prefix.
    label = closest_match.split(":", 1)[0].strip()
    return "Complaint" if label == "Complaint" else "Non-Complaint"


In [11]:
# Example inputs; only `text_to_classify` is passed to the classifier below
text_to_classify1 = "I am suffering from fever but I am ok now."
text_to_classify = "The device broke during usage, leading to a patient injury."

# Classify the example and report the predicted label
classification = classify_text(text_to_classify)
print(f"Classification: {classification}")


Classification: Complaint


In [None]:

def compute_similarity(text: str, index=None, embedder=None):
    """
    Computes cosine similarity between the input text and every indexed document.

    Stored vectors are read back from the underlying FAISS index and matched to
    their documents by index position (via ``index_to_docstore_id`` and the
    docstore), so each score is paired with the document it was computed for.

    Args:
        text (str): The input text to compare.
        index: Optional LangChain FAISS vector store. Defaults to the
            notebook-level ``faiss_index``.
        embedder: Optional embedding model exposing ``embed_query(text)``.
            Defaults to the notebook-level ``embeddings``.

    Returns:
        List[Tuple[str, float]]: (page_content, cosine similarity) pairs sorted
        from most to least similar. Empty when the index holds no vectors.
    """
    store = faiss_index if index is None else index
    embedding_model = embeddings if embedder is None else embedder

    # FAISS index objects expose their vector count as `ntotal`, not via len().
    total = int(store.index.ntotal)
    if total == 0:
        return []

    input_embedding = np.asarray(embedding_model.embed_query(text), dtype=float)
    indexed_embeddings = np.asarray(store.index.reconstruct_n(0, total), dtype=float)

    # Compute cosine similarity; a zero-length vector yields 0.0 instead of NaN.
    dot_products = indexed_embeddings @ input_embedding
    norms = np.linalg.norm(indexed_embeddings, axis=1) * np.linalg.norm(input_embedding)
    similarities = np.divide(
        dot_products, norms, out=np.zeros_like(dot_products), where=norms > 0
    )

    # Row i of the reconstructed matrix belongs to the document whose docstore id
    # is index_to_docstore_id[i]; look it up directly rather than zipping with
    # search results, which are ordered by distance and would be mispaired.
    docs_with_scores = []
    for position, score in enumerate(similarities):
        doc = store.docstore.search(store.index_to_docstore_id[position])
        docs_with_scores.append((doc.page_content, float(score)))

    return sorted(docs_with_scores, key=lambda pair: pair[1], reverse=True)

# Score the example text against every indexed document and print one line
# per document, most similar first
similarity_results = compute_similarity(text_to_classify)
for entry in similarity_results:
    doc, score = entry
    print(f"Document: {doc} | Similarity: {score}")
