In [1]:
# Knowledge base for the retrieval demo: one sentence per entry. Each entry is
# embedded and indexed individually, so a sentence is the unit of retrieval.
human_eye_info = [
    # --- Overview ---
    "The human eye is an incredible and intricate organ that allows us to perceive the world around us.",
    "It converts light into neural signals, which the brain interprets to form visual images.",
    "This process requires the coordinated action of numerous specialized structures, each playing a unique role.",
    # --- Anatomy ---
    "The eye's structure is complex, comprising several key parts.",
    "The cornea is the eye's outermost layer and works as the window that focuses and controls the entry of light.",
    "It is transparent, dome-shaped, and acts as a primary lens, refracting light onto the inner structures.",
    "The sclera, or the 'white of the eye,' is a tough, fibrous tissue that protects the eye and provides attachment points for muscles.",
    "The iris is the colored part of the eye and contains muscles that expand and contract to control the size of the pupil.",
    "The pupil, the black circular opening in the iris, adjusts to allow more or less light into the eye.",
    "It dilates in low light and constricts in bright light, adapting to maintain optimal vision.",
    "Located behind the iris, the lens fine-tunes the focus by adjusting its shape, thanks to the ciliary muscles that alter its curvature.",
    "The retina is a layer of tissue at the back of the eye containing photoreceptor cells (rods and cones) that detect light and send electrical signals to the brain.",
    "The macula is a small, central area in the retina responsible for high-resolution vision.",
    "The fovea, within the macula, is essential for sharp, detailed central vision.",
    "The optic nerve transmits visual information from the retina to the brain, where interpretation occurs.",
    # --- How vision works ---
    "Vision involves a series of steps as light enters the eye and is converted into images.",
    "Light passes through the cornea, which refracts and bends the light.",
    "The cornea and lens work together to focus the light onto the retina.",
    "The lens further adjusts its curvature to ensure that light rays focus correctly on the retina, producing a sharp image.",
    "The retina contains millions of rods and cones.",
    "Rods are more sensitive to light and allow us to see in dim lighting, while cones provide color vision and detail.",
    "Photoreceptors convert light into electrical impulses, which travel through the optic nerve to the brain's visual cortex.",
    "The brain processes these signals to form the images we perceive.",
    # --- Photoreceptors ---
    "Rods are highly sensitive and help us see in low-light conditions.",
    "They are more numerous and are responsible for peripheral and night vision.",
    "Cones are concentrated in the fovea and allow us to see fine detail and color.",
    "There are three types of cones, each sensitive to different wavelengths of light (red, green, and blue).",
    # --- Common disorders ---
    "Refractive errors, including myopia (nearsightedness), hyperopia (farsightedness), and astigmatism, are caused by irregularities in the cornea or lens.",
    "Cataracts occur when the lens becomes cloudy, leading to blurred vision.",
    "They are common with aging and can be corrected surgically.",
    "Glaucoma is a group of eye diseases that damage the optic nerve, often due to increased pressure in the eye.",
    "It can result in vision loss if untreated.",
    "Age-related macular degeneration (AMD) affects the central vision due to the deterioration of the macula.",
    "Diabetic retinopathy occurs when high blood sugar damages blood vessels in the retina, potentially leading to blindness if untreated.",
    # --- Eye care and summary ---
    "Regular eye exams, protective eyewear, and maintaining a healthy lifestyle can prevent many eye problems.",
    "Avoiding excessive screen time, protecting eyes from UV rays, and managing chronic health conditions are also vital.",
    "The human eye is a marvel of biology, intricately designed to convert light into visual images that allow us to perceive and navigate our surroundings.",
    "Understanding the anatomy and function of the eye, along with common disorders, can help us take better care of this essential organ.",
    # Off-topic sentence; the retrieval test query about "keivan jamali" targets it.
    "KeivanJamali live in Yazd."
]


# Tokenization and embedding


In [2]:
import faiss
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel


ModuleNotFoundError: No module named 'faiss'

In [3]:
# Load the sentence-embedding checkpoint once; the tokenizer and the encoder
# must come from the same checkpoint, so both read the same identifier.
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-MiniLM-L6-V2"

tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME)

In [4]:
# Function to tokenize the input and generate its embeddings
def embed_text(text, tokenizer, model):
    """Embed one text or a batch of texts by mean-pooling token embeddings.

    Args:
        text: A string, or a list of strings to embed as one batch.
        tokenizer: Callable tokenizer; it is invoked with
            ``return_tensors="pt"``, ``padding=True`` and ``truncation=True``
            and must return a mapping that contains ``"attention_mask"``.
        model: Callable encoder; it receives the tokenizer output as keyword
            arguments and must return an object exposing ``last_hidden_state``.

    Returns:
        A tensor with one row per input text, holding the average of that
        text's token embeddings.
    """
    inputs = tokenizer(text,
                       return_tensors="pt",
                       padding=True,
                       truncation=True)

    # No gradients are needed for pure inference.
    with torch.inference_mode():
        token_embeddings = model(**inputs).last_hidden_state

        # Average only over real tokens. A plain mean over the sequence axis
        # would also average the padding positions that `padding=True` adds
        # to the shorter texts of a batch, skewing their embeddings.
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
        summed = (token_embeddings * mask).sum(dim=1)
        # Clamp so a row made only of padding cannot divide by zero.
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        embeddings = summed / token_counts

    return embeddings

In [5]:
# Embed every document on its own and collect the resulting tensors in a list,
# in the same order as `human_eye_info`.
document_embeddings = [
    embed_text(sentence, tokenizer, model) for sentence in human_eye_info
]

In [6]:
# Concatanate all embeddings into a pytorch tensor, move it to the cpu, and convert to numpy array.
document_embeddings = torch.cat(document_embeddings).cpu().numpy()
document_embeddings

array([[-9.53816227e-04,  1.08257189e-01, -3.05746794e-01, ...,
         2.97097623e-01,  4.49789874e-02, -1.77720591e-01],
       [-6.39171079e-02, -2.55471796e-01, -1.38776256e-02, ...,
         7.94618487e-01,  2.01459453e-01, -3.56312662e-01],
       [-2.11764693e-01,  3.38874236e-02, -2.10659251e-01, ...,
         5.02929501e-02,  3.35229337e-01, -1.12758599e-01],
       ...,
       [-1.10684074e-01,  1.27678392e-02, -2.23860085e-01, ...,
         3.44089419e-01,  2.20527858e-01, -2.00841233e-01],
       [ 3.70444283e-02, -1.27591178e-01,  6.67935237e-02, ...,
         3.61070693e-01,  4.62078869e-01,  7.47982189e-02],
       [ 4.47124720e-01,  5.52255452e-01,  7.55414367e-05, ...,
        -3.41778129e-01, -3.60967070e-01, -3.35635424e-01]], dtype=float32)

In [7]:
# Build a flat L2 index whose dimensionality matches the embedding width
# (second axis of the embedding matrix), then load every document vector.
embedding_dim = document_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(document_embeddings)

In [8]:
# Retrieval -> Build a function to retrieve information

def retrieve(query, tokenizer, model, index, documents, top_k=3):
    """Return the documents closest to ``query`` and their distances.

    Args:
        query: Text to search for.
        tokenizer: Tokenizer forwarded to ``embed_text``.
        model: Encoder forwarded to ``embed_text``.
        index: Search index exposing ``search(vectors, k)`` that returns
            ``(distances, indices)`` arrays with one row per query.
        documents: Sequence of documents; position ``i`` must correspond to
            vector ``i`` in ``index``.
        top_k: Maximum number of hits to return.

    Returns:
        A tuple ``(hits, distances)``: the matching documents, best match
        first, and the array of their distances in the same order. Fewer than
        ``top_k`` hits are returned when the index cannot supply that many.
    """
    query_embedding = embed_text(query, tokenizer, model).cpu().numpy()
    # The index expects a 2-D (n_queries, dim) array.
    if query_embedding.ndim == 1:
        query_embedding = query_embedding.reshape(1, -1)

    distances, indices = index.search(query_embedding, top_k)
    hit_ids = indices[0]

    # FAISS fills missing results with label -1 when the index holds fewer
    # than top_k vectors. Indexing `documents[-1]` would silently return the
    # last document as a bogus hit, so drop those slots and keep the
    # distances aligned with the surviving hits.
    found = hit_ids >= 0
    return [documents[i] for i in hit_ids[found]], distances[0][found]

In [9]:
# Test the function.
# Alternative query to try: "What are lenses?"
query = "Where is keivan jamali come from?"
retrieved_docs, distances = retrieve(query, tokenizer, model, index, human_eye_info, top_k=3)

# Show each retrieved sentence next to its distance (smaller means closer).
list(zip(retrieved_docs, distances))


: 