In [0]:
from utils import get_apikey

In [0]:
import openai
# Set the module-level OpenAI API key from the local `utils.get_apikey` helper,
# which keeps the secret itself out of the notebook source.
openai.api_key = get_apikey()

In [0]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

In [0]:
def get_gpt4_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector for ``text`` from the OpenAI embeddings endpoint.

    Despite the function name, no GPT-4 chat model is involved: the request
    goes to the embeddings endpoint using a dedicated embedding model.

    Args:
        text: The input to embed. Must not be empty.
        model: Name of the embedding model to request. Defaults to
            "text-embedding-3-small", the value this function previously
            hard-coded, so existing single-argument calls behave the same.

    Returns:
        The ``embedding`` field of the first entry in the response's ``data``
        list.

    Raises:
        ValueError: If ``text`` is empty; raised before any request is sent.
    """
    # The embeddings endpoint rejects empty input, so fail fast with a clear
    # message instead of spending a network round trip on a guaranteed error.
    if not text:
        raise ValueError("text must not be empty")

    # NOTE(review): `openai.Embedding.create` is the pre-1.0 SDK interface;
    # confirm the installed `openai` version still provides it.
    response = openai.Embedding.create(input=text, model=model)

    # Only the first result is returned: one input yields one embedding.
    return response['data'][0]['embedding']

In [0]:
# Small in-memory corpus that the retrieval index is built over
documents = [
    "This is the Fundamentals of RAG course.",
    "Educative is an AI-powered online learning platform.",
    "There are several Generative AI courses available on Educative.",
    "I am writing this using my keyboard.",
    "JavaScript is a good programming language :)"
]

# Embed each document with get_gpt4_embedding (one call per document) and
# stack the resulting vectors into a single NumPy array, one row per document
embeddings = np.array(list(map(get_gpt4_embedding, documents)))

# Banner line, caption, then the raw embedding matrix
print("--" * 50, "This is how it looks after going through an embedding model:\n", sep="\n")
print(embeddings)

----------------------------------------------------------------------------------------------------
This is how it looks after going through an embedding model:

[[-3.02747115e-02  4.11193855e-02  9.89152770e-03 ... -1.07105263e-02
   4.71630320e-03 -2.53607202e-02]
 [-1.95051357e-02 -5.74573763e-02 -3.16413902e-02 ...  2.26169955e-02
  -1.56339835e-02  1.40656056e-02]
 [-4.96361889e-02 -1.45944459e-02  5.92318475e-02 ... -2.19670273e-02
   9.87825450e-03 -5.52315637e-03]
 [ 2.41343491e-02  1.09905740e-02  4.54153754e-02 ...  7.39440584e-05
  -2.86157825e-03 -3.66968922e-02]
 [-3.84624414e-02  1.66771654e-02 -3.34709547e-02 ... -2.31614336e-02
  -9.42318141e-03 -1.19655747e-02]]


In [0]:
# Build the retrieval index: a single-nearest-neighbour search over the
# document embeddings, ranked by cosine distance (smaller = more similar)
index = NearestNeighbors(n_neighbors=1, metric='cosine')
index = index.fit(embeddings)

# Retrieve the stored document closest to a free-text query
def query_index(query):
    """Return the document whose embedding is nearest to the query's embedding.

    The query is embedded with ``get_gpt4_embedding`` and looked up in the
    module-level ``index``. The matched position and its distance are printed
    as a side effect; the matching entry of ``documents`` is returned.
    """
    # kneighbors expects a 2-D batch, so wrap the single vector in a list
    query_batch = np.array([get_gpt4_embedding(query)])
    distances, neighbor_ids = index.kneighbors(query_batch)

    # First (and only) query row, first (and only) neighbour
    best_id = neighbor_ids[0][0]
    print("Nearest document index:", best_id)
    print("Distance from query:", distances[0][0])
    return documents[best_id]

# Example query: the abbreviation "JS" does not appear verbatim in any document,
# so a match has to come from embedding similarity rather than keyword overlap.
query = "What is JS?"
print("Query:", query)
result = query_index(query) # Returns the nearest document's text; also prints its index and distance

print("Retrieved document:", result) # Show the text of the retrieved document

Query: What is JS?
Nearest document index: 4
Distance from query: 0.534633222348955
Retrieved document: JavaScript is a good programming language :)
