In [1]:
def create_collection_and_vector_index(database, mongo_collection, vector_property, embeddings_dimensions):
    """
    Fetch a collection handle and register a vector index on it.

    Args:
        database: Database object supporting ``database[name]`` lookup and
            ``database.command(...)``.
        mongo_collection: Name of the collection the index is created on.
        vector_property: Name of the document field that will hold the vectors.
        embeddings_dimensions: Value passed as the index's ``dimensions``.

    Returns:
        The object returned by ``database[mongo_collection]``.
    """
    target = database[mongo_collection]

    # HNSW settings; "m" and "efConstruction" are left at their default values
    hnsw_options = {
        "kind": "vector-hnsw",
        "m": 16,
        "efConstruction": 64,
        "similarity": "COS",
        "dimensions": embeddings_dimensions,
    }

    index_spec = {
        "name": "VectorSearchIndex",
        "key": {vector_property: "cosmosSearch"},
        "cosmosSearchOptions": hnsw_options,
    }

    # "createIndexes" stays the first key of the command document
    database.command({"createIndexes": mongo_collection, "indexes": [index_spec]})

    return target

In [None]:
@retry(wait=wait_random_exponential(min=1, max=200), stop=stop_after_attempt(20))
def generate_embeddings(text):
    """
    Return the embedding vector for a string of text.

    Used to vectorize both the ingested data and the user's input before
    interacting with Azure OpenAI. The call is wrapped by ``retry`` configured
    with ``wait_random_exponential(min=1, max=200)`` and
    ``stop_after_attempt(20)``.

    Args:
        text: The text to embed.

    Returns:
        The ``embedding`` value of the first entry in the response's ``data``.
    """
    # Convert the response to plain dicts/lists so it can be indexed by key
    payload = openai_client.embeddings.create(
        input=text,
        model=openai_embeddings_deployment,
        dimensions=openai_embeddings_dimensions,
    ).model_dump()

    # A single input is sent, so only the first data entry is read
    return payload['data'][0]['embedding']

In [None]:
# Stream the source file, attach an embedding to every document and insert it
# into `collection`.
success = True
counter = 0

# Open the stream in a context manager so the HTTP response is closed even if
# embedding generation or an insert fails part-way through.
with urllib.request.urlopen(storage_file_url) as stream:

    # Each element under the 'item' prefix is one document. The loop variable
    # is named `movie` (not `object`) so the builtin is not shadowed in the
    # kernel namespace after this cell has run.
    for movie in ijson.items(stream, 'item', use_float=True):

        # generate an embedding for each overview to add to vector index
        movie[mongo_vector_property] = generate_embeddings(movie['overview'])

        # insert the document into the collection
        collection.insert_one(movie)

        counter += 1

        # Report progress and pause briefly after every 100 documents
        if counter % 100 == 0:
            print("Inserted {} documents into collection: '{}'.".format(counter, collection.name))
            sleep(.5)

print("Data inserted into collection: '{}'.\n".format(collection.name))

In [None]:
def vector_search(collection, vectors, similarity_score=0.02, num_results=3, vector_property=None):
    """
    Run a vector search against a collection and return the trimmed matches.

    Args:
        collection: Object exposing ``aggregate(pipeline)``.
        vectors: Query embedding passed as the search ``vector``.
        similarity_score: Only results whose ``similarityScore`` is greater
            than this value are kept.
        num_results: Passed as ``k`` to the search stage.
        vector_property: Name of the document field holding the embedding.
            When omitted, the notebook-level ``mongo_vector_property`` is used.

    Returns:
        A list of dicts, each with ``similarityScore`` and ``document``. The
        embedding field and all ``_id`` fields are removed from each result.
    """
    # Only touch the notebook-level setting when the caller gave no field name
    if vector_property is None:
        vector_property = mongo_vector_property

    pipeline = [
        {
            '$search': {
                "cosmosSearch": {
                    "vector": vectors,
                    "path": vector_property,
                    "k": num_results,
                    "efsearch": 40  # optional for HNSW only
                },
                "returnStoredSource": True
            }
        },
        {'$project': {'similarityScore': {'$meta': 'searchScore'}, 'document': '$$ROOT'}},
        {'$match': {"similarityScore": {'$gt': similarity_score}}},
    ]

    results = list(collection.aggregate(pipeline))

    # Drop the embedding to reduce payload size to the LLM, and the _id
    # properties to avoid serialization issues. The embedding is removed under
    # the same field name that was searched, and pop() tolerates documents
    # that lack a field instead of raising KeyError.
    for result in results:
        result.pop('_id', None)
        document = result['document']
        document.pop(vector_property, None)
        document.pop('_id', None)

    return results

In [9]:
def generate_completion(user_prompt, vector_search_results, chat_history):
    """
    Build the chat payload and request a completion from the model.

    Args:
        user_prompt: The user's current question.
        vector_search_results: Iterable of search hits; each hit's
            ``'document'`` entry is serialized to JSON and sent as context.
        chat_history: Iterable of earlier turns, each with ``'prompt'`` and
            ``'completion'`` entries.

    Returns:
        The completion response converted with ``model_dump()``.
    """
    system_prompt = '''
    You are an intelligent assistant for the Movie Lens Expert AI Assistant.
    You are designed to provide helpful answers to user questions about movies in your database.
    You are friendly, helpful, and informative and a wee bit cheeky.
        - Only answer questions related to the information provided below.
        - Write two lines of whitespace between each answer in the list.
        - If you're unsure of an answer, you can say "I don't know" or "I'm not sure" and recommend users search themselves.
    '''

    # Create a list of messages as a payload to send to the OpenAI Completions API

    # System Prompt
    messages = [{"role": "system", "content": system_prompt}]

    # Chat history: replay each earlier turn as a user/assistant pair so the
    # model reads it as conversation rather than as extra system instructions
    for item in chat_history:
        messages.append({"role": "user", "content": item['prompt']})
        messages.append({"role": "assistant", "content": item['completion']})

    # User Prompt
    messages.append({"role": "user", "content": user_prompt})

    # Add the vector search results. default=str keeps serialization from
    # raising on values json cannot encode natively; JSON-native documents
    # serialize exactly as before.
    for item in vector_search_results:
        messages.append({"role": "system", "content": json.dumps(item['document'], default=str)})

    # Send the payload to the OpenAI Completions API
    response = openai_client.chat.completions.create(
        model=openai_completions_deployment,
        messages=messages)

    return response.model_dump()
    

In [None]:
def chat_completion(user_input):
    """
    Answer a user question, serving it from the cache when possible.

    The input is embedded once. That embedding is first searched against the
    ``cache`` collection; on a hit the cached completion is returned. On a
    miss, ``movies`` is searched, a completion is generated with the last
    three chat-history entries as context, and the result is cached.

    Args:
        user_input: The user's question.

    Returns:
        The completion text, either cached or newly generated.
    """
    prompt_vectors = generate_embeddings(user_input)

    # Threshold 0.99 with one result: only a near-identical earlier question
    # counts as a cache hit
    cached = vector_search(cache, prompt_vectors, similarity_score=0.99, num_results=1)
    if cached:
        return cached[0]['document']['completion']

    # Cache miss: retrieve context documents and recent chat history
    context_docs = vector_search(movies, prompt_vectors)
    recent_turns = get_chat_history(3)

    # Ask the model, then store the answer so a repeat question hits the cache
    llm_response = generate_completion(user_input, context_docs, recent_turns)
    cache_response(user_input, prompt_vectors, llm_response)

    return llm_response['choices'][0]['message']['content']
    

In [None]:
# Quick check of the retrieval path: embed one question, search `movies`,
# and list what came back.
query = "What movie has Buzz Lightyear?"
embeddings = generate_embeddings(query)
# No threshold or result count is passed, so vector_search's defaults apply.
results = vector_search(movies, embeddings)
for result in results: 
    # Each hit exposes its score and the matched document under 'document'.
    print(f"Similarity Score: {result['similarityScore']}")  
    print(f"Title: {result['document']['title']}")  