In [1]:
from IPython.display import display, Markdown
import importlib
import json

utils = importlib.import_module("LS_AMG_RAG.utils", package="LS_AMG_RAG")
importlib.reload(utils)
prompt_utils = importlib.import_module("LS_AMG_RAG.prompt_utils", package="LS_AMG_RAG")
importlib.reload(prompt_utils)

from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

uri = "mongodb+srv://team-all:HHcJOjFa0lD5zHma@lms-amg-rag.kqmslmy.mongodb.net/?retryWrites=true&w=majority"
client = MongoClient(uri, server_api=ServerApi("1"))
docs = client['RAG']['Docs']
metadata = client['RAG']['Metadata']

gemini = prompt_utils.Gemini()

try:
    client.admin.command("ping")
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


### Steps:
1. Get input query
2. Get keywords from query
3. Get the top k documents from the 'Docs' index and the 'Metadata' index
4. Apply vector search on the top 2k documents and the query to get the top 5 documents

In [2]:
query = "How do Instagram's employee ESOPs align with the company's objective to retain employees longer?"

print(f"Keywords from query: {', '.join(utils.keyword_yake(query))}")

print(f"Metadata from query: {json.dumps(utils.extract_metadata(query), indent=2)}")

Keywords from query: employee esops align, retain employees longer, employee esops, esops align, retain employees, employees longer, instagram, esops, employee, align


OSError: [E050] Can't find model 'LS_AMG_RAG/metadata_extraction/custom_ner/output/model-best'. It doesn't seem to be a Python package or a valid path to a data directory.

In [238]:
def keyword_search(query, collection, limit=10):
    result = client['RAG'][collection].aggregate([
        {
            '$search': {
                'index': collection,
                'text': {
                    'query': ' '.join(utils.keyword_yake(query)), # to search for the keywords
                    # 'query': query, # to search for the whole query
                    'path': {
                        'wildcard': '*'
                    }
                },
                "scoreDetails": True,
            }
        },
        {
            '$limit': limit
        },
        {
            '$project': {
                'score': {
                    '$meta': 'searchScore'
                },
                # "scoreDetails": {"$meta": "searchScoreDetails"},
                'Doc_ID': 1,
                'Keywords': 1,
            }
        }
    ])

    return result  # not a dict

In [239]:
result = list(keyword_search(query, collection='Metadata'))
doc_ids = [doc['Doc_ID'] for doc in result]
titles = [docs.find_one({'_id': doc_ids[idx]})['Doc_Title'] for idx in range(len(doc_ids))]
scores = [doc['score'] for doc in result]
print([(title, score) for title, score in zip(titles, scores)])

[('Employee Handbook.md', 1.607875108718872), ('Board of Directors.md', 1.561308741569519), ('About Instagram.md', 1.34767484664917), ('Shareholders.md', 1.2813968658447266), ('Business Proposal.md', 1.1969292163848877), ('Progress Report.md', 0.9864063858985901), ('employee_contract_Sarah Wilson.md', 0.3620193600654602), ('employee_esop_Emily Brown.md', 0.33952051401138306), ('employee_esop_Lucas Martinez.md', 0.3162347078323364), ('employee_esop_Zoe Lee.md', 0.3162347078323364)]


In [240]:
def vector_search(query, collection, limit=3):
    result = client['RAG'][collection].aggregate([
        {
            '$vectorSearch': {
                'index': "gemini_vector_index",
                'path': 'Gemini_vector',
                'filter': {
                    'Doc_Title': {
                        '$in': titles
                    }
                },
                'queryVector': utils.gemini_vector(query, "query"),
                'numCandidates': len(titles),
                'limit': limit
            }
        },
        {
            '$project': {
                'score': {
                    '$meta': 'vectorSearchScore'
                },
                'Doc_Title': 1,
                'Text': 1,
            }
        }
    ])

    return result  # not a dict

In [241]:
vector_result = list(vector_search(query, collection='Docs'))

In [242]:
vector_result[0]

{'_id': ObjectId('65f48815228d1c7a8ec33eb7'),
 'Doc_Title': 'employee_esop_Emily Brown.md',
 'Text': '\n Employee Stock Ownership Plan (ESOP)\n\n Employee Information:\n- Employee Name: Emily Brown\n- Position: UX Designer\n- Department: Design\n\n ESOP Details:\n- Allocation of Shares: 90 shares\n- Vesting Schedule: 4-year vesting schedule\n- Eligibility Criteria: Full-time employment\n- Valuation Methods: Fair market value\n- Rights and Responsibilities: Voting rights, dividend entitlement\n\nThis Employee Stock Ownership Plan (ESOP) is entered into between Instagram (hereinafter referred to as "Employer") and Emily Brown (hereinafter referred to as "Employee") on April 1, 2022.\n\nThe ESOP details the allocation of shares, vesting schedule, eligibility criteria, valuation methods, and the rights and responsibilities of participating employees. This plan aims to provide employees with an ownership interest in the company, aligning their interests with that of the company.\n',
 'score

In [243]:
metaprompt = """You are a helpful and informative bot that answers questions using text from the reference document included below. \
Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
strike a friendly and converstional tone. \
Use your own knowledge base in addition to the information provided in the document to answer the question. \
Make relevant assumptions and use your best judgement to answer the question. \

DOCUMENT TITLE: {relevant_document_title}

DOCUMENT:

{relevant_document}

Use the above information from the document to answer the following question:
{query}

ANSWER:
"""

metaprompt = metaprompt.format(query=query, relevant_document_title=vector_result[0]['Doc_Title'].split('.')[0], relevant_document=vector_result[0]['Text'])

result = gemini.send_message(metaprompt).text

In [244]:
display(Markdown(result))

Instagram's Employee Stock Ownership Plans (ESOPs) provide employees with an ownership  interest in the company, aligning their purpose and goals with that of the company. By doing this, ESOPs help to create a sense of loyalty and commitment among employees, which can lead to increased employee retention. Additionally, ESOPs can provide employees with a financial incentive to stay with the company, as the value of their shares will increase over time as the company grows and becomes more profitable.

In [245]:
display(Markdown(metaprompt))

You are a helpful and informative bot that answers questions using text from the reference document included below. Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. However, you are talking to a non-technical audience, so be sure to break down complicated concepts and strike a friendly and converstional tone. Use your own knowledge base in addition to the information provided in the document to answer the question. Make relevant assumptions and use your best judgement to answer the question. 
DOCUMENT TITLE: employee_esop_Emily Brown

DOCUMENT:


 Employee Stock Ownership Plan (ESOP)

 Employee Information:
- Employee Name: Emily Brown
- Position: UX Designer
- Department: Design

 ESOP Details:
- Allocation of Shares: 90 shares
- Vesting Schedule: 4-year vesting schedule
- Eligibility Criteria: Full-time employment
- Valuation Methods: Fair market value
- Rights and Responsibilities: Voting rights, dividend entitlement

This Employee Stock Ownership Plan (ESOP) is entered into between Instagram (hereinafter referred to as "Employer") and Emily Brown (hereinafter referred to as "Employee") on April 1, 2022.

The ESOP details the allocation of shares, vesting schedule, eligibility criteria, valuation methods, and the rights and responsibilities of participating employees. This plan aims to provide employees with an ownership interest in the company, aligning their interests with that of the company.


Use the above information from the document to answer the following question:
How do Instagram's employee ESOPs align with the company's objective to retain employees longer?

ANSWER:
