In [43]:
from IPython.display import display, Markdown
import os
from LS_AMG_RAG.data_snythesis import prompt_utils
import google.generativeai as palm
import time

import chromadb
import chromadb.utils.embedding_functions as embedding_functions
# chroma_client = chromadb.Client()
chroma_client = chromadb.PersistentClient(path="./")
# chroma_client = chromadb.HttpClient(host='localhost', port=8000)
gemini = prompt_utils.Gemini()
google_ef  = embedding_functions.GoogleGenerativeAiEmbeddingFunction(api_key=os.environ['GEMINI_API_KEY'])

In [44]:
chroma_client.heartbeat()

1711085828386036100

In [45]:
try:
    collection = chroma_client.get_or_create_collection(
        name="my_collection",
        embedding_function=google_ef,
        metadata={"hnsw:space": "cosine"})
except:
    collection = chroma_client.create_collection(
        name="my_collection",
        embedding_function=google_ef,
        metadata={"hnsw:space": "cosine"})
    documents = []
    metadata = []
    ids = []

    for root, dirs, files in os.walk("../data"):
        for file in files:
            if file.endswith(".md"):
                category = root.split('\\')[-1]
                with open(os.path.join(root, file), "r") as f:
                    file_contents = f.read()
                    documents.append(file_contents)
                    metadata.append({
                        "type": category,
                    })
                    ids.append(f"{category}_{file}")

    collection.add(
        documents=documents,
        metadatas=metadata,
        ids=ids,
    )

else:
    print(f"Total number of documents in the collection: {len(collection.get()['ids'])}")

Total number of documents in the collection: 74


In [46]:
# documents = []
# metadata = []
# ids = []

# for root, dirs, files in os.walk("../data"):
#     for file in files:
#         if file.endswith(".md"):
#             category = root.split('\\')[-1]
#             with open(os.path.join(root, file), "r") as f:
#                 file_contents = f.read()
#                 documents.append(file_contents)
#                 metadata.append({
#                     "type": category,
#                 })
#                 ids.append(f"{category}_{file}")

# collection.add(
#     documents=documents,
#     metadatas=metadata,
#     ids=ids,
# )

In [47]:
top_k = {
    1: [],
    3: [],
    5: [],
    10: [],
}

queries = [
    {'query': "What is Instagram's current business proposal?",
     'file': "Business Proposal.md"},
    {'query': "What is the marketing plan for Instagram?",
     'file': "Marketing Plan.md"},
    {'query': "What information does the progress report of Instagram contain?",
     'file': "Progress Report.md"},
    {'query': "Who are the members of Instagram's board of directors?",
     'file': "Board of Directors.md"},
    {'query': "What are the diversity and inclusion initiatives implemented by Instagram?",
     'file': 'Diversity, Equity, and Inclusion.md'},
    {'query': "What is the Marketing Objective for Influencer Collaboration Services?",
     'file': 'Marketing Plan.md'},
    {'query': "Who is the target audience of Content Creation and Curation Services?",
     'file': 'Marketing Plan.md'},
    {'query': "What is the financial update for the Reels Optimization Project?",
     'file': 'Progress Report.md'},
    {'query': "Give me details about the progress report of the Stories Upgrade project.",
     'file': 'Progress Report.md'},
    {'query': "Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.",
     'file': 'Progress Report.md'}
]

step_times = {
    'retrieval': [],
    'gen': [],
    'total': [],
}

In [48]:

for idx, query in enumerate(queries):
    print(f"Query {idx+1}: {query['query']}")
    total_start_time = time.time()
    retrieval_start_time = time.time()
    results = collection.query(
        query_texts=query['query'],
        n_results=10,
    )

    for k in top_k.keys():
        top_k[k].append(any(query['file'] in x for x in results['ids'][0][:k]))

    retrieval_end_time = time.time()
    print(f"True document: {query['file']}")
    print(f"Document retrieved: {results['ids'][0][0]}")
    print(f"Retrieval time: {retrieval_end_time - retrieval_start_time:.2f} seconds")


    metaprompt = """You are a helpful and informative bot that answers questions using text from the reference document included below. \
Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
strike a friendly and converstional tone. \
Use your own knowledge base in addition to the information provided in the document to answer the question. \
Make relevant assumptions and use your best judgement to answer the question. \
  QUESTION: '{query}'
  PASSAGE: '{relevant_passage}'

  ANSWER:
"""

    gen_start_time = time.time()
    gemini_result = gemini.send_message(message=metaprompt.format(query=queries[0], relevant_passage=results['documents'][0][0])).text
    gen_end_time = time.time()
    total_end_time = time.time()

    step_times['retrieval'].append(retrieval_end_time - retrieval_start_time)
    step_times['gen'].append(gen_end_time - gen_start_time)
    step_times['total'].append(total_end_time - total_start_time)
    
    display(Markdown(gemini_result))
    print(f"Gemini time: {gen_end_time - gen_start_time:.2f} seconds")
    print(f"Total time: {total_end_time - total_start_time:.2f} seconds")
    print("\n-------------------\n")

print("Results:")
print("Top@K:")
for k in top_k.keys():
    print(f"Top@{k}: {(sum(top_k[k]) * 100) / len(top_k[k]):.2f}%")

print("\n")

print("Average Times:")
print(f"Retrieval time: {sum(step_times['retrieval']) / len(step_times['retrieval']):.2f} seconds")
print(f"Generation time: {sum(step_times['gen']) / len(step_times['gen']):.2f} seconds")
print(f"Total time: {sum(step_times['total']) / len(step_times['total']):.2f} seconds")

Query 1: What is Instagram's current business proposal?


True document: Business Proposal.md
Document retrieved: business_docs_Business Proposal.md
Retrieval time: 1.69 seconds


Instagram proposes a suite of marketing services to businesses, encompassing social media management, influencer collaborations, and content creation. Their social media marketing services involve strategic content creation, targeted ad campaigns, audience analysis, and performance tracking. Instagram also offers influencer collaboration services, leveraging their platform's influencer community to connect brands with relevant influencers for impactful partnerships. Additionally, they provide content creation and curation services, assisting businesses in developing visually appealing and engaging content for their social media presence.

Gemini time: 3.14 seconds
Total time: 4.83 seconds

-------------------

Query 2: What is the marketing plan for Instagram?
True document: Marketing Plan.md
Document retrieved: business_docs_Marketing Plan.md
Retrieval time: 0.41 seconds


Instagram's current business proposal revolves around three core offerings: social media marketing services, influencer collaboration services, and content creation and curation services. The social media marketing services encompass strategic content creation, targeted ad campaigns, audience analysis, and performance tracking to help businesses enhance their online presence and customer engagement. Instagram also offers influencer collaboration services, leveraging their platform's community of influencers to connect brands with relevant influencers for impactful partnerships. These collaborations aim to drive brand awareness, user engagement, and brand advocacy through authentic influencer endorsements. Additionally, Instagram's content creation and curation services assist businesses in developing visually appealing and engaging content for their social media presence, elevating brand storytelling and audience interaction.

Gemini time: 4.31 seconds
Total time: 4.72 seconds

-------------------

Query 3: What information does the progress report of Instagram contain?
True document: Progress Report.md
Document retrieved: business_docs_Progress Report.md
Retrieval time: 0.42 seconds


I apologize, but the provided document does not contain Instagram's current business proposal. The document primarily focuses on a progress report of various projects within Instagram, including Feed Redesign, Stories Upgrade, Direct Messaging Update, Explore Algorithm Enhancement, IGTV Expansion, and Reels Optimization. It does not provide insights into Instagram's overall business strategy or its current business proposals.

Gemini time: 4.56 seconds
Total time: 4.99 seconds

-------------------

Query 4: Who are the members of Instagram's board of directors?
True document: Board of Directors.md
Document retrieved: company_bylaws_Board of Directors.md
Retrieval time: 0.44 seconds


Based on the provided document, I'm unable to find specific information regarding Instagram's current business proposal. The document primarily focuses on the composition, contributions, and contact information of Instagram's Board of Directors. It does not provide insights into the company's business strategies or proposals.

Gemini time: 3.41 seconds
Total time: 3.85 seconds

-------------------

Query 5: What are the diversity and inclusion initiatives implemented by Instagram?
True document: Diversity, Equity, and Inclusion.md
Document retrieved: company_bylaws_Diversity, Equity, and Inclusion.md
Retrieval time: 0.42 seconds


I apologize, but the provided document does not contain information about Instagram's current business proposal. The document focuses on Instagram's commitment to diversity, equity, and inclusion (DEI) within their organization and does not provide insights into their business strategies or proposals.

Gemini time: 4.57 seconds
Total time: 4.98 seconds

-------------------

Query 6: What is the Marketing Objective for Influencer Collaboration Services?
True document: Marketing Plan.md
Document retrieved: business_docs_Marketing Plan.md
Retrieval time: 0.41 seconds


Based on the provided document, Instagram's current business proposal is centered around three primary service offerings:

1. **Social Media Marketing Services:** This service caters to businesses, especially small businesses, startups, and e-commerce ventures, assisting them in enhancing their online presence and engaging with a younger demographic on social media platforms. Instagram offers personalized social media marketing strategies, influencer collaborations, interactive content, data-driven insights, and performance tracking to optimize campaigns and drive customer engagement and conversions.

2. **Influencer Collaboration Services:** Instagram recognizes the power of influencer marketing and offers influencer collaboration services to fashion brands, lifestyle products, and beauty companies. They help brands identify relevant influencers, establish authentic partnerships, and co-create compelling content that resonates with target audiences. Performance monitoring and analysis ensure that influencer campaigns align with brand objectives and drive engagement, brand affinity, and increased reach.

3. **Content Creation and Curation Services:** For media companies, creative agencies, and lifestyle brands seeking to elevate their content quality and engagement, Instagram provides content creation and curation services. They focus on understanding audience preferences and content consumption habits on social media, developing tailored content plans, and producing visually captivating content that aligns with brand guidelines. Interactive storytelling elements and user-generated content campaigns enhance audience interaction and create immersive brand experiences.

Gemini time: 7.02 seconds
Total time: 7.44 seconds

-------------------

Query 7: Who is the target audience of Content Creation and Curation Services?
True document: Marketing Plan.md
Document retrieved: business_docs_Marketing Plan.md
Retrieval time: 0.43 seconds


**Instagram's Current Business Proposal**

Instagram, renowned for its visual storytelling prowess, presents a multifaceted business proposal that encompasses three core offerings:

**1. Social Media Marketing Services:**

- **Target Audience:** Small businesses, startups, and e-commerce ventures seeking to amplify their online presence and connect with a younger audience on social media.
- **Services:** Personalized marketing strategies, influencer collaborations, data-driven insights, and performance tracking to optimize campaigns and drive customer engagement and conversions.

**2. Influencer Collaboration Services:**

- **Target Audience:** Fashion brands, lifestyle products, and beauty companies aiming to harness the power of influencer marketing.
- **Services:** Identification of relevant influencers, establishment of authentic partnerships, co-creation of compelling content, performance monitoring, and leveraging of influencer content across multiple platforms to maximize reach and drive brand affinity.

**3. Content Creation and Curation Services:**

- **Target Audience:** Media companies, creative agencies, and lifestyle brands seeking to elevate their content quality and engagement.
- **Services:** Tailored content planning, visually captivating content production, implementation of user-generated content campaigns, and interactive storytelling elements to enhance audience interaction and create immersive brand experiences.

Through these services, Instagram empowers businesses to harness the platform's vast reach, influential community, and immersive content formats to achieve their marketing objectives and connect with their target audiences in a meaningful and engaging way.

Gemini time: 7.33 seconds
Total time: 7.76 seconds

-------------------

Query 8: What is the financial update for the Reels Optimization Project?
True document: Progress Report.md
Document retrieved: business_docs_Progress Report.md
Retrieval time: 0.42 seconds


Based on the provided document, I'm unable to find specific information regarding Instagram's current business proposal. This document focuses on a progress report of various projects within Instagram, including Feed Redesign, Stories Upgrade, Direct Messaging Update, Explore Algorithm Enhancement, IGTV Expansion, and Reels Optimization. It does not provide insights into the company's business strategies or proposals.

Gemini time: 4.51 seconds
Total time: 4.92 seconds

-------------------

Query 9: Give me details about the progress report of the Stories Upgrade project.
True document: Progress Report.md
Document retrieved: business_docs_Progress Report.md
Retrieval time: 0.42 seconds


The provided document does not contain information about Instagram's current business proposal. It primarily focuses on a progress report of various projects within Instagram, such as Feed Redesign, Stories Upgrade, Direct Messaging Update, Explore Algorithm Enhancement, IGTV Expansion, and Reels Optimization. Without access to up-to-date information or insights into Instagram's business strategy, I cannot provide a response regarding their current business proposal.

Gemini time: 4.43 seconds
Total time: 4.85 seconds

-------------------

Query 10: Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.
True document: Progress Report.md
Document retrieved: business_docs_Progress Report.md
Retrieval time: 0.42 seconds


I'm sorry, but the document you provided does not contain information about Instagram's current business proposal. The document focuses on a progress report of various projects within Instagram and does not provide insights into the company's business strategy or proposals.

Gemini time: 5.13 seconds
Total time: 5.55 seconds

-------------------

Results:
Top@K:
Top@1: 100.00%
Top@3: 100.00%
Top@5: 100.00%
Top@10: 100.00%


Average Times:
Retrieval time: 0.55 seconds
Generation time: 4.84 seconds
Total time: 5.39 seconds
