In [1]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

In [2]:
from IPython.display import display, Markdown
import importlib
import json

# Import the project helper modules by dotted name, then reload each one so the
# kernel re-executes the module source instead of reusing the cached import.
utils = importlib.import_module("LS_AMG_RAG.utils", package="LS_AMG_RAG")
importlib.reload(utils)
prompt_utils = importlib.import_module("LS_AMG_RAG.prompt_utils", package="LS_AMG_RAG")
# Last expression of the cell: its return value (the reloaded module) is what the
# cell displays as output.
importlib.reload(prompt_utils)

<module 'LS_AMG_RAG.prompt_utils' from 'D:\\MajorProject\\Repo\\LS_AMG_RAG\\LS_AMG_RAG\\prompt_utils.py'>

In [1]:
from IPython.display import display, Markdown
import os
from LS_AMG_RAG.data_snythesis import prompt_utils
import google.generativeai as palm

import chromadb
import chromadb.utils.embedding_functions as embedding_functions
# Vector store client backed by files in the notebook's working directory.
# (In-memory alternative kept for reference: chromadb.Client())
chroma_client = chromadb.PersistentClient(path="./")

# Gemini client object from the project's prompt_utils module; later cells call
# its send_message() method.
gemini = prompt_utils.Gemini()

# Embedding function handed to Chroma. The API key is read from the environment,
# so a missing GEMINI_API_KEY fails here with a KeyError.
google_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(
    api_key=os.environ['GEMINI_API_KEY'],
)

In [2]:
# Fetch the collection if it already exists in the store, otherwise create it;
# texts added to or queried from it are embedded with google_ef.
collection = chroma_client.get_or_create_collection(
    embedding_function=google_ef,
    name="my_collection",
)

In [3]:
def load_markdown_documents(data_dir):
    """Collect every ``.md`` file found under ``data_dir``.

    Each file is tagged with the name of the directory that directly contains it.

    Args:
        data_dir: Root directory that is walked recursively.

    Returns:
        A ``(documents, metadata, ids)`` tuple of three parallel lists: the file
        contents, ``{"type": <directory name>}`` dicts, and
        ``"<directory name>_<file name>"`` ids.
    """
    documents, metadata, ids = [], [], []
    for root, _dirs, files in os.walk(data_dir):
        # Take the containing folder's name via os.path rather than splitting on
        # '\\': the backslash split only works on Windows, elsewhere it returned
        # the whole path as the category (and as the id prefix).
        category = os.path.basename(os.path.normpath(root))
        for file_name in files:
            if not file_name.endswith(".md"):
                continue
            # NOTE(review): the platform default encoding is kept on purpose;
            # switch to encoding="utf-8" once the data files are confirmed UTF-8.
            with open(os.path.join(root, file_name), "r") as f:
                documents.append(f.read())
            metadata.append({"type": category})
            ids.append(f"{category}_{file_name}")
    return documents, metadata, ids


# Rebuilt from scratch on every run, so re-executing this cell never duplicates entries.
documents, metadata, ids = load_markdown_documents("../data")

In [4]:
# Sanity check: how many markdown files the loading cell collected.
len(documents)

74

In [5]:
# Index the loaded files. `upsert` is used instead of `add` because the client is
# persistent: on a re-run the same ids already exist in the store, and upsert
# creates missing entries and refreshes existing ones, keeping this cell
# idempotent under Restart & Run All.
collection.upsert(
    documents=documents,
    metadatas=metadata,
    ids=ids,
)

In [19]:
import time
def generate_query(quer):
    """Ask Gemini to rewrite a question into a retrieval-oriented query.

    The model's reply is rendered as Markdown in the cell output, followed by the
    time the Gemini call took.

    Args:
        quer: The original question text; it is substituted into the prompt.

    Returns:
        The ``.text`` of the Gemini response.
    """
    metaprompt = """Please rewrite and enhance the given question to make it suitable for retrieving information from the reference document. Be comprehensive, and use your own knowledge base in addition to the information provided in the document. Make relevant assumptions and exercise your best judgment to answer the question.
    QUESTION: '{query}'

    ANSWER:
    """
    # perf_counter is monotonic and high resolution, which makes it the right
    # clock for measuring a short elapsed duration.
    start_time = time.perf_counter()
    gemini_result = gemini.send_message(message=metaprompt.format(query=quer)).text
    elapsed = time.perf_counter() - start_time
    display(Markdown(gemini_result))
    print(f"Gemini time: {elapsed} seconds")
    return gemini_result

In [20]:
def generate_responses(quer, collection):
    """Retrieve the best-matching document for a query and have Gemini answer from it.

    Prints the id of the retrieved document and the retrieval, generation and
    total timings, and renders the answer as Markdown in the cell output.

    Args:
        quer: Query text; used both for retrieval and as the QUESTION in the prompt.
        collection: Object exposing ``query(query_texts=..., n_results=...)`` and
            returning a mapping with ``'ids'`` and ``'documents'`` entries
            (a Chroma collection in this notebook).

    Returns:
        The ``.text`` of the Gemini response.

    Raises:
        IndexError: If the retrieval returned no documents.
    """
    # perf_counter is monotonic and high resolution, suited to elapsed-time measurement.
    total_start_time = time.perf_counter()
    # NOTE(review): three results are requested but only the top-ranked passage
    # is placed in the prompt below.
    results = collection.query(
        query_texts=quer,
        n_results=3
    )
    retrieval_seconds = time.perf_counter() - total_start_time

    # Fail with an explicit message instead of an opaque "list index out of range"
    # when nothing comes back (e.g. an empty collection).
    if not results['ids'] or not results['ids'][0]:
        raise IndexError("No documents were retrieved for the query; is the collection empty?")

    print(f"Document retrieved: {results['ids'][0][0]}")

    print(f"Retrieval time: {retrieval_seconds} seconds")

    metaprompt = """You are a helpful and informative bot that answers questions using text from the reference document included below. \
    Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
    However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
    strike a friendly and converstional tone. \
    Use your own knowledge base in addition to the information provided in the document to answer the question. \
    Make relevant assumptions and use your best judgement to answer the question. \
    QUESTION: '{query}'
    PASSAGE: '{relevant_passage}'

    ANSWER:
    """
    start_time = time.perf_counter()
    gemini_result = gemini.send_message(message=metaprompt.format(query=quer, relevant_passage=results['documents'][0][0])).text
    end_time = time.perf_counter()
    display(Markdown(gemini_result))
    print(f"Gemini time: {end_time - start_time} seconds")
    print(f"Total time: {end_time - total_start_time} seconds")
    return gemini_result

In [21]:
def QueryRewriting(query,collection):
    """Run the query-rewriting pipeline: rewrite the question, then answer it.

    The question is first passed to ``generate_query``; the rewritten text is then
    handed to ``generate_responses`` together with ``collection``. The overall
    elapsed time is printed once both steps have finished.

    Args:
        query: The user's original question.
        collection: Collection forwarded unchanged to ``generate_responses``.

    Returns:
        Whatever ``generate_responses`` returns for the rewritten query.
    """
    began_at = time.time()
    rewritten_query = generate_query(query)
    answer = generate_responses(rewritten_query, collection)
    elapsed_seconds = time.time() - began_at
    print(f"The total time overall by Query Writing is: {elapsed_seconds}")
    return answer

In [23]:
# End-to-end run on one sample question: rewrite, retrieve, answer.
response = QueryRewriting(
    "Can you provide details about the marketing plan for Instagram?",
    collection,
)

What are the key elements of Instagram's marketing plan, including its target audience, marketing objectives, strategies, tactics, and measurement metrics?

Gemini time: 2.7076053619384766 seconds
Document retrieved: business_docs_Marketing Plan.md
Retrieval time: 0.4200935363769531 seconds


**Target Audience:** Instagram's marketing plan targets small businesses, startups, e-commerce ventures, and brands seeking to engage with a younger demographic through social media platforms.

**Marketing Objectives:** Their primary objectives are to increase brand awareness and reach through targeted campaigns, drive customer engagement, and ultimately drive conversions for client businesses.

**Strategies:** Instagram employs personalized social media marketing strategies tailored to each client's goals and industry trends. They utilize a combination of influencer collaborations, interactive content, and data-driven insights to optimize their marketing campaigns.

**Tactics:** Their tactics include content creation and curation, paid advertising campaigns targeting specific audience segments, and regular performance tracking and analytics to measure campaign effectiveness.

**Measurement Metrics:** Instagram's key performance indicators include website traffic, engagement rates, lead generation, and conversion metrics. They provide monthly reports to clients to review campaign progress and return on investment (ROI).

Gemini time: 5.898318529129028 seconds
Total time: 6.3184120655059814 seconds
The total time overall by Query Writing is: 9.029017925262451


In [25]:
# Question / file-name pairs. The file name is presumably the document the
# retriever is expected to return for that question -- TODO confirm.
# NOTE(review): the "Compare the progress report ..." question appears ten times,
# which looks like padding; it is left as-is in case later cells rely on the
# list having 20 entries.
# NOTE(review): the "target audience" question is mapped to 'Progress Report.md';
# verify that 'Marketing Plan.md' was not intended.
queries = [
    {'query': question, 'filename': file_name}
    for question, file_name in (
        ("What is Instagram's current business proposal?", "Business Proposal.md"),
        ("What is the marketing plan for Instagram?", "Marketing Plan.md"),
        ("What information does the progress report of Instagram contain?", "Progress Report.md"),
        ("Who are the members of Instagram's board of directors?", "Board of Directors.md"),
        ("What are the diversity and inclusion initiatives implemented by Instagram?", "Diversity, Equity, and Inclusion.md"),
        ("What is the Marketing Objective for Influencer Collaboration Services?", "Marketing Plan.md"),
        ("Who is the target audience of Content Creation and Curation Services?", "Progress Report.md"),
        ("What is the financial update for the Reels Optimization Project?", "Progress Report.md"),
        ("Give me details about the progress report of the Stories Upgrade project.", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
        ("List all the achievements of Instagram", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
        ("Compare the progress report of the Feed Redesign and Stories Upgrade project and draw a conclusion on the information.", "Progress Report.md"),
    )
]


In [26]:
# Display the query / file-name pairs to confirm the list was built as intended.
queries

[{'query': "What is Instagram's current business proposal?",
  'filename': 'Business Proposal.md'},
 {'query': 'What is the marketing plan for Instagram?',
  'filename': 'Marketing Plan.md'},
 {'query': 'What information does the progress report of Instagram contain?',
  'filename': 'Progress Report.md'},
 {'query': "Who are the members of Instagram's board of directors?",
  'filename': 'Board of Directors.md'},
 {'query': 'What are the diversity and inclusion initiatives implemented by Instagram?',
  'filename': 'Diversity, Equity, and Inclusion.md'},
 {'query': 'What is the Marketing Objective for Influencer Collaboration Services?',
  'filename': 'Marketing Plan.md'},
 {'query': 'Who is the target audience of Content Creation and Curation Services?',
  'filename': 'Progress Report.md'},
 {'query': 'What is the financial update for the Reels Optimization Project?',
  'filename': 'Progress Report.md'},
 {'query': 'Give me details about the progress report of the Stories Upgrade projec