In [1]:
import dspy
from dspy.retrieve.chromadb_rm import ChromadbRM
import os
from get_embedding_function import get_embedding_function
from populate_database import setup_database
from dspy.teleprompt import BootstrapFewShot
import pandas as pd
from dspy.datasets.dataset import Dataset
from dspy.evaluate.evaluate import Evaluate

  from .autonotebook import tqdm as notebook_tqdm
2024-07-29 14:14:58.065541: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-29 14:14:58.514380: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-29 14:14:58.716134: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-29 14:14:58.717543: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-29 14:14:59.

In [9]:
TRAINING_DATA = "queries_judgements.csv"

class CSVDataset(Dataset):
    def __init__(self, file_path, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        
        df = pd.read_csv(file_path)
        self._train = df.iloc[0:7].to_dict(orient='records')

        self._dev = df.iloc[7:14].to_dict(orient='records')

dataset = CSVDataset(TRAINING_DATA)

trainset = [x.with_inputs('question') for x in dataset.train]
devset = [x.with_inputs('question') for x in dataset.dev]

print(trainset)


[Example({'question': 'Is there any mention of removing unused assets to optimize performance?', 'answer': 'No'}) (input_keys={'question'}), Example({'question': 'Is there any mention of optimizing average CPU utilization to ensure efficient use of resources?', 'answer': 'No'}) (input_keys={'question'}), Example({'question': 'Is there any mention of optimizing storage utilization to reduce energy consumption?', 'answer': 'Yes'}) (input_keys={'question'}), Example({'question': 'Is there any mention of minimizing the total number of deployed environments to save resources?', 'answer': 'No'}) (input_keys={'question'}), Example({'question': 'Is there any mention of scaling down Kubernetes applications when not in use to reduce resource usage?', 'answer': 'Not Applicable'}) (input_keys={'question'}), Example({'question': 'Is there any mention of managing peak CPU utilization to avoid over-provisioning?', 'answer': 'No'}) (input_keys={'question'}), Example({'question': 'Is there any mention 

In [10]:
CHROMA_PATH = os.getenv("CHROMA_PATH")
DOCUMENT_PATH="./documents/4.pdf"

emb_local = False

setup_database(DOCUMENT_PATH, True, emb_local, True)

embedder, collection_name = get_embedding_function(run_local=emb_local)

retriever_model = ChromadbRM(
    collection_name,
    CHROMA_PATH,
    embedding_function=embedder.embed,
    k=5
)

lm = dspy.OllamaLocal(model='phi3')

dspy.settings.configure(lm=lm, rm=retriever_model)

✨  Database Cleared
Number of existing documents in DB: 32
✅ No new documents to add


In [11]:
retriever = dspy.Retrieve(k=3)
print(retriever("Is there any mention of optimizing the size of AI/ML models to save storage space and reduce memory usage during inference").passages)

['these properties.\n2. Capacity Estimation for Dropbox System Design\nStorage Estimations:\nAssumptions:', 'The total number of users = 500 million.\nTotal number of daily active users = 100 million\nThe average number of files stored by each user = 200\nThe average size of each file = 100 KB\nTotal number of active connections per minute = 1 million\n\nStorage Estimations:\n\nTotal number of files = 500 million * 200 = 100 billion\nTotal storage required = 100 billion * 100 KB = 10 PB\n\n3. High-Level Design(HLD) of Dropbox System Design\n\n\n-----\n\ncommunicates with the Upload Service on the server side. Large files may be broken into smaller chunks\nfor efficient transfer.', '7. Scalabilty for Dropbox System Design\nHorizontal Scaling\nWe can add more servers behind the load balancer to increase the capacity of each service. This is\nknown as Horizontal Scaling and each service can be independently scaled horizontally in our design.\nDatabase Sharding\nMetadata DB is sharded base

In [12]:
class GenerateAnswer(dspy.Signature):
    """Give a concrete yes or no or not applicable answer to a question based on the retrieved context."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="Output nothing else apart from either yes, no, or not applicable.")

class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
    
    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

#RAG().forward(query)
#print(lm.inspect_history(n=1))
       
def validate_context_and_answer(example, pred, trace=None):
    answer_EM = dspy.evaluate.answer_exact_match(example, pred)
    answer_PM = dspy.evaluate.answer_passage_match(example, pred)
    return answer_EM and answer_PM

In [13]:
result = RAG().forward("Is there any mention of optimizing the size of AI/ML models to save storage space and reduce memory usage during inference?")
print(result.answer)

No


In [14]:
print(lm.inspect_history(n=1))





Give a concrete yes or no or not applicable answer to a question based on the retrieved context.

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: Output nothing else apart from either yes, no, or not applicable.

---

Context:
[1] «these properties.
2. Capacity Estimation for Dropbox System Design
Storage Estimations:
Assumptions:»
[2] «The total number of users = 500 million.
Total number of daily active users = 100 million
The average number of files stored by each user = 200
The average size of each file = 100 KB
Total number of active connections per minute = 1 million

Storage Estimations:

Total number of files = 500 million * 200 = 100 billion
Total storage required = 100 billion * 100 KB = 10 PB

3. High-Level Design(HLD) of Dropbox System Design


-----

communicates with the Upload Service on the server side. Large files may be broken into 

In [15]:
teleprompter = BootstrapFewShot(metric=validate_context_and_answer)

compiled_rag = teleprompter.compile(RAG(), trainset=trainset)

evaluate_on_dataset = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5)

#Evaluate the `compiled_rag` program with the `answer_exact_match` metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_dataset(compiled_rag, metric=metric)

 14%|██████████████████                                                                                                            | 1/7 [01:22<08:17, 82.90s/it]

Failed to run or to evaluate example Example({'question': 'Is there any mention of removing unused assets to optimize performance?', 'answer': 'No'}) (input_keys={'question'}) with <function validate_context_and_answer at 0x7f3e00acf740> due to Expected embeddings to be a list, got dict.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [04:18<00:00, 36.98s/it]


Bootstrapped 0 full traces after 7 examples in round 0.


Average Metric: 3 / 7  (42.9): 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [03:41<00:00, 31.63s/it]

Average Metric: 3 / 7  (42.9%)



  df.loc[:, metric_name] = df[metric_name].apply(


Unnamed: 0,question,example_answer,context,pred_answer,answer_exact_match
0,Is there any mention of reducing transmitted data to save energy?,Yes,"['The synchronization service receives the request from the request queue of the messaging services and\nupdates the metadata database with the latest changes.\nAlso, the synchronization service...",No,False
1,Is there any mention of allowing independent scaling of logical components?,Yes,"['Scaling can be achieved using techniques such as vertical scaling (increasing hardware capabilities) or\nhorizontal scaling (adding more machines).\nHowever, horizontal scaling for relational databases often involves...","No, because the context discusses horizontal scaling for relational databases and does not specifically address the independence of scaling individual pieces of physical hardware like...",False
2,Is there any mention of using relevant metrics to dynamically adjust Kubernetes workloads?,Not Applicable,['7. Scalabilty for Dropbox System Design\nHorizontal Scaling\nWe can add more servers behind the load balancer to increase the capacity of each service. This is\nknown as...,No,False
3,Is there any mention of scaling infrastructure based on real-time user demand?,No,['7. Scalabilty for Dropbox System Design\nHorizontal Scaling\nWe can add more servers behind the load balancer to increase the capacity of each service. This is\nknown as...,No,✔️ [True]
4,Is there any mention of setting storage retention policies to automatically clean up old data?,No,"['request. We generally mention availability as 5 Nine’s, 4 Nine’s, etc. 5 Nine’s means 99.999% availability,\n4 Nine means 99.99% availability, and so on.\nDurability: Durability means...",No,✔️ [True]


42.86

In [16]:
print(lm.inspect_history(n=4))





Give a concrete yes or no or not applicable answer to a question based on the retrieved context.

---

Question: Is there any mention of scaling down applications during idle periods to minimize resource usage?
Answer: No

Question: Is there any mention of removing unused assets to optimize performance?
Answer: No

Question: Is there any mention of minimizing the total number of deployed environments to save resources?
Answer: No

Question: Is there any mention of scaling down Kubernetes applications when not in use to reduce resource usage?
Answer: Not Applicable

Question: Is there any mention of optimizing storage utilization to reduce energy consumption?
Answer: Yes

Question: Is there any mention of optimizing average CPU utilization to ensure efficient use of resources?
Answer: No

Question: Is there any mention of managing peak CPU utilization to avoid over-provisioning?
Answer: No

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

In [17]:
metric = dspy.evaluate.answer_exact_match
evaluate_on_dataset(RAG(), metric=metric)

Average Metric: 4 / 7  (57.1): 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [02:55<00:00, 25.04s/it]

Average Metric: 4 / 7  (57.1%)



  df.loc[:, metric_name] = df[metric_name].apply(


Unnamed: 0,question,example_answer,context,pred_answer,answer_exact_match
0,Is there any mention of reducing transmitted data to save energy?,Yes,"['The synchronization service receives the request from the request queue of the messaging services and\nupdates the metadata database with the latest changes.\nAlso, the synchronization service...",No,False
1,Is there any mention of allowing independent scaling of logical components?,Yes,"['Scaling can be achieved using techniques such as vertical scaling (increasing hardware capabilities) or\nhorizontal scaling (adding more machines).\nHowever, horizontal scaling for relational databases often involves...",No,False
2,Is there any mention of using relevant metrics to dynamically adjust Kubernetes workloads?,Not Applicable,['7. Scalabilty for Dropbox System Design\nHorizontal Scaling\nWe can add more servers behind the load balancer to increase the capacity of each service. This is\nknown as...,Not applicable,✔️ [True]
3,Is there any mention of scaling infrastructure based on real-time user demand?,No,['7. Scalabilty for Dropbox System Design\nHorizontal Scaling\nWe can add more servers behind the load balancer to increase the capacity of each service. This is\nknown as...,No,✔️ [True]
4,Is there any mention of setting storage retention policies to automatically clean up old data?,No,"['request. We generally mention availability as 5 Nine’s, 4 Nine’s, etc. 5 Nine’s means 99.999% availability,\n4 Nine means 99.99% availability, and so on.\nDurability: Durability means...",No,✔️ [True]


57.14

In [23]:
compiled_rag.save("file.json")