In [1]:
import dspy
from dspy.retrieve.chromadb_rm import ChromadbRM
import os
from get_embedding_function import get_embedding_function
from populate_database import setup_database
from dspy.teleprompt import BootstrapFewShot
import pandas as pd
from dspy.datasets.dataset import Dataset
from dspy.evaluate.evaluate import Evaluate

  from .autonotebook import tqdm as notebook_tqdm
2024-07-26 11:43:18.348714: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-26 11:43:18.806660: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-26 11:43:18.999962: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-26 11:43:19.001364: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-26 11:43:19.

In [20]:
TRAINING_DATA = "queries_judgements.csv"

class CSVDataset(Dataset):
    def __init__(self, file_path, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        
        df = pd.read_csv(file_path)
        self._train = df.iloc[0:14].to_dict(orient='records')

        self._dev = df.iloc[15:22].to_dict(orient='records')

dataset = CSVDataset(TRAINING_DATA)

trainset = [x.with_inputs('question') for x in dataset.train]
devset = [x.with_inputs('question') for x in dataset.dev]

print(trainset)


[Example({'question': 'Is there any mention of optimizing storage utilization to reduce energy consumption?', 'answer': 'Yes'}) (input_keys={'question'}), Example({'question': 'Is there any mention of adopting serverless cloud services to optimize resource usage?', 'answer': 'No'}) (input_keys={'question'}), Example({'question': 'Is there any mention of allowing independent scaling of logical components?', 'answer': 'Yes'}) (input_keys={'question'}), Example({'question': 'Is there any mention of scaling down Kubernetes applications when not in use to reduce resource usage?', 'answer': 'Not Applicable'}) (input_keys={'question'}), Example({'question': 'Is there any mention of reducing transmitted data to save energy?', 'answer': 'Yes'}) (input_keys={'question'}), Example({'question': 'Is there any mention of optimizing average CPU utilization to ensure efficient use of resources?', 'answer': 'No'}) (input_keys={'question'}), Example({'question': 'Is there any mention of managing peak CP

In [3]:
CHROMA_PATH = os.getenv("CHROMA_PATH")
DOCUMENT_PATH="./documents/4.pdf"

emb_local = False

setup_database(DOCUMENT_PATH, True, emb_local, True)

embedder, collection_name = get_embedding_function(run_local=emb_local)

retriever_model = ChromadbRM(
    collection_name,
    CHROMA_PATH,
    embedding_function=embedder.embed,
    k=5
)

lm = dspy.OllamaLocal(model='phi3')

dspy.settings.configure(lm=lm, rm=retriever_model)

✨  Database Cleared
Number of existing documents in DB: 0
👉 Adding new documents: 32


In [11]:
retriever = dspy.Retrieve(k=3)
print(retriever("Is there any mention of optimizing the size of AI/ML models to save storage space and reduce memory usage during inference").passages)

['these properties.\n2. Capacity Estimation for Dropbox System Design\nStorage Estimations:\nAssumptions:', 'The total number of users = 500 million.\nTotal number of daily active users = 100 million\nThe average number of files stored by each user = 200\nThe average size of each file = 100 KB\nTotal number of active connections per minute = 1 million\n\nStorage Estimations:\n\nTotal number of files = 500 million * 200 = 100 billion\nTotal storage required = 100 billion * 100 KB = 10 PB\n\n3. High-Level Design(HLD) of Dropbox System Design\n\n\n-----\n\ncommunicates with the Upload Service on the server side. Large files may be broken into smaller chunks\nfor efficient transfer.', '7. Scalabilty for Dropbox System Design\nHorizontal Scaling\nWe can add more servers behind the load balancer to increase the capacity of each service. This is\nknown as Horizontal Scaling and each service can be independently scaled horizontally in our design.\nDatabase Sharding\nMetadata DB is sharded base

In [12]:
class GenerateAnswer(dspy.Signature):
    """Give a concrete 'yes' or 'no' or 'not applicable' answer to a question based on the retrieved context."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="Output nothing else apart from either 'Yes', 'No', or Not Applicable.")

class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
    
    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

#RAG().forward(query)
#print(lm.inspect_history(n=1))
       
def validate_context_and_answer(example, pred, trace=None):
    answer_EM = dspy.evaluate.answer_exact_match(example, pred)
    answer_PM = dspy.evaluate.answer_passage_match(example, pred)
    return answer_EM and answer_PM

In [13]:
result = RAG().forward("Is there any mention of optimizing the size of AI/ML models to save storage space and reduce memory usage during inference?")
print(result.answer)

No


In [14]:
print(lm.inspect_history(n=1))





Give a concrete 'yes' or 'no' or 'not applicable' answer to a question based on the retrieved context.

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: Output nothing else apart from either Yes, No, or Not Applicable.

---

Context:
[1] «these properties.
2. Capacity Estimation for Dropbox System Design
Storage Estimations:
Assumptions:»
[2] «The total number of users = 500 million.
Total number of daily active users = 100 million
The average number of files stored by each user = 200
The average size of each file = 100 KB
Total number of active connections per minute = 1 million

Storage Estimations:

Total number of files = 500 million * 200 = 100 billion
Total storage required = 100 billion * 100 KB = 10 PB

3. High-Level Design(HLD) of Dropbox System Design


-----

communicates with the Upload Service on the server side. Large files may be broken

In [21]:
teleprompter = BootstrapFewShot(metric=validate_context_and_answer)

compiled_rag = teleprompter.compile(RAG(), trainset=trainset)

evaluate_on_dataset = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5)

#Evaluate the `compiled_rag` program with the `answer_exact_match` metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_dataset(compiled_rag, metric=metric)


  0%|                                                                                   | 0/14 [00:00<?, ?it/s][A
  7%|█████▎                                                                     | 1/14 [00:39<08:27, 39.03s/it][A
 14%|██████████▋                                                                | 2/14 [01:38<10:11, 51.00s/it][A
 21%|████████████████                                                           | 3/14 [02:13<08:01, 43.78s/it][A
 29%|█████████████████████▍                                                     | 4/14 [02:43<06:22, 38.30s/it][A
 36%|██████████████████████████▊                                                | 5/14 [03:16<05:27, 36.43s/it][A
 43%|████████████████████████████████▏                                          | 6/14 [03:50<04:45, 35.72s/it][A
 50%|█████████████████████████████████████▌                                     | 7/14 [04:25<04:08, 35.44s/it][A
 57%|██████████████████████████████████████████▊                               

Bootstrapped 1 full traces after 14 examples in round 0.



  0%|                                                                                    | 0/7 [00:00<?, ?it/s][A
Average Metric: 0 / 1  (0.0):   0%|                                                      | 0/7 [00:57<?, ?it/s][A
Average Metric: 0 / 1  (0.0):  14%|██████▌                                       | 1/7 [00:57<05:43, 57.26s/it][A
Average Metric: 0 / 2  (0.0):  14%|██████▌                                       | 1/7 [01:54<05:43, 57.26s/it][A
Average Metric: 0 / 2  (0.0):  29%|█████████████▏                                | 2/7 [01:54<04:46, 57.23s/it][A
Average Metric: 0 / 3  (0.0):  29%|█████████████▏                                | 2/7 [02:51<04:46, 57.23s/it][A
Average Metric: 0 / 3  (0.0):  43%|███████████████████▋                          | 3/7 [02:51<03:48, 57.24s/it][A
Average Metric: 0 / 4  (0.0):  43%|███████████████████▋                          | 3/7 [03:48<03:48, 57.24s/it][A
Average Metric: 0 / 4  (0.0):  57%|██████████████████████████▎                 

Average Metric: 0 / 7  (0.0%)



  df.loc[:, metric_name] = df[metric_name].apply(


Unnamed: 0,question,example_answer,context,pred_answer,answer_exact_match
0,Is there any mention of adopting cloud-native network security tools and controls for efficient security?,Yes,"['Each shard is essentially a separate database instance that can be distributed across different servers or\neven different geographic locations.\nSharding helps distribute the load, improve query...",To give a concrete example of how to design a scalable cloud storage service that uses sharding with horizontal scaling while ensuring efficient resource usage...,False
1,Is there any mention of implementing circuit breaker patterns to handle service failures gracefully?,No,"['request. We generally mention availability as 5 Nine’s, 4 Nine’s, etc. 5 Nine’s means 99.999% availability,\n4 Nine means 99.99% availability, and so on.\nDurability: Durability means...",Implementing Circuit Breaker Patterns in Sharded Database Systems with Concrete Example - A Case Study on a Social Media Platform's User Profile Service To give...,False
2,Is there any mention of using asynchronous network calls instead of synchronous ones to reduce waiting times?,Yes,['-----\n\n4.3. Message Queuing Service\nThe messaging service queue will be responsible for the asynchronous communication between the\nclients and the synchronization service.\n\ndesign-dropbox-4\n\nBelow are the main requirements of...,"In a distributed system designed for an eCommerce platform handling millions of transactions daily with thousands of concurrent users and high-volume message processing requirements, implementing...",False
3,Is there any mention of scheduling Kubernetes cron jobs during off-peak hours to optimize resource usage?,No,['7. Scalabilty for Dropbox System Design\nHorizontal Scaling\nWe can add more servers behind the load balancer to increase the capacity of each service. This is\nknown as...,I apologize if my previous response did not directly address your question. It seems there might have been a misunderstanding in the task description as...,False
4,Is there any mention of using DDoS protection to maintain service availability?,No,"['request. We generally mention availability as 5 Nine’s, 4 Nine’s, etc. 5 Nine’s means 99.999% availability,\n4 Nine means 99.99% availability, and so on.\nDurability: Durability means...","Certainly! Here's an example scenario where we can implement horizontal scaling for handling high volumes of messages while maintaining ACID properties using Redis, which is...",False


0.0

In [19]:
print(lm.inspect_history(n=4))





Give a concrete 'yes' or 'no' or 'not applicable' answer to a question based on the retrieved context.

---

Question: Is there any mention of scaling down Kubernetes applications when not in use to reduce resource usage?
Answer: Not Applicable

Question: Is there any mention of managing peak CPU utilization to avoid over-provisioning?
Answer: No

Question: Is there any mention of adopting serverless cloud services to optimize resource usage?
Answer: No

Question: Is there any mention of setting storage retention policies to automatically clean up old data?
Answer: No

Question: Is there any mention of using relevant metrics to dynamically adjust Kubernetes workloads?
Answer: Not Applicable

Question: Is there any mention of scaling infrastructure based on real-time user demand?
Answer: No

Question: Is there any mention of optimizing average CPU utilization to ensure efficient use of resources?
Answer: No

Question: Is there any mention of removing unused assets to optimize perfor

In [23]:
compiled_rag.save("file.json")