In [1]:
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
import os
import shutil

CHROMA_PATH = "chroma"
DATA_PATH = "./data/"

In [2]:
def generate_data_store():
    """Build the vector store in one pass.

    Loads the documents, splits them into chunks, and saves the chunks
    to Chroma. Returns nothing; the work happens in the three helpers.
    """
    save_to_chroma(split_text(load_documents()))

In [3]:
def load_documents():
    """Load the files under DATA_PATH that match the glob "*.md".

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory
        and glob (the loaded documents).
    """
    return DirectoryLoader(DATA_PATH, glob="*.md").load()


In [4]:
def split_text(documents: list[Document], chunk_size: int = 300, chunk_overlap: int = 100):
    """Split documents into overlapping text chunks and preview the first one.

    Args:
        documents: Documents to split.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
            Defaults to 300, the value this notebook originally hard-coded.
        chunk_overlap: Number of characters shared between neighbouring
            chunks. Defaults to 100, the originally hard-coded value.

    Returns:
        The list of chunks produced by the splitter. Each chunk's metadata
        includes a ``start_index`` because ``add_start_index=True``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    # Preview the first chunk as a sanity check. Guarded so that an empty
    # data directory (no documents -> no chunks) does not raise IndexError.
    if chunks:
        sample = chunks[0]
        print(sample.page_content)
        print(sample.metadata)

    return chunks


In [5]:
def save_to_chroma(chunks: list[Document]):
    """Rebuild the Chroma store at CHROMA_PATH from the given chunks.

    Any existing store directory is deleted first, then a new one is
    created from ``chunks`` using OpenAI embeddings and persisted.

    Args:
        chunks: Document chunks to embed and store.

    Raises:
        RuntimeError: If the OPENAI_API_KEY environment variable is not set.
    """
    # Secrets must never be hardcoded in a notebook; read the key from the
    # environment. Check it *before* wiping the old store so a missing key
    # does not leave us with no database at all.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it before building the data store."
        )

    # Clear out the database first.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Create a new DB from the documents.
    db = Chroma.from_documents(
        chunks,
        OpenAIEmbeddings(openai_api_key=api_key),
        persist_directory=CHROMA_PATH,
    )
    db.persist()
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")


In [6]:
import os
from getpass import getpass

from langchain.embeddings import OpenAIEmbeddings

# Never store the API key in the notebook: anything committed here is visible
# to everyone with access to the file, and a key that has ever been committed
# should be treated as leaked and rotated.
# Use the key already present in the environment; otherwise prompt for it
# without echoing, so it is kept out of the notebook source.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
api_key = os.getenv("OPENAI_API_KEY")

embeddings = OpenAIEmbeddings(openai_api_key=api_key)


  warn_deprecated(


In [7]:
# Build the Chroma data store: load the documents, split them into chunks, and save them.
generate_data_store()


Split 1 documents into 22 chunks.
Open AI Chat with new GPT-3.5

Pre-requisites

Open AI Account

http://chat.openai.com

Get a developer key

What is GPT 3.5?
{'source': 'data\\chatopenai.md', 'start_index': 0}
Saved 22 chunks to chroma.


  warn_deprecated(


In [8]:
# RAG implementation

import argparse
from dataclasses import dataclass
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Directory the Chroma vector store is persisted to and loaded from.
# Same value as the CHROMA_PATH defined in the first cell of this notebook.
CHROMA_PATH = "chroma"

# Prompt sent to the chat model. {context} receives the retrieved chunk text
# and {question} receives the user's query when the template is formatted.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""



In [20]:
# Query text: the question used for the similarity search and inserted into the prompt.
query_text = " Which model is better model?"

In [21]:
# Prepare the DB: open the Chroma store persisted at CHROMA_PATH.
# OpenAIEmbeddings() is given no key here; presumably it picks up
# OPENAI_API_KEY from the environment -- TODO confirm.
embedding_function = OpenAIEmbeddings()
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

    

In [22]:
# Search the DB for the 3 chunks most relevant to the query.
# Each result is a (Document, relevance score) pair, as the printed output shows.
results = db.similarity_search_with_relevance_scores(query_text, k=3)
print(results)

[(Document(metadata={'source': 'data\\chatopenai.md', 'start_index': 2042}, page_content='Get the best model from the AutoML run\n\nbest_model = automl_run.get_output()\n\nEvaluate the performance of the best model\n\nperf = best_model.predict(test.drop_columns(columns=["survived"]))\nprint(perf.auc())\n```\n\nNow next to try Deep learning models\n\nSo here is my question\n\n```'), 0.6685426697032746), (Document(metadata={'source': 'data\\chatopenai.md', 'start_index': 315}, page_content='code-davinci-002 is a base model, so good for pure code-completion tasks\ntext-davinci-002 is an InstructGPT model based on code-davinci-002\ntext-davinci-003 is an improvement on text-davinci-002\n```\n\nInformation available at https://beta.openai.com/docs/model-index-for-researchers'), 0.6529310657854615), (Document(metadata={'source': 'data\\chatopenai.md', 'start_index': 3011}, page_content='Evaluate the performance of the model\n\n_, acc = model.evaluate(x=test.drop("survived", axis=1), y=test["

In [23]:
# Augment the prompt: join the text of the retrieved chunks, separated by
# "---" lines, into one context string, then fill the template with that
# context and the query.
context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, question=query_text)
print(prompt)


Human: 
Answer the question based only on the following context:

Get the best model from the AutoML run

best_model = automl_run.get_output()

Evaluate the performance of the best model

perf = best_model.predict(test.drop_columns(columns=["survived"]))
print(perf.auc())
```

Now next to try Deep learning models

So here is my question

```

---

code-davinci-002 is a base model, so good for pure code-completion tasks
text-davinci-002 is an InstructGPT model based on code-davinci-002
text-davinci-003 is an improvement on text-davinci-002
```

Information available at https://beta.openai.com/docs/model-index-for-researchers

---

Evaluate the performance of the model

_, acc = model.evaluate(x=test.drop("survived", axis=1), y=test["survived"])
print("Test accuracy:", acc)
```

Here is another open source model called yolov5

question - Can you create me a yolov5 deep learning code?

```

Import the necessary libraries

---

Answer the question based on the above context:  Which model i

In [24]:
 
# Generate the response
   
# Ask the chat model (default ChatOpenAI settings) to answer the filled-in prompt.
model = ChatOpenAI()
response_text = model.predict(prompt)

# Report the answer together with the "source" recorded in each retrieved
# chunk's metadata (None when a chunk has no "source" key).
sources = [doc.metadata.get("source", None) for doc, _score in results]
formatted_response = f"Response: {response_text}\nSources: {sources}"
print(formatted_response)

Response: Based on the provided context, the best model is the one obtained from the AutoML run. The code snippet evaluates the performance of the best model by using it to predict on the test data and then calculating the AUC score. Therefore, the best model from the AutoML run is considered superior in this context.
Sources: ['data\\chatopenai.md', 'data\\chatopenai.md', 'data\\chatopenai.md']
