# Installation of packages

In [None]:
!pip install openai
!pip install chromadb
!pip install langchain
!pip install tiktoken

# Import Python Packages

In [17]:
import os
import platform
import textwrap
import requests
from typing import List

import openai
import chromadb
import langchain

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import TokenTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain
from langchain.document_loaders import GutenbergLoader
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader

print('Python: ', platform.python_version())

Python:  3.9.16


# Mount Google Drive on Colab

In [18]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# OpenAI API Key

In [19]:
os.environ["OPENAI_API_KEY"] = 'sk-xxxxx'

# Configure Chroma

In [20]:
persist_directory = "/content/drive/My Drive/Colab Notebooks/chroma/romeo"

# Convert Document to Embedding

In [23]:
class GutenbergLoader(BaseLoader):
    """Loader that uses urllib to load .txt web files."""

    def __init__(self, file_path: str):
        """Initialize with file path."""
        if not file_path.startswith("https://open-academy.github.io"):
            raise ValueError("file path must start with 'https://open-academy.github.io'")

        if not file_path.endswith(".md"):
            raise ValueError("file path must end with '.md'")

        self.file_path = file_path

    def load(self) -> List[Document]:
        """Load file."""
        from urllib.request import urlopen

        elements = urlopen(self.file_path)
        text = "\n\n".join([str(el.decode("utf-8-sig")) for el in elements])
        metadata = {"source": self.file_path}
        return [Document(page_content=text, metadata=metadata)]

def get_gutenberg(url):
    loader = GutenbergLoader(url)
    data = loader.load()
    return data

In [27]:
# Downloading the text data from Project Open-academy
modelDeployment_md = 'https://open-academy.github.io/machine-learning/_sources/machine-learning-productionization/model-deployment.md'
modelDeployment_data = get_gutenberg(modelDeployment_md)

# Initializing a TokenTextSplitter object to split the text into chunks of 1000 tokens with 0 token overlap
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)

# Splitting the Romeo and Juliet text into chunks using the TokenTextSplitter object
modelDeployment_doc = text_splitter.split_documents(modelDeployment_data)

# Initializing an OpenAIEmbeddings object for word embeddings
embeddings = OpenAIEmbeddings()

# Generating Chroma vectors from the text chunks using the OpenAIEmbeddings object and persisting them to disk
vectordb = Chroma.from_documents(modelDeployment_doc, embeddings, persist_directory=persist_directory)
# This can be used to explicitly persist the data to disk. It will also be called automatically when the object is destroyed.
vectordb.persist()

Exception ignored in: <function PersistentDuckDB.__del__ at 0x7fcf81f07ca0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/chromadb/db/duckdb.py", line 446, in __del__
  File "/usr/local/lib/python3.9/dist-packages/chromadb/db/duckdb.py", line 399, in persist
duckdb.IOException: IO Error: Could not rename file!


In [None]:
modelDeployment_data

# Configure LangChain QA

In [29]:
romeoandjuliet_qa = ChatVectorDBChain.from_llm(OpenAI(temperature=0, model_name="gpt-3.5-turbo"), vectordb, return_source_documents=True)



In [30]:
# Romeo and Juliet
query = "Have Romeo and Juliet spent the night together? Provide a verbose answer, referencing passages from the book."
chat_history = ''
result = romeoandjuliet_qa({"question": query, "chat_history": chat_history})

In [34]:
query = "How to deployment the data model? Provide a verbose answer, referencing passages from the book."
chat_history = ''
result = romeoandjuliet_qa({"question": query, "chat_history": chat_history})

In [None]:
result["source_documents"] # Vector search engine result

In [35]:
result["answer"] # Answer

'There are several patterns for deploying a Machine Learning model, including "model as module," "model as service," and "model as data." In the "model as module" approach, the model is embedded as a dependency in the application and packaged together as a module. In the "model as service" approach, the model is wrapped in a service that can be deployed independently of the application, allowing for independent updates of the model and application. In the "model as data" approach, the model is treated and published independently, and the application ingests it as data at runtime instead. The book also discusses the importance of version control and automated CI/CD pipelines in the deployment process, as well as the challenges of coordinating scientists, software engineers, data engineers, and business professionals. Additionally, the book covers the evolution of deployment strategies, from basic deployment to container orchestration-based deployment, and the use of Machine Learning as 

In [None]:
def markdown_to_python(markdown_text):
    # Escape quotes and backslashes in the input
    escaped_input = markdown_text.replace("\\", "\\\\").replace("'", "\\'")

    # Generate the Python string
    python_string = f"'{escaped_input}'"

    return python_string

In [None]:
markdown_text = "Generating questions and answers from the book is a straightforward process. To assess the accuracy of the results, I will be comparing the answers with those from SparkNotes. > *SparkNotes editors.* [“Romeo and Juliet” SparkNotes.com](https://www.sparknotes.com/shakespeare/romeojuliet/key-questions-and-answers/), *SparkNotes LLC, 2005* >"
query = markdown_to_python(markdown_text);
result = romeoandjuliet_qa({"question": query, "chat_history": chat_history})
chat_history = chat_history + result["answer"]
result["answer"]

'This is not a question, it is a statement.'

In [None]:
# restart the conversation
chat_history = [("hello", "hello")]
count = 0

In [None]:
# 1st
markdown_text = "I get a number '23333', please give me the completed code in Python which could change the number into a string. In the code."

query = markdown_to_python(markdown_text)
result = romeoandjuliet_qa({"question": query, "chat_history": chat_history})
chat_history = chat_history + [(query, result["answer"])]
formatted_history = "\n".join([f"Question: {q}\nAnswer: {a}" for q, a in chat_history])
wrapped_history = textwrap.fill(formatted_history, width=120)
print(wrapped_history + "\n")
result["answer"]

Question: hello Answer: hello Question: 'I get a number \'23333\', please give me the complete code in Python which
could change the number into a string.' Answer: str(23333) Question: 'I get a number \'23333\', please give me the
complete code in Python which could change the number into a string. In the code, I need you use for loop.' Answer: Yes,
here is the code:  num = 23333 string = ""  for digit in str(num):     string += digit  print(string)



'Yes, here is the code:\n\nnum = 23333\nstring = ""\n\nfor digit in str(num):\n    string += digit\n\nprint(string)'

In [None]:
# 2nd
query = "However, you can not uss the str function. Show me the code again."
result = romeoandjuliet_qa({"question": query, "chat_history": chat_history})
chat_history = chat_history + [(query, result["answer"])]
result["answer"]

'Yes, here is a code in Python that can change the number \'23333\' into a string using a for loop, but without using the str function:\n\n```\nnum = 23333\nstring = ""\n\nfor digit in str(num):\n    string += chr(ord(\'0\') + int(digit))\n\nprint(string)\n```\n\nThis code converts each digit of the number into its corresponding ASCII character code and then concatenates them to form a string.'

In [None]:
# 3rd
query = "Romeo and Juliet are not lovers? Provide a verbose answer, referencing passages from the book."
result = romeoandjuliet_qa({"question": query, "chat_history": chat_history})
chat_history = chat_history + [(query, result["answer"])]
result["answer"]

'No, there is no evidence in the given context that suggests Romeo and Juliet are not lovers. On the contrary, the Chorus describes them as being in love and the dialogue between Juliet and her Nurse reveals her intense feelings for Romeo despite him being a member of the enemy Montague family.'

In [None]:
# restart the conversation
chat_history = [("", "")]
count = 0

# while loop for typing
while 1:
  markdown_text = input("\nQuery[{}]:".format(count))
  query = markdown_to_python(markdown_text)
  result = romeoandjuliet_qa({"question": query, "chat_history": chat_history})
  chat_history = chat_history + [(query, result["answer"])]
  formatted_history = "\n".join([f"Question: {q}\nAnswer: {a}" for q, a in chat_history])
  wrapped_history = textwrap.fill(formatted_history, width=120)
  print(wrapped_history + "\n")
  result["answer"]