In [1]:
from dotenv import load_dotenv
import os

# Load the .env file
load_dotenv()

# Get the OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [1]:
# !pip install python-dotenv



In [2]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Milvus
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from IPython.display import display, Markdown

In [3]:
# account for deprecation of LLM model
import datetime
# Get the current date
current_date = datetime.datetime.now().date()

# Define the date after which the model should be set to "gpt-3.5-turbo"
target_date = datetime.date(2024, 6, 12)

# Set the model variable based on the current date
if current_date > target_date:
    llm_model = "gpt-3.5-turbo"
else:
    llm_model = "gpt-3.5-turbo-0301"

In [21]:
# %pip install pymilvus==2.2.5

Collecting pymilvus==2.2.5
  Downloading pymilvus-2.2.5-py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.6/133.6 kB[0m [31m420.6 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting grpcio<=1.53.0,>=1.49.1
  Downloading grpcio-1.53.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m948.4 kB/s[0m eta [36m0:00:00[0m00:01[0m:01[0m
[?25hCollecting grpcio-tools<=1.53.0,>=1.49.1
  Downloading grpcio_tools-1.53.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m804.7 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting mmh3>=2.0
  Downloading mmh3-4.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [3

In [28]:
# %pip install --upgrade --quiet  docx2txt

[33m  DEPRECATION: docx2txt is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [4]:
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import Docx2txtLoader

loader = TextLoader("../data/Robinson_Advisory.txt", encoding="windows-1252")
# loader = Docx2txtLoader("../data/Robinson_Advisory.docx")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()

Created a chunk of size 1330, which is longer than the specified 1000


In [5]:
vector_db = Milvus.from_documents(
    docs,
    embeddings,
    connection_args={"host": "192.168.137.236", "port": "19530"},
)

In [12]:
query = "Who are the parties to the Agreement and what are their defined names?"
docs = vector_db.similarity_search(query)

In [13]:
docs[0].page_content

'The parties have participated jointly in the negotiation and drafting of this Agreement.  In the event an ambiguity or question of intent or interpretation arises, this Agreement shall be construed as if drafted jointly by the parties and no presumption or burden of proof shall arise favoring or disfavoring any party by virtue of the authorship of any of the provisions of this Agreement.\n\nSection and subsection headings are not to be considered part of this Agreement, are included solely for convenience, are not intended to be full or accurate descriptions of the content of the Sections or subsections of this Agreement and shall not affect the construction hereof.'

In [14]:
llm = ChatOpenAI(temperature = 0.0, model=llm_model)

In [15]:
qdocs = "".join([docs[i].page_content for i in range(len(docs))])


In [16]:
response = llm.call_as_llm(f"{qdocs} Question: Who are the parties to the Agreement and what are their defined names?") 


In [17]:
display(Markdown(response))

The parties to the Agreement are not specified in the given text, so their defined names cannot be determined.

In [13]:
query = "What is the termination notice?"
docs = vector_db.similarity_search(query)

In [21]:
docs[0].page_content

'4. Term: The term of this Agreement shall commence on the Effective Date and shall continue until terminated in accordance with the provisions herein (the "Term").  \n\n5. Termination: Either party, at any given time, may terminate this Agreement, for any reason whatsoever, with or without cause, upon fourteen (14) days’ prior written notice. Notwithstanding the above, the Company may terminate this Agreement immediately and without prior notice if Advisor refuses or is unable to perform the Services, or is in breach of any provision of this Agreement.'

In [22]:
qdocs = "".join([docs[i].page_content for i in range(len(docs))])
response = llm.call_as_llm(f"{qdocs} Question: What is the termination notice?") 

In [23]:
display(Markdown(response))

The termination notice is fourteen (14) days' prior written notice, which can be given by either party at any time, for any reason, with or without cause. However, the Company may terminate the Agreement immediately and without prior notice if the Advisor refuses or is unable to perform the Services or is in breach of any provision of the Agreement.