Skip to content

Commit

Permalink
Merge pull request #9 from FederalCSUMission/VectorDB_IPYNB_Yassin
Browse files Browse the repository at this point in the history
Added notebook for querying VDB w/o reloading docs
  • Loading branch information
josephyassin committed Sep 11, 2023
2 parents a91e0d5 + 4e986af commit b819da4
Show file tree
Hide file tree
Showing 2 changed files with 299 additions and 3 deletions.
6 changes: 3 additions & 3 deletions 10-VectorDB_Load.ipynb → 11-VectorDB_Load.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,10 @@
"import weaviate\n",
"\n",
"WEAVIATE_URL = os.environ[\"VECTOR_DB_WEAVIATE_URL\"] \n",
"WEAVIATE_API_KEY = os.environ[\"VECOTR_DB_WEVIATE_API_KEY\"]\n",
"WEAVIATE_API_KEY = os.environ[\"VECTOR_DB_WEVIATE_API_KEY\"]\n",
"\n",
"client = weaviate.Client(url=WEAVIATE_URL, auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY))\n",
"vectorstore = Weaviate.from_documents(docs, embeddings, client=client, by_text=False)"
"vectorstore = Weaviate.from_documents(docs, embeddings, client=client, by_text=False, index_name=\"arxivcs_index\")"
]
},
{
Expand Down Expand Up @@ -256,7 +256,7 @@
"\n",
"llm = AzureChatOpenAI(\n",
" openai_api_base=os.environ[\"AZURE_OPENAI_ENDPOINT\"] ,\n",
" openai_api_version=\"2023-05-15\",\n",
" openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n",
" deployment_name=os.environ[\"AZURE_OPENAI_LLM_DEPLOYMENT\"],\n",
" openai_api_key=os.environ[\"AZURE_OPENAI_API_KEY\"],\n",
" openai_api_type=\"azure\",\n",
Expand Down
296 changes: 296 additions & 0 deletions 12-VectorDB_QA.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"# Question and answer LLM using the Vector Database data as context"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"Prerequisites: \n",
"\n",
"1. This notebook assumes you've completed the previous notebook and have data loaded into your vector store. \n",
"\n",
"2. Azure OpenAI endpoint\n",
" Confirm that you've deployed both an embedding model and a LLM. https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": true,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"%pip install weaviate-client\n",
"%pip install langchain\n",
"%pip install openai[datalib]\n",
"%pip install tiktoken\n",
"%pip install python-dotenv"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"gather": {
"logged": 1694446501722
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"from dotenv import load_dotenv\n",
"load_dotenv(\"credentials.env\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1694446501905
},
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"from langchain.vectorstores import Weaviate\n",
"import weaviate\n",
"\n",
"WEAVIATE_URL = os.environ[\"VECTOR_DB_WEAVIATE_URL\"]\n",
"# NOTE(review): notebook 11 reads the API key from VECTOR_DB_WEVIATE_API_KEY — confirm which variable name credentials.env actually defines\n",
"WEAVIATE_API_KEY = os.environ[\"WEAVIATE_API_KEY\"]\n",
"\n",
"# Create client to interact with Weaviate\n",
"client = weaviate.Client(url=WEAVIATE_URL, auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY))\n",
"#Print schemas in weaviate, you should see your index from the previous notebook named \"arxivcs_index\"\n",
"client.schema.get()"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Set embedding parameters and LLM parameters"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"We are not embedding documents into Weaviate but we still need the embedding model to convert our prompt into a vector and do a similarity search. Make sure you are using the same embedding model here as you did to write data to the vector database. "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"gather": {
"logged": 1694446502048
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"#embedding parameters, should be the same as what was used to embed documents into weaviate\n",
"import openai\n",
"\n",
"openai.api_type = \"azure\"\n",
"# NOTE(review): key name 'openai_api_key' is lowercase here, while notebook 11 uses AZURE_OPENAI_API_KEY — confirm against credentials.env\n",
"openai.api_key = os.environ['openai_api_key']\n",
"openai.api_base = os.environ[\"AZURE_OPENAI_ENDPOINT\"]\n",
"openai.api_version = os.environ[\"AZURE_OPENAI_API_VERSION\"]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"gather": {
"logged": 1694446502196
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# LLM parameters\n",
"from langchain.chat_models import AzureChatOpenAI\n",
"\n",
"llm = AzureChatOpenAI(\n",
" openai_api_base=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\n",
" openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n",
" deployment_name=os.environ[\"AZURE_OPENAI_LLM_DEPLOYMENT\"],\n",
" openai_api_key=os.environ['openai_api_key'],\n",
" openai_api_type=\"azure\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"gather": {
"logged": 1694446250628
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [
{
"data": {
"text/plain": [
"'The main themes in these retrieved documents are quantum mechanics, measurement in quantum mechanics, the limitations of quantum equations and amplitudes, the physical meaning and interpretation of long numbers in quantum computing, and the untestable nature of claims about individual mega-states in quantum computing. The documents also discuss the relationship between quantum mechanics and materialism, as well as the challenges and limitations of quantum computing.'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain import PromptTemplate, LLMChain\n",
"\n",
"# Prompt\n",
"prompt = PromptTemplate.from_template(\n",
" \"Summarize the main themes in these retrieved docs: {docs}\"\n",
")\n",
"\n",
"# Chain\n",
"llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
"\n",
"#Converting our input question to an Embedding to use for search\n",
"response = openai.Embedding.create(\n",
" input=\"What is Quantum mechanics?\",\n",
" engine=\"textembedding\"\n",
")\n",
"embeddings = response['data'][0]['embedding']\n",
"\n",
"# Run\n",
"db = Weaviate(client=client, index_name=\"arxivcs_index\", text_key=\"text\")\n",
"docs = db.similarity_search_by_vector(embedding=embeddings)\n",
"result = llm_chain(docs)\n",
"\n",
"# Output\n",
"result['text']"
]
}
],
"metadata": {
"kernel_info": {
"name": "python310-sdkv2"
},
"kernelspec": {
"display_name": "Python 3.10 - SDK v2",
"language": "python",
"name": "python310-sdkv2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
},
"microsoft": {
"host": {
"AzureML": {
"notebookHasBeenCompleted": true
}
},
"ms_spell_check": {
"ms_spell_check_language": "en"
}
},
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit b819da4

Please sign in to comment.