Skip to content

Commit

Permalink
Implement Chroma Support (#1)
Browse files Browse the repository at this point in the history
  • Loading branch information
timothycarambat committed Jun 8, 2023
1 parent 6b48e81 commit 6d01970
Show file tree
Hide file tree
Showing 27 changed files with 1,116 additions and 509 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/tim.svg?style=social&label=Follow%20%40Timothy%20Carambat)](https://twitter.com/tcarambat) [![](https://dcbadge.vercel.app/api/server/6UyHPeGZAC?compact=true&style=flat)](https://discord.gg/6UyHPeGZAC)

A full-stack application and tool suite that enables you to turn any document, resource, or piece of content into a piece of data that any LLM can use as reference during chatting. This application runs with very minimal overhead as by default the LLM and vectorDB are hosted remotely, but can be swapped for local instances. Currently this project supports Pinecone and OpenAI.
A full-stack application and tool suite that enables you to turn any document, resource, or piece of content into a piece of data that any LLM can use as reference during chatting. This application runs with very minimal overhead as by default the LLM and vectorDB are hosted remotely, but can be swapped for local instances. Currently this project supports Pinecone & ChromaDB for vector storage and OpenAI for chatting.

![Chatting](/images/screenshots/chat.png)
[view more screenshots](/images/screenshots/SCREENSHOTS.md)
Expand Down Expand Up @@ -38,7 +38,7 @@ This monorepo consists of three main sections:
- `yarn` and `node` on your machine
- `python` 3.8+ for running scripts in `collector/`.
- access to an LLM like `GPT-3.5`, `GPT-4`*.
- a [Pinecone.io](https://pinecone.io) free account*.
- a [Pinecone.io](https://pinecone.io) free account* **or** Local Chroma instance running.
*you can use drop-in replacements for these. This is just the easiest way to get up and running fast.

### How to get started
Expand Down
51 changes: 21 additions & 30 deletions frontend/src/components/DefaultChat/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ export default function DefaultChatContainer() {
const MESSAGES = [
<React.Fragment>
<div
className={`flex w-full mt-2 justify-start ${
popMsg ? "chat__message" : ""
}`}
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
}`}
>
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
<p className="text-slate-800 dark:text-slate-200 font-semibold">
Expand All @@ -34,9 +33,8 @@ export default function DefaultChatContainer() {

<React.Fragment>
<div
className={`flex w-full mt-2 justify-start ${
popMsg ? "chat__message" : ""
}`}
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
}`}
>
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
<p className="text-slate-800 dark:text-slate-200 font-semibold">
Expand All @@ -51,17 +49,16 @@ export default function DefaultChatContainer() {

<React.Fragment>
<div
className={`flex w-full mt-2 justify-start ${
popMsg ? "chat__message" : ""
}`}
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
}`}
>
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
<p className="text-slate-800 dark:text-slate-200 font-semibold">
AnythingLLM can run totally locally on your machine with little
overhead — you won't even notice it's there! No GPU needed. Cloud and
on-premises installtion is available as well.
on-premises installation is available as well.
<br />
The AI tooling ecosytem gets more powerful everyday. AnythingLLM
The AI tooling ecosystem gets more powerful every day. AnythingLLM
makes it easy to use.
</p>
<a
Expand All @@ -79,9 +76,8 @@ export default function DefaultChatContainer() {

<React.Fragment>
<div
className={`flex w-full mt-2 justify-end ${
popMsg ? "chat__message" : ""
}`}
className={`flex w-full mt-2 justify-end ${popMsg ? "chat__message" : ""
}`}
>
<div className="p-4 max-w-[75%] bg-slate-200 dark:bg-amber-800 rounded-b-2xl rounded-tl-2xl rounded-tr-sm">
<p className="text-slate-800 dark:text-slate-200 font-semibold">
Expand All @@ -93,9 +89,8 @@ export default function DefaultChatContainer() {

<React.Fragment>
<div
className={`flex w-full mt-2 justify-start ${
popMsg ? "chat__message" : ""
}`}
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
}`}
>
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
<p className="text-slate-800 dark:text-slate-200 font-semibold">
Expand All @@ -122,24 +117,22 @@ export default function DefaultChatContainer() {

<React.Fragment>
<div
className={`flex w-full mt-2 justify-end ${
popMsg ? "chat__message" : ""
}`}
className={`flex w-full mt-2 justify-end ${popMsg ? "chat__message" : ""
}`}
>
<div className="p-4 max-w-[75%] bg-slate-200 dark:bg-amber-800 rounded-b-2xl rounded-tl-2xl rounded-tr-sm">
<p className="text-slate-800 dark:text-slate-200 font-semibold">
Is this like an AI dropbox or something? What about chatting? It is
a chatbot isnt it?
a chatbot isn't it?
</p>
</div>
</div>
</React.Fragment>,

<React.Fragment>
<div
className={`flex w-full mt-2 justify-start ${
popMsg ? "chat__message" : ""
}`}
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
}`}
>
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
<p className="text-slate-800 dark:text-slate-200 font-semibold">
Expand Down Expand Up @@ -168,9 +161,8 @@ export default function DefaultChatContainer() {

<React.Fragment>
<div
className={`flex w-full mt-2 justify-end ${
popMsg ? "chat__message" : ""
}`}
className={`flex w-full mt-2 justify-end ${popMsg ? "chat__message" : ""
}`}
>
<div className="p-4 max-w-[75%] bg-slate-200 dark:bg-amber-800 rounded-b-2xl rounded-tl-2xl rounded-tr-sm">
<p className="text-slate-800 dark:text-slate-200 font-semibold">
Expand All @@ -182,9 +174,8 @@ export default function DefaultChatContainer() {

<React.Fragment>
<div
className={`flex w-full mt-2 justify-start ${
popMsg ? "chat__message" : ""
}`}
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
}`}
>
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
<p className="text-slate-800 dark:text-slate-200 font-semibold">
Expand Down
44 changes: 31 additions & 13 deletions frontend/src/components/Modals/Keys.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -74,20 +74,38 @@ export default function KeysModal({ hideModal = noop }) {
/>
<div className="h-[2px] w-full bg-gray-200 dark:bg-stone-600" />
<ShowKey
name="Pinecone DB API Key"
value={settings?.PineConeKey ? "*".repeat(20) : ""}
valid={!!settings?.PineConeKey}
/>
<ShowKey
name="Pinecone DB Environment"
value={settings?.PineConeEnvironment}
valid={!!settings?.PineConeEnvironment}
/>
<ShowKey
name="Pinecone DB Index"
value={settings?.PinceConeIndex}
valid={!!settings?.PinceConeIndex}
name="Vector DB Choice"
value={settings?.VectorDB}
valid={!!settings?.VectorDB}
/>
{settings?.VectorDB === "pinecone" && (
<>
<ShowKey
name="Pinecone DB API Key"
value={settings?.PineConeKey ? "*".repeat(20) : ""}
valid={!!settings?.PineConeKey}
/>
<ShowKey
name="Pinecone DB Environment"
value={settings?.PineConeEnvironment}
valid={!!settings?.PineConeEnvironment}
/>
<ShowKey
name="Pinecone DB Index"
value={settings?.PineConeIndex}
valid={!!settings?.PineConeIndex}
/>
</>
)}
{settings?.VectorDB === "chroma" && (
<>
<ShowKey
name="Chroma Endpoint"
value={settings?.ChromaEndpoint}
valid={!!settings?.ChromaEndpoint}
/>
</>
)}
</div>
)}
</div>
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/components/Modals/ManageWorkspace.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ export default function ManageWorkspace({ hideModal = noop, workspace }) {
const deleteWorkspace = async () => {
if (
!window.confirm(
`You are about to delete your entire ${workspace.name} workspace. This will remove all vector embeddings on your vector database.\n\nThe original source files will remiain untouched. This action is irreversible.`
`You are about to delete your entire ${workspace.name} workspace. This will remove all vector embeddings on your vector database.\n\nThe original source files will remain untouched. This action is irreversible.`
)
)
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { memo, useEffect, useRef, useState } from "react";
import { AlertTriangle } from "react-feather";
import Jazzicon from "../../../../UserIcon";
import { decode as HTMLDecode } from "he";
import { v4 } from "uuid";

function PromptReply({
uuid,
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"author": "Timothy Carambat (Mintplex Labs)",
"license": "MIT",
"scripts": {
"lint": "cd server && yarn lint && cd .. && cd frontend && yarn lint",
"setup": "cd server && yarn && cd .. && yarn setup:envs && echo \"Please run yarn dev:server and yarn dev:frontend in separate terminal tabs.\"",
"setup:envs": "cd server && cp -n .env.example .env.development && cd ../collector && cp -n .env.example .env && cd ..",
"dev:server": "cd server && yarn dev",
Expand Down
13 changes: 10 additions & 3 deletions server/.env.example
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
SERVER_PORT=5000
OPEN_AI_KEY=
OPEN_MODEL_PREF='gpt-3.5-turbo'
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
CACHE_VECTORS="true"

# Enable all below if you are using vector database: Chroma.
# VECTOR_DB="chroma"
# CHROMA_ENDPOINT='http://localhost:8000'

# Enable all below if you are using vector database: Pinecone.
VECTOR_DB="pinecone"
PINECONE_ENVIRONMENT=
PINECONE_API_KEY=
PINECONE_INDEX=
AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
CACHE_VECTORS="true"
PINECONE_INDEX=
2 changes: 1 addition & 1 deletion server/.nvmrc
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v18.12.1
v18.13.0
17 changes: 8 additions & 9 deletions server/endpoints/chat.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
const { reqBody } = require('../utils/http');
const { Workspace } = require('../models/workspace');
const { chatWithWorkspace } = require('../utils/chats');
const { reqBody } = require("../utils/http");
const { Workspace } = require("../models/workspace");
const { chatWithWorkspace } = require("../utils/chats");

function chatEndpoints(app) {
if (!app) return;

app.post('/workspace/:slug/chat', async (request, response) => {
const { slug } = request.params
const { message, mode = 'query' } = reqBody(request)
app.post("/workspace/:slug/chat", async (request, response) => {
const { slug } = request.params;
const { message, mode = "query" } = reqBody(request);
const workspace = await Workspace.get(`slug = '${slug}'`);
if (!workspace) {
response.sendStatus(400).end();
Expand All @@ -16,8 +16,7 @@ function chatEndpoints(app) {

const result = await chatWithWorkspace(workspace, message, mode);
response.status(200).json({ ...result });
})

});
}

module.exports = { chatEndpoints }
module.exports = { chatEndpoints };
56 changes: 34 additions & 22 deletions server/endpoints/system.js
Original file line number Diff line number Diff line change
@@ -1,34 +1,46 @@
require('dotenv').config({ path: `.env.${process.env.NODE_ENV}` })
const { Pinecone } = require('../utils/pinecone');
const { viewLocalFiles } = require('../utils/files');
require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
const { viewLocalFiles } = require("../utils/files");
const { getVectorDbClass } = require("../utils/helpers");

function systemEndpoints(app) {
if (!app) return;

app.get('/ping', (_, response) => {
app.get("/ping", (_, response) => {
response.sendStatus(200);
})
});

app.get('/setup-complete', (_, response) => {
app.get("/setup-complete", (_, response) => {
const vectorDB = process.env.VECTOR_DB || "pinecone";
const results = {
VectorDB: vectorDB,
OpenAiKey: !!process.env.OPEN_AI_KEY,
OpenAiModelPref: process.env.OPEN_MODEL_PREF || 'gpt-3.5-turbo',
PineConeEnvironment: process.env.PINECONE_ENVIRONMENT,
PineConeKey: !!process.env.PINECONE_API_KEY,
PinceConeIndex: process.env.PINECONE_INDEX,
}
response.status(200).json({ results })
})
OpenAiModelPref: process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo",
...(vectorDB === "pinecone"
? {
PineConeEnvironment: process.env.PINECONE_ENVIRONMENT,
PineConeKey: !!process.env.PINECONE_API_KEY,
PineConeIndex: process.env.PINECONE_INDEX,
}
: {}),
...(vectorDB === "chroma"
? {
ChromaEndpoint: process.env.CHROMA_ENDPOINT,
}
: {}),
};
response.status(200).json({ results });
});

app.get('/system-vectors', async (_, response) => {
const vectorCount = await Pinecone.totalIndicies();
response.status(200).json({ vectorCount })
})
app.get("/system-vectors", async (_, response) => {
const VectorDb = getVectorDbClass();
const vectorCount = await VectorDb.totalIndicies();
response.status(200).json({ vectorCount });
});

app.get('/local-files', async (_, response) => {
const localFiles = await viewLocalFiles()
response.status(200).json({ localFiles })
})
app.get("/local-files", async (_, response) => {
const localFiles = await viewLocalFiles();
response.status(200).json({ localFiles });
});
}

module.exports = { systemEndpoints }
module.exports = { systemEndpoints };
Loading

0 comments on commit 6d01970

Please sign in to comment.