# Shipping as a web API

In [1]:
import "dotenv/config";

[Module: null prototype] { default: {} }

In [2]:
import { 
  loadAndSplitChunks, 
  initializeVectorstoreWithDocuments 
} from "./lib/helpers.ts";

// Split the source material into chunks using the project helper from ./lib/helpers.ts.
// NOTE(review): chunkSize/chunkOverlap are presumably measured in characters and the
// overlap presumably shares text between neighbouring chunks — confirm in the helper.
const splitDocs = await loadAndSplitChunks({
  chunkSize: 1536,
  chunkOverlap: 128,
});

// Build a vector store from the chunks (how it is embedded/stored is decided by the helper).
const vectorstore = await initializeVectorstoreWithDocuments({
  documents: splitDocs,
});

// Expose the vector store through its retriever interface.
const retriever = vectorstore.asRetriever();

In [3]:
import { 
  createDocumentRetrievalChain, 
  createRephraseQuestionChain 
} from "./lib/helpers.ts";

// Instantiate the two helper chains from ./lib/helpers.ts. Further down in this file,
// `rephraseQuestionChain` produces the `standalone_question` field and
// `documentRetrievalChain` produces the `context` field of the conversational chain.
const documentRetrievalChain = createDocumentRetrievalChain();
const rephraseQuestionChain = createRephraseQuestionChain();

In [4]:
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";

// System message for the answer-generation step. `{context}` is a prompt template
// variable; the conversational chain below supplies it via its `context` field.
const ANSWER_CHAIN_SYSTEM_TEMPLATE = `You are an experienced researcher,
expert at interpreting and answering questions based on provided sources.
Using the below provided context and chat history, 
answer the user's question to the best of your ability
using only the resources provided. Be verbose!

<context>
{context}
</context>`;

// Chat prompt made of three parts, in order: the system message above, a placeholder
// that is replaced by the messages stored under the "history" key, and a final human
// message that asks the `{standalone_question}` template variable.
const answerGenerationChainPrompt = ChatPromptTemplate.fromMessages([
  ["system", ANSWER_CHAIN_SYSTEM_TEMPLATE],
  new MessagesPlaceholder("history"),
  [
    "human", 
    `Now, answer this question using the previous context and chat history:
  
    {standalone_question}`
  ]
]);

In [5]:
import { 
  RunnablePassthrough, 
  RunnableSequence 
} from "@langchain/core/runnables";
import { ChatOpenAI } from "@langchain/openai";

// End-to-end conversational retrieval pipeline. The steps run in the order listed:
//   1. add `standalone_question` to the input by running `rephraseQuestionChain`;
//   2. add `context` to the input by running `documentRetrievalChain`
//      (it runs after step 1, so `standalone_question` is already present);
//   3. format the accumulated fields into `answerGenerationChainPrompt`;
//   4. send the formatted prompt to the OpenAI chat model.
// `RunnablePassthrough.assign` keeps the existing input fields and adds the new one.
const conversationalRetrievalChain = RunnableSequence.from([
  RunnablePassthrough.assign({
    standalone_question: rephraseQuestionChain,
  }),
  RunnablePassthrough.assign({
    context: documentRetrievalChain,
  }),
  answerGenerationChainPrompt,
  new ChatOpenAI({ modelName: "gpt-3.5-turbo-1106" }),
]);

In [6]:
import { HttpResponseOutputParser } from "langchain/output_parsers";

// Output parser appended to the end of the chain so that its output can be handed
// directly to an HTTP `Response` as the body.
// "text/event-stream" is also supported
// The content type chosen here matches the "Content-Type" header the request handler
// sets on its response; keep the two in sync if this is changed.
const httpResponseOutputParser = new HttpResponseOutputParser({
  contentType: "text/plain"
});

In [7]:
import { RunnableWithMessageHistory } from "@langchain/core/runnables"; 
import { ChatMessageHistory } from "langchain/stores/message/in_memory";

// A single in-memory chat history object.
const messageHistory = new ChatMessageHistory();

// Wrap the conversational chain with message-history handling and pipe the result
// through the HTTP output parser.
// NOTE: `getMessageHistory` ignores the session id it is given and always returns the
// same `messageHistory` object, so every session shares one conversation history.
// `inputMessagesKey` names the input field holding the user's question and
// `historyMessagesKey` names the field under which the history is supplied (the
// "history" placeholder of the answer prompt).
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: (_sessionId) => messageHistory,
  historyMessagesKey: "history",
  inputMessagesKey: "question",
}).pipe(httpResponseOutputParser);

In [8]:
/**
 * In-memory registry of chat histories, keyed by session id.
 *
 * The object is created without a prototype: session ids come from clients, and on a
 * plain `{}` an id such as "constructor", "toString" or "__proto__" would resolve to an
 * inherited `Object.prototype` member instead of being treated as an unknown session.
 */
const messageHistories: Record<string, ChatMessageHistory> = Object.create(null);

/**
 * Returns the chat history belonging to `sessionId`, creating and registering an empty
 * one the first time a session id is seen.
 *
 * @param sessionId - Identifier of the chat session.
 * @returns The same `ChatMessageHistory` instance on every call with the same id.
 */
const getMessageHistoryForSession = (sessionId: string): ChatMessageHistory => {
  const existingHistory = messageHistories[sessionId];
  if (existingHistory !== undefined) {
    return existingHistory;
  }
  const newChatSessionHistory = new ChatMessageHistory();
  messageHistories[sessionId] = newChatSessionHistory;
  return newChatSessionHistory;
};

We'll recreate our final chain using `getMessageHistoryForSession`, so that each session ID gets its own message history:

In [9]:
// Final chain: the conversational chain wrapped with message-history handling, with its
// output piped through the HTTP output parser.
// The history for each call is looked up through `getMessageHistoryForSession`.
// `inputMessagesKey` names the input field holding the user's question and
// `historyMessagesKey` names the field under which the history is supplied (the
// "history" placeholder of the answer prompt).
const finalRetrievalChain = new RunnableWithMessageHistory({
  runnable: conversationalRetrievalChain,
  getMessageHistory: getMessageHistoryForSession,
  inputMessagesKey: "question",
  historyMessagesKey: "history",
}).pipe(httpResponseOutputParser);

In [10]:
// TCP port the local API server listens on; the client requests further down build
// their URL from the same constant.
const port = 8087;

In [11]:
/**
 * HTTP request handler for the chat endpoint.
 *
 * Expects a JSON body of the form `{ "question": string, "session_id": string | number }`.
 * The question is streamed through `finalRetrievalChain` with `session_id` passed as the
 * `sessionId` configurable, and the resulting stream is returned as a plain-text body.
 *
 * @param request - Incoming HTTP request.
 * @returns A 200 response whose body is the chain's output stream, or a 400 plain-text
 *   response when the body is not valid JSON, is not a JSON object, or lacks a usable
 *   `question` / `session_id`.
 */
const handler = async (request: Request): Promise<Response> => {
  const plainTextHeaders = { "Content-Type": "text/plain" };
  const badRequest = (message: string): Response =>
    new Response(message, { status: 400, headers: plainTextHeaders });

  // The body comes from the network: parse defensively instead of letting a malformed
  // payload surface as an unhandled exception.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return badRequest("Request body must be valid JSON.");
  }
  if (typeof body !== "object" || body === null) {
    return badRequest("Request body must be a JSON object.");
  }

  const { question, session_id: sessionId } = body as Record<string, unknown>;
  if (typeof question !== "string" || question.trim() === "") {
    return badRequest('"question" must be a non-empty string.');
  }
  const hasSessionId =
    (typeof sessionId === "string" && sessionId !== "") ||
    (typeof sessionId === "number" && Number.isFinite(sessionId));
  if (!hasSessionId) {
    return badRequest('"session_id" must be a non-empty string or a number.');
  }

  const stream = await finalRetrievalChain.stream(
    { question },
    { configurable: { sessionId } },
  );

  return new Response(stream, {
    status: 200,
    headers: plainTextHeaders,
  });
};

In [12]:
// Start the HTTP server on `port`, dispatching every incoming request to `handler`.
Deno.serve({ port }, handler);

Listening on http://localhost:8087/


{
  addr: [Object: null prototype] {
    hostname: "localhost",
    port: 8087,
    transport: "tcp"
  },
  finished: Promise { <pending> },
  shutdown: [AsyncFunction: shutdown],
  ref: [Function: ref],
  unref: [Function: unref],
  [Symbol(Symbol.asyncDispose)]: [Function: [Symbol.asyncDispose]]
}

In [13]:
// General-purpose one-shot UTF-8 decoder. readChunks() deliberately does not use it:
// streaming decoding is stateful, so each stream gets its own decoder instance.
const decoder = new TextDecoder();

/**
 * Wraps a byte-stream reader in an async iterable of decoded text chunks.
 *
 * Bytes are decoded in streaming mode, so a multi-byte UTF-8 character that is split
 * across two reads is emitted intact with the later chunk instead of being turned into
 * replacement characters. Reads that produce no complete character yield nothing.
 *
 * @param reader - Object with a `read()` method resolving to `{ done, value }`, such as
 *   the result of `response.body.getReader()`.
 * @returns An object usable with `for await ... of`, yielding strings.
 * @throws TypeError if `reader` is null or undefined (e.g. the response had no body).
 */
function readChunks(
  reader: { read(): Promise<{ done: boolean; value?: Uint8Array }> } | undefined,
) {
  if (reader == null) {
    throw new TypeError(
      "readChunks() requires a stream reader, but none was provided (does the response have a body?)",
    );
  }
  const source = reader;
  return {
    async *[Symbol.asyncIterator]() {
      const streamDecoder = new TextDecoder();
      let readResult = await source.read();
      while (!readResult.done) {
        // `stream: true` makes the decoder hold back an incomplete trailing byte sequence.
        const text = streamDecoder.decode(readResult.value, { stream: true });
        if (text !== "") {
          yield text;
        }
        readResult = await source.read();
      }
      // Flush whatever the decoder is still holding (a truncated sequence becomes U+FFFD).
      const remainder = streamDecoder.decode();
      if (remainder !== "") {
        yield remainder;
      }
    },
  };
}

/**
 * Resolves after a delay.
 *
 * @param ms - Delay in milliseconds; defaults to 500, the value previously hard-coded.
 * @returns A promise that resolves with no value once the delay has elapsed.
 */
const sleep = (ms = 500): Promise<void> =>
  new Promise((resolve) => setTimeout(resolve, ms));

In [14]:
// First question of session "1": POST it to the local server and print the answer as
// it streams in.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "What are the prerequisites for this course?",
    session_id: "1", // Should randomly generate/assign
  }),
});

// Fail loudly on an error status instead of printing the error body as if it were an answer.
if (!response.ok) {
  throw new Error(
    `Request failed with status ${response.status}: ${await response.text()}`,
  );
}

// response.body is a ReadableStream (null when the response carries no body)
const reader = response.body?.getReader();
if (reader === undefined) {
  throw new Error("Response has no body to stream.");
}

for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// NOTE(review): presumably pauses so the notebook can finish displaying output — confirm.
await sleep();

CHUNK: Based on the
CHUNK:  provided context, the instructor mentioned the expectations for
CHUNK:  the course. He
CHUNK:  stated that the
CHUNK:  course will
CHUNK:  not be very programming intensive
CHUNK: , although there will
CHUNK:  be some programming, mostly
CHUNK:  in MATLAB or Oct
CHUNK: ave. Familiarity with
CHUNK:  basic probability and statistics
CHUNK:  is also assumed, with mention
CHUNK:  of a typical undergraduate
CHUNK:  statistics class like Stat 
CHUNK: 116 at Stanford being
CHUNK:  more than enough.
CHUNK:  

In addition,
CHUNK:  basic familiarity with linear
CHUNK:  algebra is
CHUNK:  assumed, with most
CHUNK:  undergraduate linear algebra courses being
CHUNK:  sufficient.
CHUNK:  Specific courses mentioned
CHUNK:  include Math 51,
CHUNK:  103, Math 
CHUNK: 113, or CS
CHUNK: 205 at Stanford. Understanding
CHUNK:  of random
CHUNK:  variables, expectation,
CHUNK:  variance,
CHUNK:  matrixes,
CHUNK:  vectors, matrix
CHUNK:  multiplication, matrix
CHUNK:  inverse,
CHUN

In [15]:
// Follow-up question in the same session ("1"); "them" only makes sense if the server
// kept the history of the previous exchange for this session id.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "Can you list them in bullet point format?",
    session_id: "1", // Should randomly generate/assign
  }),
});

// Fail loudly on an error status instead of printing the error body as if it were an answer.
if (!response.ok) {
  throw new Error(
    `Request failed with status ${response.status}: ${await response.text()}`,
  );
}

// response.body is a ReadableStream (null when the response carries no body)
const reader = response.body?.getReader();
if (reader === undefined) {
  throw new Error("Response has no body to stream.");
}

for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// NOTE(review): presumably pauses so the notebook can finish displaying output — confirm.
await sleep();

CHUNK: - Familiar
CHUNK: ity with basic programming, mostly in MATLAB or Octave
- Basic u
CHUNK: nderstanding of probability and statistics, similar to what is c
CHUNK: overed in a typical undergraduate statistics class like Stat 116
CHUNK:  at Stanford
- Basic familiarity
CHUNK:  with linear algebra, comparable to courses such
CHUNK:  as Math 51, 103
CHUNK: , Math 113
CHUNK: , or CS205 at Stanford

CHUNK: - Understanding of random variables, expectation
CHUNK: , variance, matrixes, vectors,
CHUNK:  matrix multiplication, matrix inverse, and eig
CHUNK: envectors
- Review sections will cover prerequisites
CHUNK:  for those who may need a ref
CHUNK: resher


In [16]:
// Same kind of request under a different session id ("2"), which has no prior exchange
// in this notebook.
const response = await fetch(`http://localhost:${port}`, {
  method: "POST",
  headers: {
    "content-type": "application/json",
  },
  body: JSON.stringify({
    question: "What did I just ask you?",
    session_id: "2", // Should randomly generate/assign
  }),
});

// Fail loudly on an error status instead of printing the error body as if it were an answer.
if (!response.ok) {
  throw new Error(
    `Request failed with status ${response.status}: ${await response.text()}`,
  );
}

// response.body is a ReadableStream (null when the response carries no body)
const reader = response.body?.getReader();
if (reader === undefined) {
  throw new Error("Response has no body to stream.");
}

for await (const chunk of readChunks(reader)) {
  console.log("CHUNK:", chunk);
}

// NOTE(review): presumably pauses so the notebook can finish displaying output — confirm.
await sleep();

CHUNK: Based on
CHUNK:  the
CHUNK:  provided context, it appears that you have not asked a specific
CHUNK:  question that is referenced directly. However, it seems that yo
CHUNK: u may be looking for some sort of information relevant to the co
CHUNK: urse material, or a question could be related to the content of 
CHUNK: the lecture or some key points being discussed by the instructor
CHUNK:  during the class. If there was a specific question that you ask
CHUNK: ed before this, please provide the details or context, and I wil
CHUNK: l be happy to help you with the answer.
