-
Notifications
You must be signed in to change notification settings - Fork 259
/
chat-post.ts
135 lines (115 loc) · 5.67 KB
/
chat-post.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import { Readable } from 'node:stream';
import { HttpRequest, InvocationContext, HttpResponseInit, app } from '@azure/functions';
import { AIChatCompletionRequest, AIChatCompletionDelta } from '@microsoft/ai-chat-protocol';
import { Document } from '@langchain/core/documents';
import { AzureOpenAIEmbeddings, AzureChatOpenAI } from '@langchain/openai';
import { Embeddings } from '@langchain/core/embeddings';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { VectorStore } from '@langchain/core/vectorstores';
import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama';
import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { FaissStore } from '@langchain/community/vectorstores/faiss';
import { ChatPromptTemplate, PromptTemplate } from '@langchain/core/prompts';
import { createStuffDocumentsChain } from 'langchain/chains/combine_documents';
import { AzureAISearchVectorStore } from '@langchain/community/vectorstores/azure_aisearch';
import { createRetrievalChain } from 'langchain/chains/retrieval';
import 'dotenv/config';
import { badRequest, data, serviceUnavailable } from '../http-response';
import { ollamaChatModel, ollamaEmbeddingsModel, faissStoreFolder } from '../constants';
import { getAzureOpenAiTokenProvider, getCredentials } from '../security';
// System prompt grounding the assistant: answer only from the retrieved
// sources, cite every source as [filename], and append 3 follow-up questions
// wrapped in double angle brackets. The {context} placeholder is filled in by
// the stuff-documents chain with the formatted retrieved documents.
const systemPrompt = `Assistant helps the Consto Real Estate company customers with questions and support requests. Be brief in your answers. Answer only plain text, DO NOT use Markdown.
Answer ONLY with information from the sources below. If there isn't enough information in the sources, say you don't know. Do not generate answers that don't use the sources. If asking a clarifying question to the user would help, ask the question.
If the user question is not in English, answer in the language used in the question.
Each source has the format "[filename]: information". ALWAYS reference the source filename for every part used in the answer. Use the format "[filename]" to reference a source, for example: [info1.txt]. List each source separately, for example: [info1.txt][info2.pdf].
Generate 3 very brief follow-up questions that the user would likely ask next.
Enclose the follow-up questions in double angle brackets. Example:
<<Am I allowed to invite friends for a party?>>
<<How can I ask for a refund?>>
<<What if I break something?>>
Do not repeat questions that have already been asked.
Make sure the last question ends with ">>".
SOURCES:
{context}`;
/**
 * Azure Functions handler for the streaming chat endpoint.
 *
 * Reads an AI Chat Protocol completion request from the body, retrieves the
 * 3 most relevant documents from the vector store, runs the retrieval chain
 * with the grounded system prompt, and streams the answer back as
 * newline-delimited JSON.
 *
 * Uses Azure OpenAI + Azure AI Search when AZURE_OPENAI_API_ENDPOINT is set,
 * otherwise falls back to local Ollama models and a FAISS store on disk.
 *
 * @param request Incoming HTTP request carrying the chat messages.
 * @param context Invocation context, used for logging.
 * @returns A streaming NDJSON response, 400 on invalid input, 503 on failure.
 */
export async function postChat(request: HttpRequest, context: InvocationContext): Promise<HttpResponseInit> {
  const azureOpenAiEndpoint = process.env.AZURE_OPENAI_API_ENDPOINT;

  try {
    const requestBody = (await request.json()) as AIChatCompletionRequest;
    const { messages } = requestBody;

    // Capture the latest user message once; validating it here also removes
    // the need for a non-null assertion when it is used below.
    const lastUserMessage = messages?.at(-1)?.content;
    if (!messages || messages.length === 0 || !lastUserMessage) {
      return badRequest('Invalid or missing messages in the request body');
    }

    let embeddings: Embeddings;
    let model: BaseChatModel;
    let store: VectorStore;

    if (azureOpenAiEndpoint) {
      const credentials = getCredentials();
      const azureADTokenProvider = getAzureOpenAiTokenProvider();

      // Initialize models and vector database
      embeddings = new AzureOpenAIEmbeddings({ azureADTokenProvider });
      model = new AzureChatOpenAI({
        // Controls randomness. 0 = deterministic, 1 = maximum randomness
        temperature: 0.7,
        azureADTokenProvider,
      });
      store = new AzureAISearchVectorStore(embeddings, { credentials });
    } else {
      // If no environment variables are set, it means we are running locally
      context.log('No Azure OpenAI endpoint set, using Ollama models and local DB');
      embeddings = new OllamaEmbeddings({ model: ollamaEmbeddingsModel });
      model = new ChatOllama({
        temperature: 0.7,
        model: ollamaChatModel,
      });
      store = await FaissStore.load(faissStoreFolder, embeddings);
    }

    // Create the chain that combines the prompt with the documents
    const combineDocsChain = await createStuffDocumentsChain({
      llm: model,
      prompt: ChatPromptTemplate.fromMessages([
        ['system', systemPrompt],
        ['human', '{input}'],
      ]),
      // Each retrieved document is rendered as "[filename]: content" so the
      // model can cite sources exactly as the system prompt instructs.
      documentPrompt: PromptTemplate.fromTemplate('[{source}]: {page_content}\n'),
    });

    // Create the chain to retrieve the documents from the database
    const chain = await createRetrievalChain({
      retriever: store.asRetriever(3),
      combineDocsChain,
    });

    const responseStream = await chain.stream({
      input: lastUserMessage,
    });
    const jsonStream = Readable.from(createJsonStream(responseStream));

    return data(jsonStream, {
      'Content-Type': 'application/x-ndjson',
      'Transfer-Encoding': 'chunked',
    });
  } catch (_error: unknown) {
    // Narrow instead of asserting: with `as Error`, a non-Error throw would
    // log `undefined` as the message.
    const error = _error instanceof Error ? _error : new Error(String(_error));
    context.error(`Error when processing chat-post request: ${error.message}`);
    return serviceUnavailable('Service temporarily unavailable. Please try again later.');
  }
}
// Convert the retrieval chain's streamed chunks into newline-delimited JSON
// (one serialized AI Chat Protocol completion delta per line).
// See https://github.com/ndjson/ndjson-spec
async function* createJsonStream(chunks: AsyncIterable<{ context: Document[]; answer: string }>) {
  for await (const { answer } of chunks) {
    // Skip chunks that carry no answer text (e.g. context-only chunks).
    if (!answer) {
      continue;
    }

    const completionDelta: AIChatCompletionDelta = {
      delta: {
        content: answer,
        role: 'assistant',
      },
    };

    yield JSON.stringify(completionDelta) + '\n';
  }
}
// Enable HTTP streaming so the NDJSON response body can be sent incrementally
// instead of being buffered until the handler completes.
app.setup({ enableHttpStream: true });
// Register the chat endpoint: POST /api/chat/stream, no auth key required
// ('anonymous'), handled by postChat above.
app.http('chat-post', {
route: 'chat/stream',
methods: ['POST'],
authLevel: 'anonymous',
handler: postChat,
});