-
Notifications
You must be signed in to change notification settings - Fork 180
/
route.ts
90 lines (75 loc) · 2.4 KB
/
route.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import { NextResponse } from 'next/server';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
import prisma from '@/utils/prisma';
import { getAuth } from '@clerk/nextjs/server';
import { loadEmbeddingsModel } from '../utils/embeddings';
import { loadVectorStore } from '../utils/vector_store';
import { type MongoClient } from 'mongodb';
export async function POST(request: Request) {
let mongoDbClient: MongoClient | null = null;
const { fileUrl, fileName, vectorStoreId } = await request.json();
const { userId } = getAuth(request as any);
if (!userId) {
return NextResponse.json({ error: 'You must be logged in to ingest data' });
}
const docAmount = await prisma.document.count({
where: {
userId,
},
});
if (docAmount > 3) {
return NextResponse.json({
error: 'You have reached the maximum number of documents',
});
}
const doc = await prisma.document.create({
data: {
fileName,
fileUrl,
userId,
},
});
const namespace = doc.id;
try {
/* load from remote pdf URL */
const response = await fetch(fileUrl);
const buffer = await response.blob();
const loader = new PDFLoader(buffer);
const rawDocs = await loader.load();
/* Split text into chunks */
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 1000,
chunkOverlap: 200,
});
const splitDocs = await textSplitter.splitDocuments(rawDocs);
// Necessary for Mongo - we'll query on this later.
for (const splitDoc of splitDocs) {
splitDoc.metadata.docstore_document_id = namespace;
}
console.log('creating vector store...');
/* create and store the embeddings in the vectorStore */
const embeddings = loadEmbeddingsModel();
const store = await loadVectorStore({
namespace: doc.id,
embeddings,
});
const vectorstore = store.vectorstore;
if ('mongoDbClient' in store) {
mongoDbClient = store.mongoDbClient;
}
// embed the PDF documents
await vectorstore.addDocuments(splitDocs);
} catch (error) {
console.log('error', error);
return NextResponse.json({ error: 'Failed to ingest your data' });
} finally {
if (mongoDbClient) {
await mongoDbClient.close();
}
}
return NextResponse.json({
text: 'Successfully embedded pdf',
id: namespace,
});
}