-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy path07-hybrid-search.sql
81 lines (77 loc) · 2.13 KB
/
07-hybrid-search.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/*
Get the embeddings for the input text by calling the OpenAI API
and then search the most similar articles (by title)
Note: <deployment-id> needs to be replaced with the deployment name of your embedding model in Azure OpenAI
*/
DECLARE @q NVARCHAR(1000) = 'the foundation series by isaac asimov';
DECLARE @k INT = 10
DECLARE @r INT, @e VECTOR(1536);
EXEC @r = dbo.get_embedding '<deployment-id>', @q, @e OUTPUT;
IF (@r != 0) SELECT @r;
WITH keyword_search AS (
SELECT TOP(@k)
id,
RANK() OVER (ORDER BY ft_rank DESC) AS rank,
title,
[text]
FROM
(
SELECT TOP(@k)
id,
ftt.[RANK] AS ft_rank,
title,
[text]
FROM
dbo.wikipedia_articles_embeddings w
INNER JOIN
FREETEXTTABLE(dbo.wikipedia_articles_embeddings, *, @q) AS ftt ON w.id = ftt.[KEY]
ORDER BY
ft_rank DESC
) AS freetext_documents
ORDER BY
rank ASC
),
semantic_search AS
(
SELECT TOP(@k)
id,
RANK() OVER (ORDER BY cosine_distance) AS rank
FROM
(
SELECT TOP(@k)
id,
VECTOR_DISTANCE('cosine', @e, content_vector_ada2) AS cosine_distance
FROM
dbo.wikipedia_articles_embeddings w
ORDER BY
cosine_distance
) AS similar_documents
),
result AS (
SELECT TOP(@k)
COALESCE(ss.id, ks.id) AS id,
ss.rank AS semantic_rank,
ks.rank AS keyword_rank,
COALESCE(1.0 / (@k + ss.rank), 0.0) +
COALESCE(1.0 / (@k + ks.rank), 0.0) AS score -- Reciprocal Rank Fusion (RRF)
FROM
semantic_search ss
FULL OUTER JOIN
keyword_search ks ON ss.id = ks.id
ORDER BY
score DESC
)
SELECT
w.id,
cast(score * 1000 as int) as rrf_score,
rank() OVER(ORDER BY cast(score * 1000 AS INT) DESC) AS rrf_rank,
semantic_rank,
keyword_rank,
w.title,
w.[text]
FROM
result AS r
INNER JOIN
dbo.wikipedia_articles_embeddings AS w ON r.id = w.id
ORDER BY
rrf_rank