In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment, if one is found.
load_dotenv()

True

In [3]:
# Load ../state_of_the_union.txt as UTF-8 text.
loader = TextLoader(
    "../state_of_the_union.txt",
    encoding="utf-8",
)
documents = loader.load()

In [4]:
# Split the loaded documents with chunk_size=1000 and chunk_overlap=80.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=80,
    chunk_size=1000,
)
chunks = splitter.split_documents(documents)

In [5]:
# Number of chunks produced by the splitter.
len(chunks)

42

### OpenAI embeddings

In [None]:
# OpenAI embeddings: build the client and embed one test string.
# NOTE(review): this cell assigns the same names (`embeddings`, `vector`) as the
# Gemini cells further down, so whichever cell ran last decides which model the
# later cells use. Consider distinct names if both providers are kept.
embeddings = OpenAIEmbeddings()
vector = embeddings.embed_query("Test my embeddings")

### Google Generative AI (Gemini) embeddings

In [6]:
# Google embeddings
from langchain_chroma import Chroma  # a vector db to store embeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings  # to create embeddings
import os

# .env is loaded again here (it was already loaded in the first cell); presumably
# so this cell can be run on its own after a kernel restart.
load_dotenv()
# Embedding client for the 'models/embedding-001' model.
embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Smoke test: embed a single string with the client configured above.
vector = embeddings.embed_query("Test my embeddings")

In [8]:
# Number of values in the returned embedding.
len(vector)

768

In [9]:
# Embed the text of the first five chunks in one call.
doc_vectors = embeddings.embed_documents(
    [chunk.page_content for chunk in chunks[:5]]
)

In [10]:
# One embedding is expected per input text.
len(doc_vectors)

5

In [11]:
import os

from langchain_postgres.vectorstores import PGVector

# SQLAlchemy URL of the Postgres database that stores the vectors.
# It can be overridden with the PGVECTOR_CONNECTION environment variable (for
# example from .env) so real credentials do not have to live in the notebook;
# the fallback is the local development database used so far.
# NOTE(review): this URL selects the psycopg2 driver, while the langchain-postgres
# documentation uses psycopg3 ("postgresql+psycopg://...") - confirm which driver
# is intended.
CONNECTION = os.environ.get(
    "PGVECTOR_CONNECTION",
    "postgresql+psycopg2://postgres:admin@localhost:5432/vector_db",
)
COLLECTION_NAME = "state_of_union_vectors"

# Embed every chunk with `embeddings` and store it in the collection.
# NOTE(review): re-running this cell presumably inserts the chunks again - confirm
# before executing it more than once against the same database.
vectorstore = PGVector.from_documents(
    documents=chunks,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
    connection=CONNECTION,
)

In [1]:
import inspect

from langchain_postgres.vectorstores import PGVector

# Show the constructor parameters. Calling `PGVector()` with no arguments raises
# a TypeError because required arguments are missing, which would stop "Run All".
inspect.signature(PGVector)

### Similarity search

In [12]:
query = (
    "What is the stance about the USA fight against Russian oligarchs "
    "why are they fighting"
)
# Ask the vector store for the 3 closest chunks, each returned with its score.
similar = vectorstore.similarity_search_with_score(query, k=3)

In [13]:
# Print each hit as returned; the output below shows each item is a tuple that
# starts with the matched Document.
for doc in similar:
    print(doc)

(Document(metadata={'source': '../state_of_the_union.txt'}, page_content='The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs.  \n\nWe are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains. \n\nAnd tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value. \n\nThe Russian stock market has lost 40% of its value and trading remains suspended. Russia’s economy is reeling and Putin alone is to blame. \n\nTogether with our allies we are providing support to the Ukrainians in their fight for freedom. Military assistance. Economic assistance. Humanitarian assistance. \n\nWe are giving more than $1 Billion in direct assistance to Ukraine. \n\nAnd we will continue to aid the

### Retrievers

In [14]:
# Expose the vector store as a retriever, passing k=3 as a search argument.
retriever = vectorstore.as_retriever(search_kwargs={'k': 3})

In [None]:
# Authenticate the gcloud CLI.
# NOTE(review): presumably interactive, so it may block an unattended "Run All" - confirm.
!gcloud auth login 

In [4]:
# Select the gcloud project; the same project id is passed to vertexai.init() below.
!gcloud config set project makecom-oauth-432508

Updated property [core/project].


In [2]:
from langchain.chains import RetrievalQA
from langchain_google_vertexai import VertexAI
import vertexai

# Initialise the Vertex AI SDK for this project and region.
# The project id duplicates the one used in the gcloud config cell above; keep the two in sync.
vertexai.init(project='makecom-oauth-432508', location="us-central1")

In [None]:
# Question-answering chain: `retriever` supplies the documents and a default
# VertexAI LLM produces the answer, using the 'stuff' chain type.
qa = RetrievalQA.from_chain_type(
    chain_type="stuff",
    retriever=retriever,
    llm=VertexAI(),
)

In [None]:
# Same question as in the similarity-search section, now answered through the chain.
query = (
    "What is the stance about the USA fight against Russian oligarchs "
    "why are they fighting"
)
qa.invoke(query)

### Similarity search using vectors

In [20]:
# Print the full embedding of `query`; the SQL example below compares the stored
# embeddings against a vector literal of this form.
print(embeddings.embed_query(query))

[0.0009888834320008755, -0.026340562850236893, 0.007736309431493282, -0.02344955876469612, 0.016696453094482422, -0.0027476230170577765, 0.019886845722794533, 0.01318899355828762, -0.008938956074416637, 0.040280334651470184, 0.014471798203885555, 0.0015053164679557085, -0.003520844504237175, 0.01743081770837307, -0.0018641356145963073, -0.030010638758540154, 0.024751972407102585, 0.0068791527301073074, 0.04060770571231842, -0.04066096618771553, -0.005298595875501633, 0.021394962444901466, -0.0036250585690140724, -0.009210529737174511, 0.005868952255696058, -0.03266841173171997, 0.02428959682583809, -0.0908043161034584, 0.01799178309738636, 0.02485017105937004, -0.08454158157110214, 0.047325070947408676, -0.0436805821955204, 0.033215802162885666, 0.08596765249967575, -0.07334335893392563, 0.028026215732097626, 0.02365100011229515, -0.03437275066971779, 0.038043685257434845, 0.005486993119120598, -0.005746247246861458, -0.04237205535173416, -0.010365850292146206, 0.032408494502305984, 0.

In [None]:
def cosine_distance_sql(query_vector, limit=2):
    """Build the SQL that ranks stored chunks by cosine distance to a vector.

    The statement reads the ``document`` and ``embedding`` columns of the
    ``langchain_pg_embedding`` table and uses the ``<=>`` operator to compute
    the distance to ``query_vector``.

    Args:
        query_vector: Iterable of numbers; each one is converted with
            ``float()`` so that only numeric text ends up in the statement.
        limit: Maximum number of rows to return (converted with ``int()``).

    Returns:
        The SQL statement as a string, ready to paste into a SQL client.
    """
    vector_literal = "[" + ", ".join(repr(float(value)) for value in query_vector) + "]"
    return (
        f"SELECT document, (embedding <=> '{vector_literal}') AS cosine_distance\n"
        "FROM langchain_pg_embedding\n"
        "ORDER BY cosine_distance\n"
        f"LIMIT {int(limit)}"
    )


# Generate the statement from the current query rather than pasting a
# hand-copied vector literal, which can silently go stale.
print(cosine_distance_sql(embeddings.embed_query(query)))

In [18]:
# Similarity search restricted by a filter on `id`.
# NOTE(review): the recorded result is an empty list. The filter is presumably
# matched against document metadata, and the metadata printed earlier only
# contains 'source' - confirm which field the ids should be matched on.
vectorstore.similarity_search_with_score(
    query,
    k=10,
    filter={
        "id": {
            "$in": [
                "788027d1-8288-4afe-9fc7-1e40f0bc22d9",
                "1bfb41b1-b00d-45e9-baa7-f37304286f4a",
            ]
        },
        # 'location': {'$in': ["pond", "market"]}
    },
)

[]

### A function to generate embeddings

In [None]:
import { createClient } from '@supabase/supabase-js'
import { Configuration, OpenAIApi } from 'openai'
import { supabaseClient } from './lib/supabase'

/**
 * Creates an embedding for every document and stores the document text together
 * with its embedding in the `documents` table.
 *
 * Documents come from `getDocuments()`, which must be provided by the caller's
 * project and is assumed to resolve to an array of strings.
 */
async function generateEmbeddings() {
  const configuration = new Configuration({ apiKey: '<YOUR_OPENAI_API_KEY>' })
  // The client must be named `openai`: that is the identifier used for the
  // createEmbedding call inside the loop.
  const openai = new OpenAIApi(configuration)

  const documents = await getDocuments() // Your custom function to load docs

  // Assuming each document is a string
  for (const document of documents) {
    // OpenAI recommends replacing newlines with spaces for best results
    const input = document.replace(/\n/g, ' ')

    const embeddingResponse = await openai.createEmbedding({
      model: 'text-embedding-ada-002',
      input,
    })

    // The first entry of the response data carries the embedding for `input`.
    const [{ embedding }] = embeddingResponse.data.data

    // In production we should handle possible errors
    await supabaseClient.from('documents').insert({
      content: document,
      embedding,
    })
  }
}


### A search function

In [None]:
import { serve } from 'https://deno.land/std@0.170.0/http/server.ts'
import 'https://deno.land/x/xhr@0.2.1/mod.ts'
import { createClient } from 'jsr:@supabase/supabase-js@2'
import { Configuration, OpenAIApi } from 'https://esm.sh/openai@3.1.0'
import { supabaseClient } from './lib/supabase'

// CORS headers attached to every response, including the preflight reply.
export const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
}

// HTTP handler: embeds the incoming query and returns the matching documents as JSON.
serve(async (req) => {
  // Handle CORS
  if (req.method === 'OPTIONS') {
    return new Response('ok', { headers: corsHeaders })
  }

  // Search query is passed in request payload
  const { query } = await req.json()

  // Reject a missing or non-string query up front instead of failing on .replace()
  if (typeof query !== 'string' || query.trim() === '') {
    return new Response(JSON.stringify({ error: 'Request body must contain a non-empty "query" string' }), {
      status: 400,
      headers: { ...corsHeaders, 'Content-Type': 'application/json' },
    })
  }

  // OpenAI recommends replacing newlines with spaces for best results
  const input = query.replace(/\n/g, ' ')

  const configuration = new Configuration({ apiKey: '<YOUR_OPENAI_API_KEY>' })
  const openai = new OpenAIApi(configuration)

  // Generate a one-time embedding for the query itself
  const embeddingResponse = await openai.createEmbedding({
    model: 'text-embedding-ada-002',
    input,
  })

  const [{ embedding }] = embeddingResponse.data.data

  // The RPC reports failures through `error` rather than by throwing, so check it explicitly
  const { data: documents, error } = await supabaseClient.rpc('match_documents', {
    query_embedding: embedding,
    match_threshold: 0.78, // Choose an appropriate threshold for your data
    match_count: 10, // Choose the number of matches
  })

  if (error) {
    return new Response(JSON.stringify({ error: error.message }), {
      status: 500,
      headers: { ...corsHeaders, 'Content-Type': 'application/json' },
    })
  }

  return new Response(JSON.stringify(documents), {
    headers: { ...corsHeaders, 'Content-Type': 'application/json' },
  })
})


### A better search function

In [None]:
import { serve } from 'https://deno.land/std@0.170.0/http/server.ts'
import 'https://deno.land/x/xhr@0.2.1/mod.ts'
import { createClient } from 'jsr:@supabase/supabase-js@2'
import GPT3Tokenizer from 'https://esm.sh/gpt3-tokenizer@1.1.5'
import { Configuration, OpenAIApi } from 'https://esm.sh/openai@3.1.0'
import { oneLine, stripIndent } from 'https://esm.sh/common-tags@1.8.2'
import { supabaseClient } from './lib/supabase'

// CORS headers attached to every response, including the preflight reply.
export const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
}

// HTTP handler: embeds the query, collects matching documents as context
// (capped at 1500 tokens) and asks the completion model for a markdown answer.
serve(async (req) => {
  // Handle CORS
  if (req.method === 'OPTIONS') {
    return new Response('ok', { headers: corsHeaders })
  }

  // Search query is passed in request payload
  const { query } = await req.json()

  // Reject a missing or non-string query up front instead of failing on .replace()
  if (typeof query !== 'string' || query.trim() === '') {
    return new Response(JSON.stringify({ error: 'Request body must contain a non-empty "query" string' }), {
      status: 400,
      headers: { ...corsHeaders, 'Content-Type': 'application/json' },
    })
  }

  // OpenAI recommends replacing newlines with spaces for best results
  const input = query.replace(/\n/g, ' ')

  const configuration = new Configuration({ apiKey: '<YOUR_OPENAI_API_KEY>' })
  const openai = new OpenAIApi(configuration)

  // Generate a one-time embedding for the query itself
  const embeddingResponse = await openai.createEmbedding({
    model: 'text-embedding-ada-002',
    input,
  })

  const [{ embedding }] = embeddingResponse.data.data

  // Fetching whole documents for this simple example.
  //
  // Ideally for context injection, documents are chunked into
  // smaller sections at earlier pre-processing/embedding step.
  const { data, error } = await supabaseClient.rpc('match_documents', {
    query_embedding: embedding,
    match_threshold: 0.78, // Choose an appropriate threshold for your data
    match_count: 10, // Choose the number of matches
  })

  // The RPC reports failures through `error` rather than by throwing
  if (error) {
    return new Response(JSON.stringify({ error: error.message }), {
      status: 500,
      headers: { ...corsHeaders, 'Content-Type': 'application/json' },
    })
  }

  // `data` can be null; treat that as "no matches" so the loop below cannot throw
  const documents = data ?? []

  const tokenizer = new GPT3Tokenizer({ type: 'gpt3' })
  let tokenCount = 0
  let contextText = ''

  // Concat matched documents
  for (let i = 0; i < documents.length; i++) {
    const document = documents[i]
    const content = document.content
    const encoded = tokenizer.encode(content)
    tokenCount += encoded.text.length

    // Limit context to max 1500 tokens (configurable)
    if (tokenCount > 1500) {
      break
    }

    contextText += `${content.trim()}\n---\n`
  }

  const prompt = stripIndent`${oneLine`
    You are a very enthusiastic Supabase representative who loves
    to help people! Given the following sections from the Supabase
    documentation, answer the question using only that information,
    outputted in markdown format. If you are unsure and the answer
    is not explicitly written in the documentation, say
    "Sorry, I don't know how to help with that."`}

    Context sections:
    ${contextText}

    Question: """
    ${query}
    """

    Answer as markdown (including related code snippets if available):
  `

  // In production we should handle possible errors
  // NOTE(review): confirm that this completion model is still offered by the API.
  const completionResponse = await openai.createCompletion({
    model: 'text-davinci-003',
    prompt,
    max_tokens: 512, // Choose the max allowed tokens in completion
    temperature: 0, // Set to 0 for deterministic results
  })

  const {
    id,
    choices: [{ text }],
  } = completionResponse.data

  return new Response(JSON.stringify({ id, text }), {
    headers: { ...corsHeaders, 'Content-Type': 'application/json' },
  })
})


OpenAI API responses take longer depending on the length of the “answer”. ChatGPT has a nice UX for this: it streams the response to the user immediately. You can see a similar effect in the Supabase docs:

The OpenAI API supports completion streaming with Server-Sent Events. Supabase Edge Functions run on Deno, which also supports Server-Sent Events. Check out this commit to see how we modified the Function above to build a streaming interface.

Storing embeddings in Postgres opens a world of possibilities. You can combine your search function with telemetry functions, add user-provided feedback (thumbs up/down), and make your search feel more integrated with your products.

The pgvector extension is available on all new Supabase projects today. To try it out, launch a new Postgres database: database.new