-
Notifications
You must be signed in to change notification settings - Fork 0
/
Generation.jl
642 lines (548 loc) · 19.8 KB
/
Generation.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
# Generation.jl
module Generation
using HTTP
using JSON
using DotEnv
include("SemanticSearch/SemanticSearch.jl")
using .SemanticSearch
export OAIGenerator,
OAIGeneratorWithCorpus,
OllamaGenerator,
OllamaGeneratorWithCorpus,
generate,
generate_with_corpus,
upsert_chunk_to_generator,
upsert_document_to_generator,
upsert_document_from_url_to_generator,
load_OAIGeneratorWithCorpus,
load_OllamaGeneratorWithCorpus,
GeneratorWithCorpus,
check_oai_key_format
# Optional list of context chunks accepted by `build_full_query` and `generate`.
const OptionalContext = Union{Vector{String},Nothing}

# Root of the generator type hierarchy; every generator struct in this module subtypes it.
abstract type Generator end

# Generators that additionally carry a `corpus` field.
abstract type GeneratorWithCorpus <: Generator end

# Directory containing this source file. Declared `const` so that the global is
# concretely typed instead of being an untyped (`Any`) binding.
const CURR_DIR = @__DIR__

# Contents of `system_prompt.txt` (located next to this file), read once when the module
# is loaded. It is interpolated into every prompt built by `build_full_query`, so it is
# `const` to keep that function from reading an untyped global on every call.
const SYSTEM_PROMPT = read(joinpath(CURR_DIR, "system_prompt.txt"), String)
"""
function check_oai_key_format(key::String)
Uses regex to check if a provided string is in the expected format of an OpenAI
API Key
Parameters
----------
key : String
the key you want to check
Notes
-----
See here for more on the regex:
- https://en.wikibooks.org/wiki/Introducing_Julia/Strings_and_characters#Finding_and_replacing_things_inside_strings
Uses format rule provided here:
- https://github.com/secretlint/secretlint/issues/676
- https://community.openai.com/t/what-are-the-valid-characters-for-the-apikey/288643
Note that this only checks the key format, not whether the key is valid or has not
been revoked.
"""
function check_oai_key_format(key::String)
pattern = r"^sk-[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}$"
return occursin(pattern, key)
end
"""
struct OAIGenerator
A struct for handling natural language generation via OpenAI's
gpt-3.5-turbo completion endpoint.
Attributes
----------
url : String
the URL of the OpenAI API endpoint
header : Vector{Pair{String, String}}
key-value pairs representing the HTTP headers for the request
body : Dict{String, Any}
this is the JSON payload to be sent in the body of the request
Notes
-----
All natural language generation should be done via a "Generator"
object of some kind for consistency.
When instantiating a new OAIGenerator in an externally-viewable
setting (e.g. notebooks committed to GitHub or a public demo),
it is important to place a semicolon after the command, e.g.
'''generator=load_OAIGeneratorWithCorpus("greek_philosophers");'''
to ensure that your OAI API key is not inadvertently shared.
"""
struct OAIGenerator <: Generator
url::String
header::Vector{Pair{String,String}}
body::Dict{String,Any}
end
"""
struct OllamaGenerator
A struct for handling natural language generation locally.
Attributes
----------
url : String
the URL of the local Ollama API endpoint
header : Dict{String,Any}
HTTP header for the request
body : Dict{String, Any}
this is the JSON payload to be sent in the body of the request
"""
struct OllamaGenerator <: Generator
url::String
header::Dict{String,Any}
body::Dict{String,Any}
end
"""
struct OAIGeneratorWithCorpus
Like OAIGenerator, but has a corpus attached.
Attributes
----------
url : String
the URL of the OpenAI API endpoint
header : Vector{Pair{String, String}}
key-value pairs representing the HTTP headers for the request
body : Dict{String, Any}
this is the JSON payload to be sent in the body of the request
corpus : an initialized Corpus object
the corpus / "vector database" you want to use
Notes
-----
When instantiating a new OAIGenerator in an externally-viewable
setting (e.g. notebooks committed to GitHub or a public demo),
it is important to place a semicolon after the command, e.g.
'''generator=load_OAIGeneratorWithCorpus("greek_philosophers");'''
to ensure that your OAI API key is not inadvertently shared.
"""
struct OAIGeneratorWithCorpus <: GeneratorWithCorpus
url::String
header::Vector{Pair{String,String}}
body::Dict{String,Any}
corpus::Corpus
end
"""
struct OllamaGeneratorWithCorpus
Like OllamaGenerator, but has a corpus attached.
Attributes
----------
url : String
the URL of the local Ollama API endpoint
header : Dict{String,Any}
HTTP header for the request
body : Dict{String, Any}
this is the JSON payload to be sent in the body of the request
corpus : an initialized Corpus object
the corpus / "vector database" you want to use
"""
struct OllamaGeneratorWithCorpus <: GeneratorWithCorpus
url::String
header::Dict{String,Any}
body::Dict{String,Any}
corpus::Corpus
end
"""
function OAIGenerator(auth_token::Union{String, Nothing})
Initializes an OAIGenerator struct.
Parameters
----------
auth_token :: Union{String, Nothing}
this is your OPENAI API key. You can either pass it explicitly as a string
or leave this argument as nothing. In the latter case, we will look in your
environmental variables for "OAI_KEY"
Notes
-----
When instantiating a new OAIGenerator in an externally-viewable
setting (e.g. notebooks committed to GitHub or a public demo),
it is important to place a semicolon after the command, e.g.
'''generator=load_OAIGeneratorWithCorpus("greek_philosophers");'''
to ensure that your OAI API key is not inadvertently shared.
"""
function OAIGenerator(auth_token::Union{String,Nothing} = nothing)
if isnothing(auth_token)
path_to_env = joinpath(dirname(@__DIR__), ".env")
cfg = DotEnv.config(path_to_env)
auth_token = cfg["OAI_KEY"]
end
url = "https://api.openai.com/v1/chat/completions"
header = [
"Content-Type" => "application/json",
"Authorization" => "Bearer $auth_token"
]
body = Dict("model" => "gpt-3.5-turbo")
return OAIGenerator(url, header, body)
end
"""
function OllamaGenerator(model_name::String = "mistral:7b-instruct")
Initializes an OllamaGenerator struct for local text generation.
Parameters
----------
model_name :: String
this is an Ollama model tag. see https://ollama.com/library
defaults to mistral 7b instruct
"""
function OllamaGenerator(model_name::String = "mistral:7b-instruct")
url = "http://localhost:11434/api/generate"
header = Dict("Content-Type" => "application/json")
body = Dict(
"model" => model_name,
"stream" => false,
)
generator = OllamaGenerator(url, header, body)
try
# this will work if you've already pulled the model_name
test = generate(generator, "Hi! This is a test query.")
catch
# if above fails, pull the model
command = `ollama pull $model_name`
run(command)
end
return generator
end
"""
function OAIGeneratorWithCorpus(auth_token::Union{String, Nothing}=nothing, corpus::Corpus)
Initializes an OAIGeneratorWithCorpus.
Parameters
----------
corpus_name : str or nothing
the name that you want to give the database
optional. if left as nothing, we use an in-memory database
auth_token :: Union{String, Nothing}
this is your OPENAI API key. You can either pass it explicitly as a string
or leave this argument as nothing. In the latter case, we will look in your
environmental variables for "OAI_KEY"
embedder_model_path : str
a path to a HuggingFace-hosted model
e.g. "BAAI/bge-small-en-v1.5"
max_seq_len : int
The maximum number of tokens per chunk.
This should be the max sequence length of the tokenizer
Notes
-----
When instantiating a new OAIGenerator in an externally-viewable
setting (e.g. notebooks committed to GitHub or a public demo),
it is important to place a semicolon after the command, e.g.
'''generator=load_OAIGeneratorWithCorpus("greek_philosophers");'''
to ensure that your OAI API key is not inadvertently shared.
"""
function OAIGeneratorWithCorpus(
corpus_name::Union{String,Nothing} = nothing,
auth_token::Union{String,Nothing} = nothing,
embedder_model_path::String = "BAAI/bge-small-en-v1.5",
max_seq_len::Int = 512,
)
base_generator = OAIGenerator(auth_token)
corpus = Corpus(corpus_name, embedder_model_path, max_seq_len)
new_generator = OAIGeneratorWithCorpus(
base_generator.url,
base_generator.header,
base_generator.body,
corpus,
)
return new_generator
end
"""
function OllamaGeneratorWithCorpus(corpus_name::Union{String,Nothing} = nothing, model_name::String = "mistral:7b-instruct", embedder_model_path::String = "BAAI/bge-small-en-v1.5", max_seq_len::Int = 512)
Initializes an OllamaGeneratorWithCorpus.
Parameters
----------
corpus_name : str or nothing
the name that you want to give the database
optional. if left as nothing, we use an in-memory database
model_name :: String
this is an Ollama model tag. see https://ollama.com/library
defaults to mistral 7b instruct
embedder_model_path : str
a path to a HuggingFace-hosted model
e.g. "BAAI/bge-small-en-v1.5"
max_seq_len : int
The maximum number of tokens per chunk.
This should be the max sequence length of the tokenizer
"""
function OllamaGeneratorWithCorpus(
corpus_name::Union{String,Nothing} = nothing,
model_name::String = "mistral:7b-instruct",
embedder_model_path::String = "BAAI/bge-small-en-v1.5",
max_seq_len::Int = 512,
)
base_generator = OllamaGenerator(model_name)
corpus = Corpus(corpus_name, embedder_model_path, max_seq_len)
new_generator = OllamaGeneratorWithCorpus(
base_generator.url,
base_generator.header,
base_generator.body,
corpus,
)
return new_generator
end
"""
function load_OAIGeneratorWithCorpus(corpus_name::String, auth_token::Union{String, Nothing}=nothing)
Loads an existing corpus and uses it to initialize an OAIGeneratorWithCorpus
Parameters
----------
corpus_name : str
the name that you want to give the database
auth_token :: Union{String, Nothing}
this is your OPENAI API key. You can either pass it explicitly as a string
or leave this argument as nothing. In the latter case, we will look in your
environmental variables for "OAI_KEY"
Notes
-----
corpus_name is ordered first because Julia uses positional arguments and
auth_token is optional.
When instantiating a new OAIGenerator in an externally-viewable
setting (e.g. notebooks committed to GitHub or a public demo),
it is important to place a semicolon after the command, e.g.
'''generator=load_OAIGeneratorWithCorpus("greek_philosophers");'''
to ensure that your OAI API key is not inadvertently shared.
"""
function load_OAIGeneratorWithCorpus(
corpus_name::String,
auth_token::Union{String,Nothing} = nothing,
)
base_generator = OAIGenerator(auth_token)
corpus = load_corpus(corpus_name)
new_generator = OAIGeneratorWithCorpus(
base_generator.url,
base_generator.header,
base_generator.body,
corpus,
)
return new_generator
end
"""
function load_OllamaGeneratorWithCorpus(corpus_name::String, model_name::String = "mistral:7b-instruct")
Loads an existing corpus and uses it to initialize an OllamaGeneratorWithCorpus
Parameters
----------
corpus_name : str
the name that you want to give the database
model_name :: String
this is an Ollama model tag. see https://ollama.com/library
defaults to mistral 7b instruct
Notes
-----
corpus_name is ordered first because Julia uses positional arguments and
model_name is optional.
"""
function load_OllamaGeneratorWithCorpus(
corpus_name::String,
model_name::String = "mistral:7b-instruct"
)
base_generator = OllamaGenerator(model_name)
corpus = load_corpus(corpus_name)
new_generator = OllamaGeneratorWithCorpus(
base_generator.url,
base_generator.header,
base_generator.body,
corpus,
)
return new_generator
end
"""
function build_full_query(query::String, context::OptionalContext=nothing)
Given a query and a list of contextual chunks, construct a full query
incorporating both.
Parameters
----------
query : String
the main instruction or query string
context : OptionalContext, which is Union{Vector{String}, Nothing}
optional list of chunks providing additional context for the query
Notes
-----
We base our prompt off the Alpaca prompt, found here: https://github.com/tatsu-lab/stanford_alpaca
with minor modifications that reflect our response preferences.
"""
function build_full_query(query::String, context::OptionalContext = nothing)
full_query = """
Below is an itemization of expectations or preferences to consider while completing any request. Do not refer to these expectations unless explicitly asked about them.
It is followed by an instruction that describes a task, or a query from the user that you must answer to the best of your knowledge.
Write a response that appropriately completes the request.
### Expectatations/Preferences:
$SYSTEM_PROMPT
### Instruction/Query:
$query
### Response:
"""
if !isnothing(context)
context_str = join(["- " * s for s in context], "\n")
full_query = """
Below is an itemization of expectations or preferences to consider while completing any request. Do not refer to these expectations unless explicitly asked about them.
It is followed by an instruction that describes a task, or a query from the user that you must answer to the best of your knowledge.
Write a response that appropriately completes the request.
### Expectatations/Preferences:
$SYSTEM_PROMPT
### Instruction/Query:
$query
### Input:
$context_str
### Response:
"""
end
return full_query
end
"""
generate(generator::Union{OAIGenerator, Nothing}, query::String, context::OptionalContext=nothing, temperature::Float64=0.7)
Generate a response based on a given query and optional context using the specified OAIGenerator. This function constructs a full query, sends it to the OpenAI API, and returns the generated response.
Parameters
----------
generator : Union{OAIGenerator, Nothing}
an initialized generator (e..g OAIGenerator)
leaving this as a union with nothing to note that we may want to support other
generator types in the future (e.g. HFGenerator, etc.)
query : String
the main query string. This is basically your question
context : OptionalContext, which is Union{Vector{String}, Nothing}
optional list of contextual chunk strings to provide the generator additional
context for the query. Ultimately, these will be coming from our vector DB
temperature : Float64
controls the stochasticity of the output generated by the model
"""
function generate(
generator::Union{Generator,GeneratorWithCorpus},
query::String,
context::OptionalContext = nothing,
temperature::Float64 = 0.7,
)
full_query = build_full_query(query, context)
if isa(generator, Union{OAIGenerator,OAIGeneratorWithCorpus})
generator.body["temperature"] = temperature
generator.body["messages"] = [Dict("role" => "user", "content" => full_query)]
body = JSON.json(generator.body)
response = HTTP.request("POST", generator.url, generator.header, body)
if response.status == 200
response_str = String(response.body)
parsed_dict = JSON.parse(response_str)
result = parsed_dict["choices"][1]["message"]["content"]
else
throw(error(
"OpenAI request failed. Status code $(response.status): $(String(response.body))",
))
end
elseif isa(generator, Union{OllamaGenerator,OllamaGeneratorWithCorpus})
options = Dict(
"temperature" => temperature,
"repeat_penalty" => 1.2
)
generator.body["options"] = options
generator.body["prompt"] = full_query
body = JSON.json(generator.body)
response = HTTP.request("POST", generator.url, generator.header, body)
if response.status == 200
response_str = String(response.body)
parsed_dict = JSON.parse(response_str)
result = parsed_dict["response"]
else
throw(error(
"Ollama request failed. Status code $(response.status): $(String(response.body))",
))
end
else
# if we have time, we can use this to generate via something locally-hosted
throw(ArgumentError("generator is not of a supported type."))
end
return result
end
"""
function generate_with_corpus(generator::Union{OAIGenerator, Nothing}, corpus::Corpus, query::String, k::Int=5, temperature::Float64=0.7)
Parameters
----------
generator : Union{OAIGenerator, Nothing}
an initialized generator (e..g OAIGenerator)
leaving this as a union with nothing to note that we may want to support other
generator types in the future (e.g. HFGenerator, etc.)
corpus : an initialized Corpus object
the corpus / "vector database" you want to use
query : String
the main instruction or query string. This is basically your question
k : int
The number of nearest-neighbor vectors to fetch from the corpus to build your context
temperature : Float64
controls the stochasticity of the output generated by the model
"""
function generate_with_corpus(
generator::GeneratorWithCorpus,
query::String,
k::Int = 5,
temperature::Float64 = 0.7,
)
idx_list, doc_names, chunks, distances = search(generator.corpus, query, k)
result = generate(generator, query, chunks, temperature)
return result, idx_list, doc_names, chunks
end
"""
function upsert_chunk_to_generator(generator::GeneratorWithCorpus, chunk::String, doc_name::String)
Equivalent to Backend.upsert_chunk, but takes a GeneratorWithCorpus
instead of a Corpus.
Parameters
----------
generator : any struct that subtypes GeneratorWithCorpus
the generator (with corpus) you want to use
chunk : str
This is the text content of the chunk you want to upsert
doc_name : str
The name of the document that chunk is from. For instance, if you
were upserting all the chunks in an academic paper, doc_name might
be the name of that paper
Notes
-----
One would expect Julia's multiple dispatch to allow us to call this
upsert_chunk, but not so. The conflict arises in Juissie, where
we would have both SemanticSearch and Generation exporting
upsert_chunk. This means any uses of it in Juissie must be
qualified, and without doing so, neither actually gets defined.
"""
function upsert_chunk_to_generator(
generator::GeneratorWithCorpus,
chunk::String,
doc_name::String,
)
upsert_chunk(generator.corpus, chunk, doc_name)
end
"""
function upsert_document_to_generator(generator::GeneratorWithCorpus, doc_text::String, doc_name::String)
Equivalent to Backend.upsert_document, but takes a GeneratorWithCorpus
instead of a Corpus.
Parameters
----------
generator : any struct that subtypes GeneratorWithCorpus
the generator (with corpus) you want to use
doc_text : str
A long string you want to upsert. We will break this into chunks and
upsert each chunk.
doc_name : str
The name of the document the content is from
Notes
-----
See note for upsert_chunk_to_generator - same idea.
"""
function upsert_document_to_generator(
generator::GeneratorWithCorpus,
doc_text::String,
doc_name::String,
)
upsert_document(generator.corpus, doc_text, doc_name)
end
"""
function upsert_document_from_url_to_generator(generator::GeneratorWithCorpus, url::String, doc_name::String, elements::Array{String}=["h1", "h2", "p"])
Equivalent to Backend.upsert_document_from_url, but takes a
GeneratorWithCorpus instead of a Corpus.
Parameters
----------
generator : any struct that subtypes GeneratorWithCorpus
the generator (with corpus) you want to use
url : String
The url you want to scrape for text
doc_name : str
The name of the document the content is from
elements : Array{String}
A list of HTML elements you want to pull the text from
Notes
-----
See note for upsert_chunk_to_generator - same idea.
"""
function upsert_document_from_url_to_generator(
generator::GeneratorWithCorpus,
url::String,
doc_name::String,
elements::Array{String} = ["h1", "h2", "p"],
)
upsert_document_from_url(generator.corpus, url, doc_name, elements)
end
end