In [245]:
import json
import os
import re
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from tqdm import tqdm
from langchain.callbacks import get_openai_callback
from paperqa import Docs
from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.base import Docstore
from langchain.docstore.in_memory import InMemoryDocstore
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
import random
from langchain.agents.agent import AgentExecutor
from langchain.tools.base import BaseTool
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from pydantic import BaseModel
from langchain.vectorstores.base import VectorStore
from langchain.embeddings import OpenAIEmbeddings
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [2]:
def read_jsonl(path: str):
    with open(path) as fh:
        return [json.loads(line) for line in fh.readlines() if line]
    
def extract_ai_answer(completion):
    return completion.split('Answer: ')[-1].strip()

In [6]:
with open('datasets/Strategy_QA/strategyqa_train.json', 'r', encoding='utf-8') as file:
    train_data = json.load(file)
    
with open('datasets/Strategy_QA/strategyqa_train_paragraphs.json', 'r', encoding='utf-8') as file:
    train_paragraphs = json.load(file)
    
with open('datasets/Strategy_QA/strategyqa_test.json', 'r', encoding='utf-8') as file:
    test_data = json.load(file)


In [7]:
all_documents = [{'source' : identifier, 
                  'title' : data['title'], 
                  'content' : data['content']} 
                  for identifier, data in train_paragraphs.items()]

In [11]:
for doc in all_documents:
    print(len(doc['content']))
    

968
153
145
901
607
827
650
506
198
379
450
409
7893
380
1807
4827
310
599
800
743
284
311
403
490
1108
870
266
453
456
665
94
568
415
1254
431
899
152
939
327
290
327
353
468
235
876
733
844
526
119
502
757
990
741
615
1333
668
437
432
496
636
728
668
483
902
357
642
521
542
804
1151
441
421
728
578
412
420
1016
179
195
211
752
436
775
637
1114
1317
744
492
382
1016
1305
586
825
394
373
566
565
422
161
756
420
557
333
205
405
653
403
1246
689
570
236
811
1159
342
77
700
595
473
907
122
534
563
838
515
464
852
515
478
962
354
436
1120
376
689
456
767
178
461
877
280
365
130
139
505
334
477
348
527
1293
397
581
465
690
538
238
449
870
300
913
336
545
386
1322
441
404
566
526
353
2025
327
1472
658
534
440
704
1405
283
495
247
442
143
651
293
446
549
469
440
384
417
723
493
750
1154
607
333
458
716
265
338
486
599
2823
785
391
448
277
1288
319
292
799
710
379
306
597
740
807
273
788
487
427
273
481
920
681
2363
420
2253
348
1544
477
985
514
282
509
408
541
345
435
963
1365
1577
687
387
35

429
891
257
787
480
374
239
490
847
376
904
398
634
643
783
168
917
598
519
958
845
139
256
829
467
603
599
277
108
887
696
1150
441
465
503
1013
1258
1340
797
418
439
954
164
603
490
157
1165
312
659
936
657
864
1413
324
515
740
439
862
394
905
4714
378
805
983
501
337
525
253
791
176
349
742
694
897
477
654
744
598
131
2645
557
1846
380
489
542
1693
558
687
210
717
191
1586
902
1242
547
1208
922
546
528
287
634
398
719
1097
321
905
894
873
1549
369
296
375
2198
772
152
457
1175
538
277
657
485
879
736
124
217
340
290
314
131
969
130
674
1011
436
565
725
316
559
443
901
620
582
579
379
1274
658
1362
693
269
575
297
167
587
1302
689
713
491
299
609
91
576
2482
192
745
765
1651
989
503
544
630
743
768
173
268
458
573
284
345
387
596
500
975
642
800
443
546
628
223
714
735
422
397
514
1185
261
231
331
56
740
441
91
3535
441
499
602
320
713
914
279
752
332
392
367
608
336
596
681
547
793
568
1116
351
248
366
575
463
324
378
526
159
865
565
1576
999
876
1203
257
761
354
375
692
498
222
276

260
628
797
437
613
180
482
453
640
1752
599
137
361
399
262
269
425
715
275
470
689
711
1136
536
426
304
662
1001
795
347
165
217
535
593
161
673
98
325
373
742
523
401
149
813
457
410
755
904
836
368
369
264
385
578
830
310
1129
605
132
1373
683
340
405
443
440
376
603
338
589
1290
651
907
578
387
969
2048
554
182
311
292
691
564
316
820
240
446
399
845
605
230
1052
792
599
445
558
217
826
350
1631
596
348
357
105
174
543
212
400
544
215
197
313
513
369
377
480
468
275
494
240
2335
303
324
137
421
2458
1283
603
504
419
714
918
942
573
1034
695
380
835
269
161
155
371
938
176
1258
502
354
440
760
297
268
576
441
164
733
534
848
859
647
571
204
422
114
303
693
471
1184
827
921
1159
294
497
256
361
1079
181
370
296
257
931
777
648
1500
565
501
743
800
218
514
976
386
1398
413
494
503
581
1117
1672
158
172
485
297
304
576
794
618
1173
726
359
252
343
609
261
457
645
159
1126
819
768
1008
282
441
488
606
330
724
461
871
622
369
302
842
419
107
759
449
810
955
1793
327
317
215
771
756
322


1734
753
1320
584
1687
792
496
720
412
222
711
403
277
541
508
350
491
556
161
555
1049
731
380
1101
544
665
616
431
1060
312
91
394
477
230
632
242
331
700
1171
1316
647
307
217
635
141
609
95
664
244
1702
175
475
214
426
367
530
1323
439
839
486
671
616
578
832
760
614
481
514
369
534
499
354
778
543
456
743
167
623
718
546
563
774
1422
568
425
183
877
308
663
228
475
542
496
484
1208
659
469
1397
963
461
462
106
547
715
314
1127
855
417
294
910
611
628
468
309
543
149
92
243
376
665
864
355
104
566
811
500
544
1064
640
694
284
704
1370
501
369
2802
310
817
300
605
668
723
1003
311
671
629
287
667
571
859
204
243
336
798
810
494
619
829
1094
436
485
489
75
492
333
999
291
151
276
486
1235
571
555
137
367
506
1314
1234
550
297
486
412
761
522
1004
173
294
891
340
288
1089
770
1039
697
253
474
868
248
1171
1067
736
644
318
587
210
688
390
379
378
989
865
335
362
727
548
332
4904
743
633
670
300
956
608
558
853
353
285
482
405
1162
404
337
526
771
245
879
355
416
651
1056
200
1529
733
9

In [29]:
PATH_STRATEGYQA = '../datasets/Strategy_QA/strategyqa_train.json'
PATH_STRATEGYQA_DOCS = "../datasets/Strategy_QA/strategyqa_train_paragraphs.json"
INDEX_PATH = '../VectorStore/'

def index_docs(train_paragraphs): 
    train_paragraphs = load_strategyqa(PATH_STRATEGYQA_DOCS)

    all_documents = [{'source' : identifier, 
                      'title' : data['title'], 
                      'content' : data['content']} 
                     for identifier, data in train_paragraphs.items()]
        
    first_chunk = all_documents[0]
    first_metadata = {key : first_chunk[key] for key in first_chunk if key in ['source', 'title']}
    db = FAISS.from_texts(texts=[first_chunk['content']], embedding=OpenAIEmbeddings(), metadatas=[first_metadata])
    
    
    batch_text_data = []
    batch_meta_data = []
    batch_limit = 100
    
    for i, example in enumerate(tqdm(all_documents[1:])):
        text_data = example['content']
        meta_data = {key : example[key] for key in example if key in ['source', 'title']}

        batch_text_data.append(text_data)
        batch_meta_data.append(meta_data)
        
        try:
            if len(batch_text_data) >= batch_limit:                
                db.add_texts(batch_text_data, batch_meta_data)
                batch_text_data = []
                batch_meta_data = []

            elif i == len(all_chunks) - 1:
                if batch_text_data != [] and batch_meta_data != []:
                    db.add_texts(batch_text_data, batch_meta_data)
        except Exception as e:
            print(f'Could not index the batch: {batch_meta_data}')
            print(f'Error Message: {e}')
    
    db.save_local(INDEX_PATH)
    return 'Indexing Done Successfully!'


In [226]:
class QAWrapper(BaseModel):
    docs: Docs

    class Config:
        arbitrary_types_allowed = True

    def run(self, query: str) -> str:
        print(len(self.docs.docs))
        print(query)
        # Use k text passages in up to max_sources different documents
        answer = self.docs.query(query, k=10, max_sources=10)
        return answer.formatted_answer
    
class QATool(BaseTool):
    name = "Intermediate Answer"
    description = """This tool is useful for answering questions from the content of documents in Docs.
    Input should be a question."""
    api_wrapper: QAWrapper = None

    def __init__(self, docs: Docs):
        api_wrapper = QAWrapper(docs=docs)
        super().__init__(api_wrapper=api_wrapper)

    def _run(self, query: str) -> str:
        """Use the tool."""
        return self.api_wrapper.run(query)

    async def _arun(self, query: str) -> str:
        """Use the tool."""
        raise NotImplementedError

In [184]:
len(train_paragraphs.keys())

9251

In [188]:
len(set(train_paragraphs.keys()))

9251

In [5]:
db = FAISS.load_local("VectorStore/", OpenAIEmbeddings())

In [6]:
len(db.index_to_docstore_id)

9201

In [31]:
train_data[0]

{'qid': 'b8677742616fef051f00',
 'term': 'Genghis Khan',
 'description': 'founder and first Great Khan of the Mongol Empire',
 'question': 'Are more people today related to Genghis Khan than Julius Caesar?',
 'answer': True,
 'facts': ['Julius Caesar had three children.',
  'Genghis Khan had sixteen children.',
  'Modern geneticists have determined that  out of every 200 men today has DNA that can be traced to Genghis Khan.'],
 'decomposition': ['How many kids did Julius Caesar have?',
  'How many kids did Genghis Khan have?',
  'Is #2 greater than #1?'],
 'evidence': [[[['Caesarion-2', 'Julia (daughter of Caesar)-1']],
   [['Alakhai Bekhi-1', 'Tolui-1'], 'no_evidence'],
   ['operation']],
  [[['Julius Caesar-75']], [['Genghis Khan-17']], ['operation']],
  [[['Gaius Julius Caesar-7']],
   [['Genghis Khan-15'], 'no_evidence'],
   ['no_evidence', 'operation']]]}

In [30]:
def get_relevant_docs(query, index, k=3):
    return index.similarity_search(query, k=k)

In [37]:
relevant_docs = get_relevant_docs('How many kids did Julius Caesar have?', db, 20)

'Julius_Caesar_75'

In [None]:
docs = Docs()
directory = 'Documents/StrategyQA_Relevant_Docs/'

for doc in relevant_docs:
    doc_content = doc.page_content
    doc_id = re.sub(r"[^\w\d]", "_", f"{relevant_docs[0].metadata['source']}")
    
    file_full_path = directory + doc_id + '.txt'
    # Open a file in write mode
    with open(file_full_path, "w") as file:

# Write some text to the file
        file.write("Hello, world!\n")
        file.write("This is a text file written in Python.\n")



In [9]:
class QASWrapper(BaseModel):
    db: VectorStore

    class Config:
        arbitrary_types_allowed = True

    def run(self, query: str) -> str:
        qa_with_sources = VectorDBQAWithSourcesChain.from_chain_type(
                                                        llm=OpenAI(),
                                                        chain_type="stuff",
                                                        vectorstore=db)
        
        qa_with_sources.k = 10
        return f'\n{qa_with_sources({"question": query}, return_only_outputs=True)}\n'
        
        
        # Use k text passages in up to max_sources different documents
        #answer = self.docs.query(query, k=10, max_sources=5)
        #return answer.formatted_answer
    
class QASTool(BaseTool):
    name = "Intermediate Answer"
    description = """This tool is useful for answering questions.
    In principle, it can also fail, telling you that there is insufficient information, or by saying that there is no
    clear answer. Input should be a question."""
    api_wrapper: QASWrapper = None

    def __init__(self, db: VectorStore):
        api_wrapper = QASWrapper(db=db)
        super().__init__(api_wrapper=api_wrapper)

    def _run(self, query: str) -> str:
        """Use the tool."""
        return self.api_wrapper.run(query)

    async def _arun(self, query: str) -> str:
        """Use the tool."""
        raise NotImplementedError

In [10]:
query = 'How many kids did Julius Caesar have?'
qas_tool = QASTool(db)
qas_tool._run(query)



"\n{'answer': ' Julius Caesar had one known child, Julia, from his first or second marriage to Cornelia. He also had a suspected child, Marcus Junius Brutus, and a confirmed son, Caesarion, by Cleopatra VII.\\n', 'sources': 'Julius Caesar-75, Julia (daughter of Caesar)-1, Caesarion-2, Size of the Roman army-7, Gaius Julius Caesar-7, Medea-10, Assassination of Julius Caesar-1, Julius Caesar-1, Gallic Wars-4, Larry King-43'}\n"

In [11]:
train_data[0]

{'qid': 'b8677742616fef051f00',
 'term': 'Genghis Khan',
 'description': 'founder and first Great Khan of the Mongol Empire',
 'question': 'Are more people today related to Genghis Khan than Julius Caesar?',
 'answer': True,
 'facts': ['Julius Caesar had three children.',
  'Genghis Khan had sixteen children.',
  'Modern geneticists have determined that  out of every 200 men today has DNA that can be traced to Genghis Khan.'],
 'decomposition': ['How many kids did Julius Caesar have?',
  'How many kids did Genghis Khan have?',
  'Is #2 greater than #1?'],
 'evidence': [[[['Caesarion-2', 'Julia (daughter of Caesar)-1']],
   [['Alakhai Bekhi-1', 'Tolui-1'], 'no_evidence'],
   ['operation']],
  [[['Julius Caesar-75']], [['Genghis Khan-17']], ['operation']],
  [[['Gaius Julius Caesar-7']],
   [['Genghis Khan-15'], 'no_evidence'],
   ['no_evidence', 'operation']]]}

In [12]:
llm = OpenAI(temperature=0.2) #, model_name='gpt-3.5-turbo')
tools = [QASTool(db)]
my_agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

In [17]:
my_agent.run('Who are the member of The Police?')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should research the members of the band
Action: Intermediate Answer
Action Input: Who are the members of The Police?[0m




Observation: [36;1m[1;3m
{'answer': ' The members of The Police are Sting (lead vocals, bass guitar), Andy Summers (guitar), and Stewart Copeland (drums, percussion).\n', 'sources': 'The Police-1, The Police-8'}
[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: The members of The Police are Sting (lead vocals, bass guitar), Andy Summers (guitar), and Stewart Copeland (drums, percussion).[0m

[1m> Finished chain.[0m


'The members of The Police are Sting (lead vocals, bass guitar), Andy Summers (guitar), and Stewart Copeland (drums, percussion).'

In [23]:
llm = OpenAI(temperature=0.2) #, model_name='gpt-3.5-turbo')
tools = [QASTool(db)]
self_ask_with_search = initialize_agent(tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True)

In [207]:
new_template = """
[Example 1]
Question: Do hamsters provide food for any animals?
Output:
Sub Question #0 : What type of animals are hamsters?
Sub Answer #0 : Hamsters are prey animals.
Sub Question #1 : Can prey animals be food for other animals?
Sub Answer #1 : Prey are food for predators.
Sub Question #2 : Do hamsters provide food for any animals?
Sub Answer #2 : Since hamsters are prey animals, and prey are food for predetors, hamsters provide food for some animals.
Final Answer: YES

[Example 2]
Question: Could Brooke Shields succeed at University of Pennsylvania?
Output:
Sub question #0 : What university did Brooke Shields went to?
Sub answer #0 : Brooke Shields went to Princeton University.
Sub question #1 : Did Brooke Shields succeed at Princeton University?
Sub answer #1 : At Princeton University, she got all As and Bs while pursing her bachelor's degree in French literature, meaning she had a successful school life.
Sub question #2 : How rigorous is Princeton University compared to University of Pennsylvania?
Sub answer #2 : Princeton University is about as academically rigorous as the University of Pennsylvania because they have a similar ranking according to U.S. News Rankings.
Sub question #3 : Could Brooke Shields succeed at University of Pennsylvania?
Sub answer #3 : Since University of Pennsylvania and University of Princeton are in similar circumstances, Brooke Shields has been successful in University of Princeton, Brooke Shields could also succeed at the University of Pennsylvania.
Final Answer: YES

[Example 3]
Question: Hydrogen\u2019s atomic number squared exceeds number of Spice Girls?
Output:
Sub question #0 : What is the atomic number of Hydrogen?
Sub answer #0 : Hydrogen has an atomic number of 1.
Sub question #1 : What is 1 squared?
Sub answer #1 : 1 squared is 1.
Sub question #2 : How much Spice Girls are there?
Sub answer #2 : There are 5 Spice Girls.
Sub question #3 : Hydrogen\u2019s atomic number squared exceeds number of Spice Girls?
Sub answer #3 : Since Hydrogen's atomic number squared is 1, the number of Spice Girls are 5, and 1 is smaller than 5, Hydrogen\u2019s atomic number squared is less than the number of Spice Girls.
Final Answer: NO

[Example 4]
Question: Is it common to see frost during some college commencements?
Output:
Sub question #0 : When does College commencement ceremonies usually happen?
Sub answer #0 : College commencement ceremonies can happen in December, May, and June.
Sub question #1 : Does it usually frost in December?
Sub answer #1 : December is in the winter, so there can be frost.
Sub question #2 : Is it common to see frost during some college commencements?
Sub answer #2 : Since there can be frost in December and a college commencement are held in December, there could be frost at some commencements.
Final Answer: YES

[Example 5]
Question: Could a llama birth twice during War in Vietnam (1945-46)?
Output:
Sub question #0 : How long was the Vietnam war?
Sub answer #0 : The War in Vietnam was 6 months.
Sub question #1 : How long is the gestation period?
Sub answer #1 : The gestation period for a llama is 11 months.
Sub question #2 : How long does it take for a llama to birth twice?
Sub answer #2 : Since the gestation period for a llama is 11 months, and 11 times 2 is 22, it will take 22 months.
Sub question #3 : Could a llama birth twice during War in Vietnam (1945-46)?
Sub answer #3 : Since it takes 22 months for a llama to birth twice, War in Vietnam was 6 months, and 22 is bigger than 6, llama could not give birth twice during the War in Vietnam.
Final Answer: NO

[Example 6]
Question: Would a pear sink in water?
Output:
Sub question #0 : What is the density of a pear?
Sub answer #0 : The density of a pear is about 0.6g/cm3.
Sub question #1 : What is the density of water?
Sub answer #1 : The density of water is 1g/cm3.
Sub question #2 : Is the density of pear smaller than water?
Sub answer #2 : Since 0.6 is smaller than 1, the density of pear is smaller than water.
Sub question #3 : If the density of an object is less than water, what happens?
Sub answer #3 : Objects less dense than water float.
Sub question #4 : Would a pear sink in water?
Sub answer #4 : Since a pear has a smaller density than water, a pear would float.
Final Answer: NO

Question: {input}
Output: 
{agent_scratchpad}

"""

print(new_template)

# Reference : https://github.com/SeungoneKim/CoTEVer/blob/main/Middleware/CoTEVer_AI/prompts/demo.txt


[Example 1]
Question: Do hamsters provide food for any animals?
Output:
Sub Question #0 : What type of animals are hamsters?
Sub Answer #0 : Hamsters are prey animals.
Sub Question #1 : Can prey animals be food for other animals?
Sub Answer #1 : Prey are food for predators.
Sub Question #2 : Do hamsters provide food for any animals?
Sub Answer #2 : Since hamsters are prey animals, and prey are food for predetors, hamsters provide food for some animals.
Final Answer: YES

[Example 2]
Question: Could Brooke Shields succeed at University of Pennsylvania?
Output:
Sub question #0 : What university did Brooke Shields went to?
Sub answer #0 : Brooke Shields went to Princeton University.
Sub question #1 : Did Brooke Shields succeed at Princeton University?
Sub answer #1 : At Princeton University, she got all As and Bs while pursing her bachelor's degree in French literature, meaning she had a successful school life.
Sub question #2 : How rigorous is Princeton University compared to University

In [25]:
self_ask_with_search.agent.llm_chain.prompt.template = new_template

In [26]:
self_ask_with_search.run(train_data[0]['question'])



[1m> Entering new AgentExecutor chain...[0m


InvalidRequestError: This model's maximum context length is 4097 tokens, however you requested 4347 tokens (4091 in your prompt; 256 for the completion). Please reduce your prompt; or completion length.

In [31]:
full_prompt = new_template
for el in get_relevant_docs(train_data[0]['question'], db, 10):
    full_prompt += el.page_content
    

In [36]:
get_relevant_docs(train_data[0]['question'], db, 10)

[Document(page_content="Genghis Khan's four sons and other immediate descendants are famous by names and by deeds. Later Asian rulers attempted to claim descent from the Borjigin even on weak grounds, such as Mongol matrilineal descent.[citation needed] In the 14th century, valid sources (heavily dependent on Rashid-al-Din Hamadani and other Muslim historians) all but dried up.", metadata={'source': 'Descent from Genghis Khan-2', 'title': 'Descent from Genghis Khan'}),
 Document(page_content="Genghis Khan (born Temüjin Borjigin, c.\u20091162\xa0– August 18, 1227), also officially Genghis Emperor, was the founder and first Great Khan and Emperor of the Mongol Empire, which became the largest contiguous empire in history after his death. He came to power by uniting many of the nomadic tribes of Northeast Asia. After founding the Empire and being proclaimed Genghis Khan, he launched the Mongol invasions that conquered most of Eurasia. Campaigns initiated in his lifetime include those agai

In [39]:
import tiktoken

def num_tokens_from_string(string) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.encoding_for_model("text-davinci-003")
    num_tokens = len(encoding.encode(string))
    return num_tokens

num_tokens_from_string(full_prompt)

2727

In [5]:
train_data[0]

{'qid': 'b8677742616fef051f00',
 'term': 'Genghis Khan',
 'description': 'founder and first Great Khan of the Mongol Empire',
 'question': 'Are more people today related to Genghis Khan than Julius Caesar?',
 'answer': True,
 'facts': ['Julius Caesar had three children.',
  'Genghis Khan had sixteen children.',
  'Modern geneticists have determined that  out of every 200 men today has DNA that can be traced to Genghis Khan.'],
 'decomposition': ['How many kids did Julius Caesar have?',
  'How many kids did Genghis Khan have?',
  'Is #2 greater than #1?'],
 'evidence': [[[['Caesarion-2', 'Julia (daughter of Caesar)-1']],
   [['Alakhai Bekhi-1', 'Tolui-1'], 'no_evidence'],
   ['operation']],
  [[['Julius Caesar-75']], [['Genghis Khan-17']], ['operation']],
  [[['Gaius Julius Caesar-7']],
   [['Genghis Khan-15'], 'no_evidence'],
   ['no_evidence', 'operation']]]}

In [22]:
train_data[0]

{'qid': 'b8677742616fef051f00',
 'term': 'Genghis Khan',
 'description': 'founder and first Great Khan of the Mongol Empire',
 'question': 'Are more people today related to Genghis Khan than Julius Caesar?',
 'answer': True,
 'facts': ['Julius Caesar had three children.',
  'Genghis Khan had sixteen children.',
  'Modern geneticists have determined that  out of every 200 men today has DNA that can be traced to Genghis Khan.'],
 'decomposition': ['How many kids did Julius Caesar have?',
  'How many kids did Genghis Khan have?',
  'Is #2 greater than #1?'],
 'evidence': [[[['Caesarion-2', 'Julia (daughter of Caesar)-1']],
   [['Alakhai Bekhi-1', 'Tolui-1'], 'no_evidence'],
   ['operation']],
  [[['Julius Caesar-75']], [['Genghis Khan-17']], ['operation']],
  [[['Gaius Julius Caesar-7']],
   [['Genghis Khan-15'], 'no_evidence'],
   ['no_evidence', 'operation']]]}

In [73]:
retriever = db.as_retriever(search_kwargs={"k": 30, 'reduce_k_below_max_tokens' : True})
qa = RetrievalQA.from_chain_type(llm=OpenAI(model_name='gpt-3.5-turbo'), chain_type="stuff", retriever=retriever)
qa.run('How many kids did Julius Caesar have?')

'Julius Caesar had at least three children: Julia by his first wife Cornelia, Caesarion by his lover Cleopatra VII, and a son named Gaius Julius Caesar Octavianus whom he posthumously adopted and who later became Emperor Augustus. He was also believed to have had other suspected children, including Brutus and Junia Tertia.'

In [None]:
ocs = retriever.get_relevant_documents("what did he say abotu ketanji brown jackson")

In [21]:
"""Attempt to implement MRKL systems as described in arxiv.org/pdf/2205.00445.pdf."""
from __future__ import annotations

# import re
from typing import Any, List, Optional, Sequence, Tuple

from langchain.agents.agent import Agent
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import BaseLanguageModel
from langchain.tools.base import BaseTool

# flake8: noqa
PREFIX = """You are a thoughtful scholar who is willing to answer a question in great detail. 
You can repeatedly (up to 4 times) ask sub-questions to an assistant in order to gather more information. You should generate
for each sub-question a sub-answer before generating the final answer. The final answer should be either YES or NO.
All sub-questions asked to the assistant should be completely self-contained without requiring access to previous sub-questions and answers."""
FORMAT_INSTRUCTIONS = """Use the following format:

Question: the input question you must answer
Thought: reflect on what information is missing in order to answer the original question
Sub-question: a sub-question you want the assistant to answer 
Sub-answer: the answer to the sub-question
... (this Thought/Sub-question/Assistant answer loop can repeat up to 10 times)
Final Answer: the final answer to the original input question"""
SUFFIX = """Begin!

Question: {input}
{agent_scratchpad}"""



FINAL_ANSWER_ACTION = "Final Answer:"


def get_action_and_input(llm_output: str) -> Tuple[str, str]:
    """Parse out the action and input from the LLM output.
    Note: if you're specifying a custom prompt for the ZeroShotAgent,
    you will need to ensure that it meets the following Regex requirements.
    The string starting with "Action:" and the following string starting
    with "Action Input:" should be separated by a newline.
    """
    print(llm_output)
    if FINAL_ANSWER_ACTION in llm_output:
        return "Final Answer", llm_output.split(FINAL_ANSWER_ACTION)[-1].strip()
    # \s matches against tab/newline/whitespace
    # regex = r"Sub-question: (.*)"
    # match = re.search(regex, llm_output, re.DOTALL)
    # if not match:
    #     raise ValueError(f"Could not parse LLM output: `{llm_output}`")
    # sub_question = match.group(1).strip()
    sub_question = llm_output.split("Sub-question:")[-1].strip()
    return "Research", sub_question.strip(" ").strip('"')


class TopDownAgent(Agent):
    """Agent that patiently ponders about a question."""

    @property
    def _agent_type(self) -> str:
        """Return Identifier of agent type."""
        return "top-down-scholar"

    @property
    def observation_prefix(self) -> str:
        """Prefix to append the observation with."""
        return "Sub-answer: "

    @property
    def llm_prefix(self) -> str:
        """Prefix to append the llm call with."""
        return "Thought:"

    @classmethod
    def create_prompt(
        cls,
        tools: Sequence[BaseTool],
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
        input_variables: Optional[List[str]] = None,
    ) -> PromptTemplate:
        """Create prompt in the style of the zero shot agent.
        Args:
            tools: List of tools the agent will have access to, used to format the
                prompt.
            prefix: String to put before the list of tools.
            suffix: String to put after the list of tools.
            input_variables: List of input variables the final prompt will expect.
        Returns:
            A PromptTemplate with the template assembled from the pieces here.
        """
        format_instructions = format_instructions.format()
        template = "\n\n".join([prefix, format_instructions, suffix])
        if input_variables is None:
            input_variables = ["input", "agent_scratchpad"]
        return PromptTemplate(template=template, input_variables=input_variables)

    @classmethod
    def from_llm_and_tools(
        cls,
        llm: BaseLanguageModel,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
        input_variables: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Agent:
        """Construct an agent from an LLM and tools."""
        cls._validate_tools(tools)
        prompt = cls.create_prompt(
            tools,
            prefix=prefix,
            suffix=suffix,
            format_instructions=format_instructions,
            input_variables=input_variables,
        )
        llm_chain = LLMChain(
            llm=llm,
            prompt=prompt,
            callback_manager=callback_manager,
        )
        tool_names = [tool.name for tool in tools]
        return cls(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)

    @classmethod
    def _validate_tools(cls, tools: Sequence[BaseTool]) -> None:
        """Validate that the tools are valid for this agent."""
        for tool in tools:
            if tool.name != "Research":
                raise ValueError(
                    f"Patient Scholar Agent can only use Research tool, but got {tool.name}"
                )
        if len(tools) != 1:
            raise ValueError(
                f"Patient Scholar Agent can only use one tool, but got {len(tools)}"
            )

    def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]:
        action, sub_question = get_action_and_input(text)
        # print(sub_question)
        return action, sub_question


In [28]:
len(train_paragraphs.keys())

9251

In [22]:
llm = OpenAI(temperature=0.2)
tools = [QASTool(db)]
my_agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

In [25]:
base_agent = my_agent

def _run_agent(query):
    # print(f"Running agent on query:{query}")
    return base_agent.run(query)

search_tool = Tool(
    name="Research",
    description="Answers a question by researching relevant documents",
    func=_run_agent,
)
agent_obj = TopDownAgent.from_llm_and_tools(llm, [search_tool], verbose=True)
agent = AgentExecutor.from_agent_and_tools(
    agent_obj,
    [search_tool],
    verbose=True,
    early_stopping_method="generate"
)

In [27]:
agent.run(train_data[0]['question'])



[1m> Entering new AgentExecutor chain...[0m
Thought: I need to know the population of people living today and the number of descendants of each historical figure.

Sub-question: How many people are living today?
[32;1m[1;3mThought: I need to know the population of people living today and the number of descendants of each historical figure.

Sub-question: How many people are living today?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find a reliable source of information.
Action: Intermediate Answer
Action Input: How many people are living today?[0m
Observation: [36;1m[1;3m
{'answer': ' The estimated world population as of March 2020 is 7.8 billion people.\n', 'sources': 'World population-1'}
[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: The estimated world population as of March 2020 is 7.8 billion people.[0m

[1m> Finished chain.[0m

Sub-answer: [36;1m[1;3mThe estimated world population as of March 2020 is 7.8 billion 

'YES, there are more people today related to Genghis Khan than Julius Caesar.'

In [26]:
train_data[0]

{'qid': 'b8677742616fef051f00',
 'term': 'Genghis Khan',
 'description': 'founder and first Great Khan of the Mongol Empire',
 'question': 'Are more people today related to Genghis Khan than Julius Caesar?',
 'answer': True,
 'facts': ['Julius Caesar had three children.',
  'Genghis Khan had sixteen children.',
  'Modern geneticists have determined that  out of every 200 men today has DNA that can be traced to Genghis Khan.'],
 'decomposition': ['How many kids did Julius Caesar have?',
  'How many kids did Genghis Khan have?',
  'Is #2 greater than #1?'],
 'evidence': [[[['Caesarion-2', 'Julia (daughter of Caesar)-1']],
   [['Alakhai Bekhi-1', 'Tolui-1'], 'no_evidence'],
   ['operation']],
  [[['Julius Caesar-75']], [['Genghis Khan-17']], ['operation']],
  [[['Gaius Julius Caesar-7']],
   [['Genghis Khan-15'], 'no_evidence'],
   ['no_evidence', 'operation']]]}

In [246]:
all_documents = [{'source' : identifier, 
                  'title' : data['title'], 
                  'content' : data['content']} 
                  for identifier, data in train_paragraphs.items()]



In [247]:
directory = 'Documents/StrategyQA_Docs/'
docs = Docs()
failed_docs = []

for doc in tqdm(all_documents):
    doc_id = re.sub(r"[^\w\d]", "_", doc['source'])
    citation = doc['title']
    
    file_full_path = directory + doc_id + '.txt'
    with open(file_full_path, 'w', encoding='utf-8') as file:
        file.write(doc['content'])
        
    try:
        docs.add(path=file_full_path, citation=citation, key=doc_id)
    except Exception as e:
        print(e, type(e))
        failed_docs.append(doc_id)
        

100%|█████████████████████████████████████████████████████████████████████████████| 9251/9251 [00:13<00:00, 707.28it/s]


In [248]:
np.unique([len(docs.docs[el]['texts']) for el in docs.docs], return_counts=True)

(array([1]), array([9251], dtype=int64))

In [236]:
answer = tool.api_wrapper.docs.query("Did any of the members of The Police have any law enforcement training?", k=10, max_sources=10)

In [237]:
print(answer.formatted_answer)

Question: Did any of the members of The Police have any law enforcement training?

I cannot answer this question due to insufficient information.

Tokens Used: 3193 Cost: $0.01


In [249]:
answer = docs.query("Did any of the members of The Police have any law enforcement training?", k=10, max_sources=10)

In [250]:
print(answer.formatted_answer)

Question: Did any of the members of The Police have any law enforcement training?

I cannot answer. The provided context does not provide any information on whether or not any members of The Police had law enforcement training.

References

1. (Law_enforcement_officer_13): Law enforcement officer

Tokens Used: 3488 Cost: $0.01


In [243]:
len(docs.docs)

9251

In [231]:
tool = QATool(docs)
print(tool.run('Did any of the members of The Police have any law enforcement training?'))

9251
Did any of the members of The Police have any law enforcement training?
Question: Did any of the members of The Police have any law enforcement training?

I cannot answer this question due to insufficient information.

Tokens Used: 3196 Cost: $0.01


In [251]:
llm = OpenAI(temperature=0.2) #, model_name='gpt-3.5-turbo')
tools = [QATool(docs)]
new_agent = initialize_agent(tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True)

In [261]:
train_data[11]

{'qid': 'b77d2efee37741e44c32',
 'term': 'Space Race',
 'description': 'Competition between the USSR and the USA to explore space',
 'question': 'Did the Space Race use relay batons?',
 'answer': False,
 'facts': ['The Space Race was a competition between the USA and USSR regarding spaceflight and exploration',
  'Relay batons are used in relay races',
  'Relay races are athletic track and field events'],
 'decomposition': ['What was the Space Race?',
  'What are relay batons used for?',
  'Is #1 the same as #2?'],
 'evidence': [[[['Space Race-1']],
   [['Relay race-11']],
   [['Relay race-11', 'Space Race-1'], 'operation']],
  [[['Space Race-1']], [['Relay race-1']], ['operation']],
  [[['Space Race-1']], [['Relay race-1']], ['operation']]]}

In [263]:
new_agent.return_intermediate_steps('What are relay batons used for?')



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m No.
So the final answer is: Relay batons are used in track and field events to transfer the baton from one runner to the next during a relay race.[0m

[1m> Finished chain.[0m


'Relay batons are used in track and field events to transfer the baton from one runner to the next during a relay race.'

In [271]:
llm = OpenAI(temperature=0.2) #, model_name='gpt-3.5-turbo')
tools = [QATool(docs)]
react_agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True, max_iterations=4)

In [273]:
new_temp = 'You are willing to answer questions that require reasoning steps. You should think how to answer question step by step. The final answer must either "YES" or "NO".\n'

react_agent.agent.llm_chain.prompt.template = new_temp + react_agent.agent.llm_chain.prompt.template

In [276]:
react_agent.run(train_data[0]['question'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out how many people are related to each person.
Action: Intermediate Answer
Action Input: How many people are related to Genghis Khan and Julius Caesar?[0m9251
How many people are related to Genghis Khan and Julius Caesar?

Observation: [36;1m[1;3mQuestion: How many people are related to Genghis Khan and Julius Caesar?

I cannot answer this question due to insufficient information.

Tokens Used: 2067 Cost: $0.00[0m
Thought:[32;1m[1;3m I need to find out how many people are related to each person in modern times.
Action: Intermediate Answer
Action Input: How many people are related to Genghis Khan and Julius Caesar in modern times?[0m9251
How many people are related to Genghis Khan and Julius Caesar in modern times?


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 356ef569ff5ad93708cebe9f4db75db0 in your message.).



Observation: [36;1m[1;3mQuestion: How many people are related to Genghis Khan and Julius Caesar in modern times?

I cannot answer this question due to insufficient information.

Tokens Used: 1996 Cost: $0.00[0m
Thought:[32;1m[1;3m I need to find out how many people are related to each person in modern times, and compare the numbers.
Action: Intermediate Answer
Action Input: How many people are related to Genghis Khan and Julius Caesar in modern times, and which one has more people related to them?[0m9251
How many people are related to Genghis Khan and Julius Caesar in modern times, and which one has more people related to them?

Observation: [36;1m[1;3mQuestion: How many people are related to Genghis Khan and Julius Caesar in modern times, and which one has more people related to them?

I cannot answer this question due to insufficient information.

Tokens Used: 2341 Cost: $0.00[0m
Thought:[32;1m[1;3m I need to find out if there is any reliable data that can help me answer 

KeyboardInterrupt: 

In [213]:
llm = OpenAI(temperature=0.2) #, model_name='gpt-3.5-turbo')
tools = [QATool(docs)]
new_agent = initialize_agent(tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True)

In [220]:
print(new_agent.agent.llm_chain.prompt.template)

Question: Who lived longer, Muhammad Ali or Alan Turing?
Are follow up questions needed here: Yes.
Follow up: How old was Muhammad Ali when he died?
Intermediate answer: Muhammad Ali was 74 years old when he died.
Follow up: How old was Alan Turing when he died?
Intermediate answer: Alan Turing was 41 years old when he died.
So the final answer is: Muhammad Ali

Question: When was the founder of craigslist born?
Are follow up questions needed here: Yes.
Follow up: Who was the founder of craigslist?
Intermediate answer: Craigslist was founded by Craig Newmark.
Follow up: When was Craig Newmark born?
Intermediate answer: Craig Newmark was born on December 6, 1952.
So the final answer is: December 6, 1952

Question: Who was the maternal grandfather of George Washington?
Are follow up questions needed here: Yes.
Follow up: Who was the mother of George Washington?
Intermediate answer: The mother of George Washington was Mary Ball Washington.
Follow up: Who was the father of Mary Ball Washin

In [216]:
my_agent.run(train_data[0]['question'])



[1m> Entering new AgentExecutor chain...[0m


InvalidRequestError: This model's maximum context length is 4097 tokens, however you requested 4185 tokens (3929 in your prompt; 256 for the completion). Please reduce your prompt; or completion length.