In [None]:
import os
import getpass
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.document_loaders import TextLoader
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter

# Ask for each credential interactively and export it as an environment
# variable, so no secret is ever written into the notebook itself.
for env_var, prompt in (
    ('OPENAI_API_KEY', 'OpenAI API Key:'),
    ('ACTIVELOOP_TOKEN', 'Activeloop Token:'),
):
    os.environ[env_var] = getpass.getpass(prompt)

# Embedding client reused by the vector-store cell further down.
# NOTE(review): disallowed_special=() presumably stops special-token text in
# the source files from being rejected during embedding — confirm.
embeddings = OpenAIEmbeddings(disallowed_special=())

In [None]:
# Walk the checked-out repository under `root_dir` and load every file that
# can be read as UTF-8 text. Loading is best-effort: a file that fails to
# load is skipped, but the failure is recorded in `skipped_files` instead of
# being silently thrown away, so an unexpectedly small corpus can be diagnosed.
root_dir = './the-algorithm'
docs = []
skipped_files = []  # (path, "ErrorType: message") for each file that failed to load
for dirpath, dirnames, filenames in os.walk(root_dir):
    for file in filenames:
        file_path = os.path.join(dirpath, file)
        try:
            loader = TextLoader(file_path, encoding='utf-8')
            docs.extend(loader.load_and_split())
        except Exception as e:
            # Keep going so one unreadable file does not abort the whole
            # walk, but remember what was skipped and why.
            skipped_files.append((file_path, f"{type(e).__name__}: {e}"))

# One-line summary so a wrong `root_dir` (zero documents) or a large number
# of skipped files is visible right away.
print(f"Loaded {len(docs)} documents from {root_dir!r}; skipped {len(skipped_files)} files.")

In [None]:
# Re-chunk the loaded documents before they are embedded: chunks are capped
# at a size of 10000 and consecutive chunks do not overlap.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=10000,
)
texts = text_splitter.split_documents(docs)

In [None]:
# Activeloop dataset that backs the vector store.
dataset_path = "hub://bosaya/twitter-algorithm"

# Choose how the dataset is opened:
#   True  -> open it for writing and add the chunks in `texts`
#   False -> open the existing dataset read-only and upload nothing
# Previously the read-only handle was created and then immediately replaced
# by the writable one, so it was never used.
ingest_texts = True

if ingest_texts:
    db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)
    # NOTE(review): re-running this cell calls add_documents again — check
    # whether that duplicates entries already stored in the dataset.
    db.add_documents(texts)
else:
    db = DeepLake(dataset_path=dataset_path, read_only=True, embedding_function=embeddings)

# Retriever settings used by the question-answering chain.
# NOTE(review): presumably 'fetch_k' candidates are fetched and narrowed to
# 'k' results via maximal marginal relevance using cosine distance — confirm
# against the DeepLake retriever documentation.
retriever = db.as_retriever()
retriever.search_kwargs.update({
    'distance_metric': 'cos',
    'fetch_k': 100,
    'maximal_marginal_relevance': True,
    'k': 10,
})

In [None]:
# Chat model that produces the answers; 'gpt-4' is the model selected here.
model = ChatOpenAI(model="gpt-4")

# Question-answering chain built from the chat model and the retriever
# configured in the previous cell.
qa = ConversationalRetrievalChain.from_llm(
    llm=model,
    retriever=retriever,
)

In [None]:
# Questions put to the chain, one per list element. The comma between the
# elements matters: adjacent string literals without one are concatenated by
# Python into a single string, which would merge both questions into one.
questions = [
    "In what language is the Twitter Recommender Algorithm written?",
    "How does the recommender algorithm work?",
]

In [None]:
# Running transcript of (question, answer) pairs. The same list is passed to
# the chain on every call, so each question is asked with the earlier
# exchanges available as history.
chat_history = []

for question in questions:
    result = qa({"chat_history": chat_history, "question": question})
    answer = result["answer"]
    # Record the exchange before printing it.
    chat_history.append((question, answer))
    print(f"-> **Question**: {question} \n")
    print(f"**Answer**: {answer} \n")