In [31]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment before anything tries to read the OpenAI credentials.
load_dotenv()

# Fail fast with an actionable message when the key is absent; a bare
# KeyError('OPENAI_API_KEY') gives no hint on how to fix the setup.
# The exception type stays KeyError so existing handling is unaffected.
# NOTE(review): `api_key` is not passed explicitly to any client in the
# visible cells — presumably the OpenAI wrappers read the environment
# variable themselves; confirm before removing this lookup.
try:
    api_key = os.environ["OPENAI_API_KEY"]
except KeyError:
    raise KeyError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file"
    ) from None

In [32]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.indexes import VectorstoreIndexCreator
from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
from langchain.agents.agent_types import AgentType
from langchain.memory import ConversationBufferMemory
import tiktoken

In [33]:
# Identifier of the OpenAI chat model.
# NOTE(review): the cell that builds ChatOpenAI passes model_name="gpt-4"
# as a literal and does not read this variable — confirm which model is
# intended and make the two agree.
llm_model: str = "gpt-3.5-turbo"

In [34]:
from langchain.text_splitter import CharacterTextSplitter

In [46]:
# Plain-text source document to index, given relative to the notebook's
# working directory. "scalexi.txt" was the previously used alternative input.
txt_file_path: str = "tenacademy.txt"

In [47]:
# Read the source text file as UTF-8; `load()` yields the document object(s)
# that the following cells split and index.
loader = TextLoader(txt_file_path, encoding="utf-8")
data = loader.load()

In [48]:
# Break the loaded document(s) into chunks of at most 5600 units with a
# 200-unit overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=5600,
)
# NOTE(review): `data` is rebound from the raw documents to the split chunks,
# so re-running this cell alone feeds already-split chunks back in.
data = text_splitter.split_documents(documents=data)

In [49]:
# Embedding model used to vectorise the chunks; built with library defaults.
# NOTE(review): no API key is passed here — presumably it is picked up from
# the OPENAI_API_KEY environment variable; confirm.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings()

In [50]:
# Embed every chunk in `data` and build a FAISS index over the vectors.
vectorstore = FAISS.from_documents(
    documents=data,
    embedding=embeddings,
)

In [51]:
# Chat model that answers the questions, sampled at temperature 0.7.
# NOTE(review): the model name is hardcoded to "gpt-4" while `llm_model`
# (set to "gpt-3.5-turbo" in an earlier cell) is never used — confirm intent.
llm = ChatOpenAI(
    model_name="gpt-4",
    temperature=0.7,
)

In [52]:
# Conversation history buffer, exposed to the chain under the key
# "chat_history" and returned as message objects (return_messages=True).
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)


In [53]:
# Assemble the conversational RAG chain: the FAISS index acts as retriever,
# `llm` generates the answer, and `memory` carries the chat history from one
# call to the next. The "stuff" chain type is selected explicitly.
conversation_chain = ConversationalRetrievalChain.from_llm(
    retriever=vectorstore.as_retriever(),
    llm=llm,
    memory=memory,
    chain_type="stuff",
)

In [54]:
# Ask about a topic to see how the chain responds; the chain has memory
# attached, so this exchange is added to the chat history for later questions.
query = "What is Observium"
result = conversation_chain(dict(question=query))
answer = result["answer"]
# A bare final expression makes the notebook display the answer string.
answer

"I'm sorry, but the provided context does not contain any information about Observium."

In [57]:
# Ask the chain to list the project deliverables; earlier exchanges are
# available to it through the attached conversation memory.
query = "What are the deliverables?"
result = conversation_chain(dict(question=query))
answer = result["answer"]
# A bare final expression makes the notebook display the answer string.
answer

"The expected deliverables for this project are:\n\nInterim Submission - Wednesday 8pm UTC\n1. Link to your code in GitHub: A repository where you will be using to complete the tasks in this week's challenge. A minimum requirement is that you have a well structured repository and some coding progress is made.\n2. A review report of your reading and understanding of Task 1 & 2 and any progress you made in other tasks.\n\nFinal Submission - Saturday 8pm UTC\n1. Link to your code in GitHub: Complete work for Finetuning LLMs with Amharic data, Complete work for Generating Amharic Ad texts, and Complete work for RAG quality, huggingface deployment, frontend.\n2. A blog post entry (which you can submit for example to Medium publishing) or a pdf report."

In [56]:
# Ask for a high-level summary of the project described in the indexed file.
query = "What is the project about"
result = conversation_chain(dict(question=query))
answer = result["answer"]
# A bare final expression makes the notebook display the answer string.
answer


"The project is about developing an AI solution for AiQEM, an African startup focused on AI and Blockchain business solutions. AiQEM's latest project is an AI-based Telegram Ad solution called Adbar. The goal of the project is to improve the effectiveness of their promotional efforts by integrating powerful AI capabilities for Amharic text manipulation. \n\nThe project aims to create an Amharic RAG (Retrieval Augmented Generation) pipeline that will generate Amharic-based creative text Ad contents based on campaign information such as brand and product information and the content history of a Telegram channel. A successfully delivered project will ensure that the advertisements are both catchy and relevant to the Telegram community. \n\nThis will be achieved by finetuning a Language Learning Model (LLM) that has the capability to embed Amharic texts. The model should be selected from suitable open-source LLM models already capable of embedding Amharic texts, such as Nous Hermes Mistral

In [58]:
# Follow-up question about the number of deliverables; the chain can draw on
# the chat history kept in the attached memory as well as retrieved chunks.
query = "How many deliverables are there?"
result = conversation_chain(dict(question=query))
answer = result["answer"]
# A bare final expression makes the notebook display the answer string.
answer


'There are two deliverables for this project:\n\n1. Interim Submission - Due Wednesday 8pm UTC\n2. Final Submission - Due Saturday 8pm UTC'