In [None]:
!pip install cohere
!pip install chromadb
!pip install langchain
!pip install llama-index
!pip install jq
!pip install ragas

In [None]:
import os
import openai
import sys
sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())  # read local .env file

# API keys are taken from the environment (populated from .env above) instead
# of being hard-coded: a literal placeholder would overwrite a real key that
# was just loaded, and a real key typed here would be saved with the notebook.
# Any key that is still missing is requested interactively without echo.
from getpass import getpass

for _key_name in ("OPENAI_API_KEY", "COHERE_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"{_key_name}: ")

# Hand the OpenAI key to the openai module as well
openai.api_key = os.environ["OPENAI_API_KEY"]

## Document loading

In [None]:
# Load the whole Arabic Wikipedia
from langchain.document_loaders.csv_loader import CSVLoader
import json
from pathlib import Path
from pprint import pprint
import sys
import csv

# Raise the csv module's field size limit as far as the platform allows so
# very long fields can be parsed. `sys.maxsize` does not fit into a C long on
# some platforms (e.g. Windows) and raises OverflowError there, so back off
# until a value is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 10

path = 'articles_V3.csv'

# The `title` column is used as the source of each loaded document.
# The encoding is given explicitly so the Arabic text is decoded the same way
# on every platform instead of with the locale default
# (assumes the CSV is UTF-8 encoded -- TODO confirm).
loader = CSVLoader(file_path=path, source_column="title", encoding="utf-8")

data = loader.load()




## Vector Store and Embeddings

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import CohereEmbeddings, OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.indexes import VectorstoreIndexCreator
import cohere

# Splitter that tries the separators in order: blank line, newline,
# end of sentence, space, and finally individual characters.
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=400,
    # The sentence separator is a regex look-behind. It is written as a raw
    # string (no invalid "\." escape) and the splitter is told to interpret
    # separators as regular expressions; otherwise the pattern is matched
    # literally and never fires.
    separators=["\n\n", "\n", r"(?<=\. )", " ", ""],
    is_separator_regex=True,
)

# Wrapper that splits documents with `r_splitter`, embeds the chunks with the
# OpenAI embedding model and stores them in a Chroma vector store.
index_creator = VectorstoreIndexCreator(
    vectorstore_cls=Chroma,
    embedding=OpenAIEmbeddings(model='text-embedding-ada-002'),
    text_splitter=r_splitter,
)


In [None]:

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.llms import Cohere


# Create the index from the documents already loaded into `data`, rather than
# passing the loader again, which would read and parse the whole CSV a second
# time before splitting and embedding.
index = index_creator.from_documents(data)




### QA Chain

In [None]:
from langchain.chains import RetrievalQA
from ragas.metrics import faithfulness, answer_relevancy, context_relevancy, context_recall
from ragas.langchain import RagasEvaluatorChain
import pandas as pd
import csv



# Cohere LLM used to answer the questions
llm = Cohere(cohere_api_key=os.environ["COHERE_API_KEY"], model="command-nightly")

# Retrieval QA chain on top of the vector store. The source documents are
# returned together with the answer, so the whole `result` dict can be handed
# to the evaluator chains below.
qa_chain = RetrievalQA.from_chain_type(
    llm, retriever=index.vectorstore.as_retriever(), return_source_documents=True
)

# Load questions and answers
dataset = pd.read_json('/content/drive/MyDrive/RAG_Eman_code/dataset/rag_KaifLematha.json')

# File name represents llm_embedding_questionSource_contextSource
filename = "Cohere_OpenAI_KaifLemathaQ_KaifLemathaC_2.csv"

# Per-question scores, stored as floats so they can be averaged afterwards
faith = []
ans_relevancy = []
cont_relevancy = []
cont_recall = []

# The evaluator chains do not depend on the question, so build them once.
# The dict keeps insertion order: faithfulness, answer_relevancy,
# context_relevancy, context_recall.
eval_chains = {
    m.name: RagasEvaluatorChain(metric=m)
    for m in [faithfulness, answer_relevancy, context_relevancy, context_recall]
}

# The file is opened in append mode, so only emit the header when the file is
# new or empty; otherwise every re-run would add another header row.
write_header = not os.path.exists(filename) or os.path.getsize(filename) == 0

# Write the evaluation metrics to the CSV file (UTF-8 so the Arabic questions
# and answers are written identically on every platform).
with open(filename, 'a', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    if write_header:
        writer.writerow(['Query', 'reponse', 'faithfulness', 'answer_relevancy',
                         'context_relevancy', 'context_recall'])

    # Iterate through the questions of the dataset. The loop variable must not
    # be called `index`: that name holds the vector index used above.
    for row_idx, row in dataset.iterrows():
        try:
            # Answer the question using the retrieved documents and the Cohere LLM
            result = qa_chain({"query": row['question']})
            result['ground_truths'] = [row['answer']]

            # One score per metric, in the order of `eval_chains`
            scores = [
                float(eval_chain(result)[f"{name}_score"])
                for name, eval_chain in eval_chains.items()
            ]
        except Exception as exc:
            # Best effort: report the failing question and carry on with the
            # next one. KeyboardInterrupt is intentionally not caught.
            print(f"Skipping question {row_idx}: {type(exc).__name__}: {exc}")
            continue

        writer.writerow([result['query'], result['result'], *scores])
        faith.append(scores[0])
        ans_relevancy.append(scores[1])
        cont_relevancy.append(scores[2])
        cont_recall.append(scores[3])






In [None]:
def _mean_score(values):
    """Return the arithmetic mean of `values` as a float.

    Each element is converted with `float()`, so both numbers and numeric
    strings are accepted. An empty input yields NaN instead of raising
    ZeroDivisionError.
    """
    numeric = [float(v) for v in values]
    if not numeric:
        return float("nan")
    return sum(numeric) / len(numeric)


average_faith = _mean_score(faith)
average_ans_relevancy = _mean_score(ans_relevancy)
average_cont_relevancy = _mean_score(cont_relevancy)
average_cont_recall = _mean_score(cont_recall)

# f-strings: concatenating a str with a float raises TypeError
print(f"average_faithfulness {average_faith}")
print(f"average_ans_relevancy {average_ans_relevancy}")
print(f"average_cont_relevancy {average_cont_relevancy}")
print(f"average_cont_recall {average_cont_recall}")