In [5]:
from components import Components
from llama_index.core import Settings

from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext, SimpleDirectoryReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import VectorStoreIndex
from llama_index.core import PromptTemplate,Document 


from llama_index.core.node_parser import SentenceSplitter

import pandas as pd

In [6]:
# Initialise components, reranker and retriever.
# The last identifier matches the value shown by rag_components.model_name;
# the roles of the first two are defined by Components' constructor — TODO confirm.
rag_components = Components(
    "Snowflake/snowflake-arctic-embed-s",
    "mixedbread-ai/mxbai-embed-large-v1",
    "gemma2-9b-it",
)

In [7]:
# Display the model name stored on rag_components (it is reused later to name the results file).
rag_components.model_name

'gemma2-9b-it'

Setting Contexts


In [8]:
# Register the models produced by rag_components on llama_index's global Settings,
# so later index construction and querying can pick them up without explicit arguments.
embedding_model = rag_components.get_embedding_model()
Settings.embed_model = embedding_model

groq_llm = rag_components.get_groq_llm()
Settings.llm = groq_llm

Embedding model loaded!


In [9]:
# Wrap the collection returned by rag_components.get_db() in a Chroma vector store,
# then build the storage context that the index will be created with.
chroma_collection = rag_components.get_db()
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

Loading the QA test dataset

In [10]:
# Load the QA test set; later cells read its 'context' and 'question' columns.
qa_parquet_path = 'data/test/test_neural_bridge_qa.parquet'
df = pd.read_parquet(qa_parquet_path)

In [11]:
# Write a CSV copy of the loaded test set next to the parquet file (index column omitted).
qa_csv_path = 'data/test/test_neural_bridge_qa.csv'
df.to_csv(qa_csv_path, index=False)

In [12]:
# Wrap each of the first 128 context passages in a llama_index Document.
context_list = [
    Document(text=context_text)
    for context_text in df['context'][:128]
]

In [13]:
# Ingestion pipeline bound to the Chroma vector store.
# No transformations are configured: the SentenceSplitter step
# (chunk_size=1000, chunk_overlap=200) is currently disabled.
ingestion_steps = []
pipeline = IngestionPipeline(
    transformations=ingestion_steps,
    vector_store=vector_store,
)

In [14]:
# Run the ingestion pipeline (configured with an empty transformation list) over the
# Document objects built from the dataset; the result is what the index is built from.
documents = pipeline.run(documents=context_list)

In [15]:
# Build the vector index over the ingested documents, persisting into the Chroma-backed
# storage context.
# Retrieval depth is NOT an index-construction option: it is set where the query engine
# is created (index.as_query_engine(..., similarity_top_k=...)). The former
# `similarity_top_k=5` argument here was swallowed by **kwargs and had no effect, so it
# has been removed to avoid implying a top-5 retrieval.
# A reranker could be attached at query time via
# node_postprocessors=[rag_components.get_reranker()] (currently not used).
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
 

In [16]:
# Streaming query engine over the index, retrieving with similarity_top_k=3.
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=3,
)

In [17]:
# Custom question-answering prompt. {context_str} and {query_str} are the template
# placeholders for the retrieved context and the user question.
# NOTE(review): the "be concise" instruction appears twice in the template text.
qa_prompt_template_str = """
Context: {context_str}
Instructions:
- Be helpful and answer questions concisely. If you don't know the answer, say 'I don't know'
- Utilize the context provided for accurate and specific information.
- Incorporate your preexisting knowledge to enhance the depth and relevance of your response.
- Be concise and to the point.
Question: {query_str}
"""

qa_prompt_template = PromptTemplate(qa_prompt_template_str)

# Swap the response synthesizer's text-QA prompt for the custom template.
qa_prompt_overrides = {"response_synthesizer:text_qa_template": qa_prompt_template}
query_engine.update_prompts(qa_prompt_overrides)


In [18]:
# Replace the response synthesizer's refine prompt with an empty template.
# NOTE(review): presumably intended to neutralise the refine step — confirm how the
# synthesizer behaves if it ever needs to refine with an empty prompt.
blank_refine_prompt = PromptTemplate("")
query_engine.update_prompts(
    {"response_synthesizer:refine_template": blank_refine_prompt}
)


In [19]:
# Collect the first 128 questions, in dataset order, as a plain Python list.
question_list = list(df['question'][:128])
    

In [20]:
import time

# Pause applied before every request.
# NOTE(review): presumably there to stay under the LLM provider's rate limit — confirm.
QUERY_DELAY_SECONDS = 3
total_questions = len(question_list)

# Ask every question, stream the answer to the cell output, and store the generated
# text in df['gen_answer'] on the row the question came from.
for i, question in enumerate(question_list):
    time.sleep(QUERY_DELAY_SECONDS)
    print(question)
    response = query_engine.query(question)
    # Streams the answer to stdout; response_txt is read only after the stream
    # has been printed.
    response.print_response_stream()

    # question_list[i] was taken from the i-th row of df, so map the position back
    # to that row's index label before writing.
    rowIndex = df.index[i]
    df.at[rowIndex, 'gen_answer'] = response.response_txt

    print('')
    print("--"*100)
    # One-based progress against the real number of questions
    # (previously a zero-based index over a hard-coded 128).
    print(f'{i + 1}/{total_questions}')
 


Who is the music director of the Quebec Symphony Orchestra?
Fabien Gabel is the music director of the Quebec Symphony Orchestra. 

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0/128
Who were the four students of the University of Port Harcourt that were allegedly murdered?
The four students allegedly murdered were:

* Chiadika Lordson
* Ugonna Kelechi Obusor
* Mike Lloyd Toku
* Tekena Elkanah 



--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1/128
What did Paul Wall offer to all U.S. Olympic Medalists?
Paul Wall offered to give free gold grills to all U.S. Olympic medalists.  

--------------------------------------------------------------------------------------------------------

In [21]:
# Preview the first 128 rows, which now include the 'gen_answer' column filled in by the query loop.
df[:128]

Unnamed: 0,context,question,answer,gen_answer
0,"HOUSTON (Jan. 23, 2018) – Fabien Gabel, music ...",Who is the music director of the Quebec Sympho...,The music director of the Quebec Symphony Orch...,Fabien Gabel is the music director of the Queb...
1,Port Harcourt – The Rivers police command on W...,Who were the four students of the University o...,The four students of the University of Port Ha...,The four students allegedly murdered were:\n\n...
2,Channels\nMusic\nStyle\nPop Culture\nSports\nS...,What did Paul Wall offer to all U.S. Olympic M...,Paul Wall wants to give free gold grills to al...,Paul Wall offered to give free gold grills to ...
3,The crazy level at which African countries imp...,What are the main agricultural products that A...,"African countries mainly export cocoa, edible ...","According to the text, African countries mainl..."
4,"CHI 2010 Workshop May 7 or 8, 2011 (final date...",What is the main goal of the CHI 2011 workshop...,The main goal of this one-day CHI 2011 worksho...,The main goal of the CHI 2011 workshop is to b...
...,...,...,...,...
123,The video below was found yesterday on the WaP...,What was the long-term study published in the ...,The long-term study published in the journal P...,The provided text doesn't mention a long-term ...
124,Trip Notes\nA little bit about India\nClimate:...,"What is the climate like in Goa, India?",The climate in Goa is humid and tropical with ...,The climate in Goa is humid and tropical with ...
125,Intuition is more than just a characteristic o...,What is the relationship between intuition and...,Intuition is more than just a characteristic o...,"According to the text, intuition is a cornerst..."
126,It looks like you're using an Ad Blocker.\nPle...,What action did Gov. Jan Brewer take in relati...,Brewer signed the 2011 legislative budget whic...,Gov. Jan Brewer signed the 2011 legislative bu...


In [22]:
# Keep only the first 128 rows — the same slice size used when building context_list
# and question_list — so the frame saved next holds just the rows that were queried.
# Note: this rebinds `df`; the full dataset must be reloaded to get the remaining rows back.
df = df[:128]

In [23]:
# Save the questions with their generated answers; the file name embeds the model name.
results_csv_path = f'data/results/{rag_components.model_name}_128Q_Run2.csv'
df.to_csv(results_csv_path, index=False)