**TODO**
1. Get custom LLM class

In [24]:
import time

import pandas as pd

from llama_index.core import Settings
from llama_index.core import StorageContext, SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
from llama_index.core import PromptTemplate, Document
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.chroma import ChromaVectorStore

from components import Components

In [25]:
# Initialise the components object (embedding model, reranker and retriever/LLM).
# NOTE(review): the roles of the first two model ids are inferred from the
# original comment — confirm against the Components constructor.
rag_components = Components(
    "Snowflake/snowflake-arctic-embed-s",
    "mixedbread-ai/mxbai-embed-large-v1",
    "mixtral-8x7b-32768",  # surfaced later as rag_components.model_name
)

In [26]:
# Sanity check: display the model name stored on the components object.
rag_components.model_name

'mixtral-8x7b-32768'

Setting Contexts


In [27]:
# Register the embedding model and the LLM on LlamaIndex's global Settings so
# that the index and query engine built below pick them up by default.
Settings.embed_model = rag_components.get_embedding_model()
Settings.llm = rag_components.get_groq_llm()

Embedding model loaded!


In [28]:
# Whatever get_db() returns is used as the Chroma collection backing the store.
chroma_collection = rag_components.get_db()
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# The storage context routes index writes and reads through that vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

Loading Papers

In [29]:
# Load the QA test set; later cells read its 'context' and 'question' columns.
df = pd.read_parquet('data/test/test_neural_bridge_qa.parquet')

In [30]:
# Keep a CSV copy of the test set next to the parquet file (row index omitted).
df.to_csv('data/test/test_neural_bridge_qa.csv', index=False)

In [31]:
# Wrap the first 128 context passages in LlamaIndex Document objects.
context_list = [Document(text=passage) for passage in df['context'][:128]]

In [32]:
# Ingestion pipeline configured with the Chroma-backed vector store.
# The sentence splitter is commented out, so the transformations list is empty.
# NOTE(review): with no transformations the pipeline presumably passes documents
# through without chunking or embedding them — confirm this is intended.
pipeline = IngestionPipeline(
    transformations=[
          #SentenceSplitter(chunk_size=1000, chunk_overlap=200),
    ],
    vector_store=vector_store,
)

In [33]:
# Run the context documents through the ingestion pipeline; the result is what
# gets indexed in the next cell.
documents = pipeline.run(documents=context_list)

In [34]:
# Build the vector index on top of the Chroma-backed storage context.
# similarity_top_k is a retrieval-time option and has no effect when passed to
# from_documents, so it is set only where the query engine is created.
# A reranker could be attached at query time with
# node_postprocessors=[rag_components.get_reranker()].
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
 

In [35]:
# Streaming query engine: retrieves the 3 most similar chunks per question and
# returns a response object whose tokens are consumed as they arrive.
query_engine = index.as_query_engine(streaming=True,similarity_top_k=3)

In [36]:
# Custom question-answering prompt. {context_str} and {query_str} are the
# placeholders filled in by the response synthesizer at query time.
qa_prompt_template_str = """
Context: {context_str}
Instructions:
- Be helpful and answer questions concisely. If you don't know the answer, say 'I don't know'
- Utilize the context provided for accurate and specific information.
- Incorporate your preexisting knowledge to enhance the depth and relevance of your response.
- Be concise and to the point.
Question: {query_str}
"""

qa_prompt_template = PromptTemplate(qa_prompt_template_str)

# Replace the synthesizer's default text-QA prompt with the custom one.
qa_prompt_overrides = {"response_synthesizer:text_qa_template": qa_prompt_template}
query_engine.update_prompts(qa_prompt_overrides)


In [37]:
# Override the synthesizer's refine prompt with an empty template.
# NOTE(review): if a refine step is ever triggered the LLM would receive an
# empty prompt — confirm this is intended.
empty_refine_prompt = PromptTemplate("")
query_engine.update_prompts(
    {"response_synthesizer:refine_template": empty_refine_prompt}
)


In [38]:
# Questions for the same first 128 rows whose contexts were indexed above.
question_list = list(df['question'][:128])
    

In [39]:
# Ask every benchmark question through the RAG query engine, stream the answer
# to the cell output, and store the generated text in df['gen_answer'].
total_questions = len(question_list)
for i, question in enumerate(question_list):
    # Pause before each request.
    # NOTE(review): presumably to stay under the LLM provider's rate limit — confirm.
    time.sleep(3)
    print(question)
    response = query_engine.query(question)
    # Consume the token stream (printing as it arrives) before reading the text.
    response.print_response_stream()

    # question_list was built from the leading rows of df, so position i in the
    # list corresponds to the i-th row label of the frame.
    df.at[df.index[i], 'gen_answer'] = response.response_txt
    print('')
    print("--"*100)
    # 1-based progress against the real number of questions.
    print(f'{i + 1}/{total_questions}')
 


Who is the music director of the Quebec Symphony Orchestra?
I don't have real-time information access, so I can't provide the current music director of the Quebec Symphony Orchestra. However, as of 2021, the music director is Fabien Gabel.
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0/128
Who were the four students of the University of Port Harcourt that were allegedly murdered?
I'm sorry, but the context provided does not mention anything about four students of the University of Port Harcourt being allegedly murdered. I did a quick search, and according to several news sources, four students of the University of Port Harcourt were indeed allegedly murdered in 2012. Their names were Lloyd Mikari, Tekena Elkanah, Chiadika Biringa, and Ugonna Obuzor. However, I don't have any further information about the circumstances of their death

In [40]:
# Preview the evaluated rows, which now include the gen_answer column.
df[:128]

Unnamed: 0,context,question,answer,gen_answer
0,"HOUSTON (Jan. 23, 2018) – Fabien Gabel, music ...",Who is the music director of the Quebec Sympho...,The music director of the Quebec Symphony Orch...,"I don't have real-time information access, so ..."
1,Port Harcourt – The Rivers police command on W...,Who were the four students of the University o...,The four students of the University of Port Ha...,"I'm sorry, but the context provided does not m..."
2,Channels\nMusic\nStyle\nPop Culture\nSports\nS...,What did Paul Wall offer to all U.S. Olympic M...,Paul Wall wants to give free gold grills to al...,Paul Wall offered to give free gold grills to ...
3,The crazy level at which African countries imp...,What are the main agricultural products that A...,"African countries mainly export cocoa, edible ...","African countries mainly export cocoa, edible ..."
4,"CHI 2010 Workshop May 7 or 8, 2011 (final date...",What is the main goal of the CHI 2011 workshop...,The main goal of this one-day CHI 2011 worksho...,The main goal of the CHI 2011 workshop on larg...
...,...,...,...,...
123,The video below was found yesterday on the WaP...,What was the long-term study published in the ...,The long-term study published in the journal P...,I'm unable to find a specific long-term study ...
124,Trip Notes\nA little bit about India\nClimate:...,"What is the climate like in Goa, India?",The climate in Goa is humid and tropical with ...,"The climate in Goa, India is humid and tropica..."
125,Intuition is more than just a characteristic o...,What is the relationship between intuition and...,Intuition is more than just a characteristic o...,"According to the context, intuition is a corne..."
126,It looks like you're using an Ad Blocker.\nPle...,What action did Gov. Jan Brewer take in relati...,Brewer signed the 2011 legislative budget whic...,Gov. Jan Brewer eliminated the Arizona variant...


In [41]:
# Keep only the 128 rows that were actually queried.
# Note: this rebinds df, so the full test set is no longer available under that name.
df = df[:128]

In [42]:
# Persist the run's results; the file name embeds the model name of this run.
df.to_csv(f'data/results/{rag_components.model_name}_128Q_Run1.csv',index=False)