In [None]:
%pip install llama-index==0.10.18 llama-index-llms-groq==0.1.3 groq==0.4.2 llama-index-embeddings-huggingface==0.2.0

In [None]:
import os
import warnings

from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
    load_index_from_storage
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
# from dotenv import load_dotenv
# load_dotenv()
warnings.filterwarnings('ignore')

In [None]:
# Groq API key, read from the environment so the secret never lives in the notebook.
# On Colab the key can instead be read from the notebook secrets:
#   from google.colab import userdata
#   GROQ_API_KEY = userdata.get('GROQ_API_KEY')
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Flag the missing key here instead of at the first LLM request, which
    # only happens after the (slow) embedding and indexing cells have run.
    print("WARNING: GROQ_API_KEY is not set; export it before creating the Groq client.")

In [None]:
# Data ingestion: read the SQuAD CSV file into a list of documents.
squad_csv_path = "/kaggle/input/squad-csv/SQuAD_csv.csv"
reader = SimpleDirectoryReader(input_files=[squad_csv_path])
documents = reader.load_data()


In [None]:
# Number of documents returned by the reader.
len(documents)

In [None]:
# Inspect the metadata attached to the first loaded document.
documents[0].metadata

In [None]:
# Split the loaded documents into overlapping chunks ("nodes").
splitter_settings = {"chunk_size": 1024, "chunk_overlap": 200}
text_splitter = SentenceSplitter(**splitter_settings)
nodes = text_splitter.get_nodes_from_documents(
    documents=documents,
    show_progress=True,
)

In [None]:
# Number of nodes produced by the splitter.
len(nodes)

In [None]:
# Inspect the metadata carried by the first node.
nodes[0].metadata

In [None]:
# Hugging Face embedding model that is handed to the service context.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embed_model = HuggingFaceEmbedding(model_name=embedding_model_name)

In [None]:
# Groq LLM client; the API key comes from GROQ_API_KEY.
groq_model_name = "llama3-70b-8192"
llm = Groq(api_key=GROQ_API_KEY, model=groq_model_name)

In [None]:
# Bundle the LLM and the embedding model into one context object that the
# index, loader and query-engine calls all receive.
# NOTE(review): ServiceContext is deprecated in llama-index 0.10 in favour of
# `Settings` — consider migrating when the pinned version is bumped.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

In [None]:
# Build the vector index directly from the nodes produced by `text_splitter`.
# `from_documents(..., node_parser=nodes)` did not do this: `node_parser` is not
# a parameter of `from_documents`, so the pre-split nodes were never used and
# the documents were split a second time while building the index.
vector_index = VectorStoreIndex(
    nodes=nodes,
    service_context=service_context,
    show_progress=True,
)

In [None]:
# Write the index's storage to disk so it can be loaded back later.
index_storage = vector_index.storage_context
index_storage.persist(persist_dir="./storage_mini")

In [None]:
# Re-open the persisted storage directory.
persist_dir = "./storage_mini"
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)

In [None]:
# Rebuild the index object from the persisted storage, reusing the same
# LLM / embedding configuration.
index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
)

In [None]:
# Create a query engine on top of the reloaded index, using the same service context.
query_engine = index.as_query_engine(service_context=service_context)

In [None]:
# Send a sample question through the query engine.
# NOTE(review): "bynoce" looks like a misspelling (possibly of "Beyonce") —
# confirm whether the typo is intentional before relying on the answer.
query = "Who was bynoce?"
resp = query_engine.query(query)

In [None]:
# Show the response text; print() keeps line breaks readable instead of a quoted repr.
print(resp.response)