In [2]:
# ! pip install langchain
# ! pip install llama-index
# ! pip install llama-hub
# ! pip install accelerate
# ! pip install bitsandbytes
# ! pip install transformers
# ! pip install sentence_transformers
# ! pip install InstructorEmbedding
# ! pip install chromadb
# ! pip install streamlit

In [1]:
import streamlit as st
from dotenv import load_dotenv
import torch
import sys
import os
from transformers import BitsAndBytesConfig

# llama_index
from langchain.embeddings import HuggingFaceInstructEmbeddings
from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage, ServiceContext
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.indices.postprocessor import SimilarityPostprocessor, KeywordNodePostprocessor
from llama_index.response_synthesizers import get_response_synthesizer
from llama_index.text_splitter import SentenceSplitter
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.prompts import PromptTemplate
from llama_index.llms import HuggingFaceLLM

# chromadb
import chromadb
import logging
# Device string handed to the embedding model: first CUDA GPU when torch can see one, otherwise CPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Send INFO-level (and above) log records to stdout so they appear in the cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# NOTE(review): basicConfig(stream=...) already attaches a stdout handler to the root logger
# when none is configured, so this extra handler looks like it prints every record twice — confirm.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index import SummaryIndex
from llama_index.response.pprint_utils import pprint_response
from pathlib import Path
from llama_index import download_loader
from llama_index.response.pprint_utils import pprint_response

In [2]:
def get_desired_llm(hf_token=None):
  """Load the 4-bit quantized Llama-2 7B chat model wrapped as a HuggingFaceLLM.

  Args:
    hf_token: Hugging Face access token forwarded to the model and tokenizer
      loaders. When omitted, it is read from the ``HF_TOKEN`` environment
      variable, then from ``HUGGING_FACE_HUB_TOKEN``. If neither is set the
      token stays ``None`` and is forwarded as-is (transformers documents that
      an unspecified token falls back to the credentials stored by
      ``huggingface-cli login``).

  Returns:
    The configured ``HuggingFaceLLM`` instance.
  """
  # Never keep the credential in the notebook source: resolve it at call time.
  if hf_token is None:
    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

  # 4-bit NF4 weights with double quantization; matmuls are computed in float16.
  quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
  )

  llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-2-7b-chat-hf",
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    # Wrap every query in the [INST] ... [/INST] markers.
    query_wrapper_prompt=PromptTemplate(" [INST] {query_str} [/INST] "),
    context_window=3900,
    model_kwargs={"token": hf_token, "quantization_config": quantization_config},
    tokenizer_kwargs={"token": hf_token},
    device_map="auto",
  )
  return llm

In [3]:
def get_desired_embedding():
  """Create the ``hkunlp/instructor-large`` embedding model on the module-level DEVICE.

  Returns:
    A ``HuggingFaceInstructEmbeddings`` instance.
  """
  # DEVICE is chosen once at import time (GPU when available, otherwise CPU).
  instructor_kwargs = {"device": DEVICE}
  return HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    model_kwargs=instructor_kwargs,
  )

In [4]:
def get_csv_file(file_path='/content/data/countries of the world.csv'):
  """Load a CSV file into documents using the ``SimpleCSVReader`` loader.

  Args:
    file_path: Location of the CSV file (``str`` or ``Path``). Defaults to the
      dataset path this notebook was originally written against.

  Returns:
    Whatever ``SimpleCSVReader.load_data`` returns for the file (the documents
    later passed to ``VectorStoreIndex.from_documents``).

  Raises:
    FileNotFoundError: If ``file_path`` does not point to an existing file.
  """
  csv_path = Path(file_path)
  # Fail fast, before the loader class is fetched, when the data file is missing.
  if not csv_path.is_file():
    raise FileNotFoundError(f"CSV file not found: {csv_path}")

  # download_loader resolves the reader class by name
  # (presumably fetched from LlamaHub on first use — confirm network requirement).
  SimpleCSVReader = download_loader("SimpleCSVReader")
  loader = SimpleCSVReader(encoding="utf-8")
  return loader.load_data(file=csv_path)



In [5]:
def create_chunk():
  """Build, or reopen, the Chroma-backed vector index over the CSV documents.

  The index lives in the persistent Chroma collection ``csv_database`` under
  ``./chroma_db``. When that collection already holds entries, the index is
  reopened from it instead of re-reading and re-embedding the CSV, so repeated
  calls no longer insert duplicate entries. Delete ``./chroma_db`` to force a
  rebuild after the CSV changes.

  Returns:
    A ``VectorStoreIndex`` bound to the Chroma vector store.
  """
  llm = get_desired_llm()
  embed_model = get_desired_embedding()
  text_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

  # context_window / num_output are deliberately left at ServiceContext's defaults.
  service_context = ServiceContext.from_defaults(
      llm=llm,
      embed_model=embed_model,
      text_splitter=text_splitter,
  )

  # Persistent client: data is saved under ./chroma_db and survives kernel restarts.
  db = chromadb.PersistentClient(path="./chroma_db")
  chroma_collection = db.get_or_create_collection("csv_database")
  vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

  # Already populated by an earlier run: reuse the stored embeddings.
  if chroma_collection.count() > 0:
    return VectorStoreIndex.from_vector_store(
        vector_store,
        service_context=service_context,
    )

  # First run: read the CSV, embed it, and write the vectors into Chroma.
  documents = get_csv_file()
  storage_context = StorageContext.from_defaults(vector_store=vector_store)
  vector_index = VectorStoreIndex.from_documents(
      documents,
      storage_context=storage_context,
      service_context=service_context,
  )
  return vector_index

In [8]:
def create_serach_query(query="Can you bring some insights from the provided data ?"):
  """Ask a question against the CSV index and pretty-print the answer with its sources.

  Args:
    query: Question sent to the query engine. Defaults to the generic
      "insights" prompt this notebook originally hard-coded.

  Returns:
    None. The response is printed via ``pprint_response``.
  """
  vector_index = create_chunk()
  # "compact" response mode is passed straight through to the query engine.
  query_engine = vector_index.as_query_engine(response_mode="compact")

  response = query_engine.query(query)

  pprint_response(response, show_source=True)

In [9]:
# Run the whole pipeline: create_serach_query calls create_chunk, which loads the LLM and the embedding model.
create_serach_query()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



load INSTRUCTOR_Transformer
max_seq_length  512
Final Response: Sure, here are some insights that can be gained from
the provided data:  1. Population: Afghanistan has the highest
population among all countries (31056997), while Armenia has the
lowest (39921833). 2. Area: Albania has the smallest area among all
countries (28748 square miles), while Armenia has the largest (2766890
square miles). 3. Population density: Afghanistan has the highest
population density (48 people per square mile), while Armenia has the
lowest (3 people per square mile). 4. Coastline: American Samoa has
the highest coastline ratio (coastline length to area) among all
countries (290.4%), while Armenia has the lowest (0%). 5. Net
migration: Albania has the highest net migration rate (1.26%), while
Armenia has the lowest (-0.358%). 6. Infant mortality: Afghanistan has
the highest infant mortality rate (12.13 deaths per 1000 births),
while Armenia has the lowest (0.22 deaths per
_________________________________