In [1]:
import logging
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("openai").setLevel(logging.WARNING)
logging.basicConfig(level=logging.WARNING)

import pprint, random
import yaml, os, json, glob
from PyPDF2 import PdfReader
from llama_index.core.llms import (
                                ChatMessage,
                                MessageRole
                                )
from llama_index.llms.groq import Groq
from llama_index.llms.openai import OpenAI
from llama_index.core.prompts import Prompt
from llama_index.core import ChatPromptTemplate
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.finetuning import generate_qa_embedding_pairs
from llama_index.core.evaluation import EmbeddingQAFinetuneDataset
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.finetuning import SentenceTransformersFinetuneEngine
from llama_index.core import Settings, VectorStoreIndex

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load API credentials from a local YAML file (kept out of the notebook itself)
# and expose them as environment variables.
# safe_load is sufficient for a plain key/value mapping and refuses to build
# arbitrary Python objects; `or {}` turns an empty file into an empty mapping
# so the check below reports which keys are missing instead of a TypeError.
with open('secrets.yaml', encoding='utf-8') as f:
    secrets = yaml.safe_load(f) or {}

REQUIRED_SECRET_KEYS = ("GROQ_API_KEY", "MONGO_DB_URI", "VOYAGE_API_KEY")

missing_keys = [key for key in REQUIRED_SECRET_KEYS if key not in secrets]
if missing_keys:
    # Same exception type a direct secrets[...] lookup would raise, but with
    # an actionable message listing every missing key at once.
    raise KeyError(f"secrets.yaml is missing required key(s): {', '.join(missing_keys)}")

for key in REQUIRED_SECRET_KEYS:
    os.environ[key] = secrets[key]

# Completion LLM served by Groq; temperature 0.0 is passed to request
# deterministic-as-possible answers.
completion_llm = Groq(
                    model="llama3-70b-8192", 
                    api_key=os.environ["GROQ_API_KEY"],
                    temperature=0.0
                    )

# Local HuggingFace embedding model, run on CPU.
# NOTE(review): trust_remote_code=True allows code shipped with the model repo
# to execute locally — confirm this model actually needs it, otherwise drop it.
embed_model = HuggingFaceEmbedding(
                                    model_name="BAAI/bge-small-en",
                                    trust_remote_code=True,
                                    device="cpu"
                                    )

# Register both models as the LlamaIndex global defaults.
Settings.embed_model = embed_model
Settings.llm = completion_llm

  warn(f"Failed to load image Python extension: {e}")


In [3]:
# Single source PDF to ingest, relative to the notebook's working directory.
pdf_path = "data/Tomato-Production-Guideline-2019_200908_054054.pdf"

# Read the file into LlamaIndex Document objects.
pdf_reader = SimpleDirectoryReader(input_files=[pdf_path])
documents = pdf_reader.load_data()

# Display the loaded documents as the cell output.
documents

[Document(id_='ceca702c-93f3-4e48-8c1d-026cae1fb099', embedding=None, metadata={'page_label': '1', 'file_name': 'Tomato-Production-Guideline-2019_200908_054054.pdf', 'file_path': 'data\\Tomato-Production-Guideline-2019_200908_054054.pdf', 'file_type': 'application/pdf', 'file_size': 1696626, 'creation_date': '2024-11-30', 'last_modified_date': '2024-11-22'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='SEEDS OF SUCCESS\nCUSTOMER SERVICES: 0860 782 753  •  WWW.STARKEAYRES.CO.ZA• MEMBER OF THE PLENNEGY GROUP\nTOMATO\nPRODUCTION GUIDELINE', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 Document(id_='f0d38d65-0844-4cc8-afda-d14f186b

In [4]:
# Build a vector index over the loaded documents.
index = VectorStoreIndex.from_documents(documents)

# The interactive loop calls `query_engine.query(...)`, which is the
# query-engine API. A chat engine exposes `.chat()` instead, so `.query()`
# on the result of `as_chat_engine()` only works when the default chat mode
# happens to return an agent. Build an actual query engine to match the name
# and the way it is used.
query_engine = index.as_query_engine()

In [None]:
# Interactive question/answer loop over the indexed document.
# Type "exit" (any case, surrounding whitespace ignored) to stop.
while True:
    query = input("Enter your query: ")
    command = query.strip()

    # Check the sentinel BEFORE querying so "exit" is never sent to the LLM
    # (the previous order spent one API call answering the word "exit").
    if command.lower() == "exit":
        break

    # Skip blank input rather than issuing an empty query.
    if not command:
        continue

    response = query_engine.query(query)

    print(f"USER QUERY: {query}")
    print(f"BOT RESPONSE: {response}")
    print("\n")

USER QUERY: tomato was originally taken from where
BOT RESPONSE: Tomato was originally taken from Central America.


USER QUERY: what are the 7 identification classification stages of tomato
BOT RESPONSE: The 7 identification and marketing classification stages of the ripening process of a tomato are: Green, Mature Green, Colour Breaker, Half Ripe, Ripe, Red Ripe, and Full Ripe.


USER QUERY: what are the 7 marketing classification stages of tomato
BOT RESPONSE: The 7 marketing classification stages of tomato are: Green, Mature Green, Colour Breaker, Half Ripe, Ripe, Red Ripe, and Full Ripe.


