In [1]:
## Apple Orchard RAG Chatbot with meta llama3.1 LLM 
## And pineconeDB as vector DB
## Source Of Info --> All data related to our apple orchard, and farming. 

In [2]:
original_model = "meta-llama/Meta-Llama-3.1-8B-Instruct"

In [None]:
!pip install sentence-transformers
!pip install ctransformers
!pip install pinecone-client
!pip install langchain
!pip install pypdf
!pip install python-dotenv

In [None]:
!pip install langchain_community
!pip install huggingface_hub

In [3]:
!pip install pypdf



In [None]:
!pip install --upgrade langchain-pinecone

In [5]:
model_name_or_path = "TheBloke/Llama-2-7B-Chat-GGML"
model_basename = "llama-2-7b-chat.ggmlv3.q4_0.bin" # the model is in bin format

In [6]:
from huggingface_hub import hf_hub_download
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

llama-2-7b-chat.ggmlv3.q4_0.bin:   0%|          | 0.00/3.79G [00:00<?, ?B/s]

In [7]:
zip_link = "https://drive.google.com/file/d/1PaIGJpu7TGySFxEA5ljfvnX0P0pW5C6t/view?usp=drive_link"

### Downloading the resources pdf

In [8]:
import os
import requests

# Create a 'pdfs' folder if it doesn't exist
if not os.path.exists('data'):
    os.makedirs('data')

# URL of the PDF you want to download
pdf_url = book_link

# Get the filename from the URL
filename = pdf_url.split("/")[-1]

# Full path where the PDF will be saved
save_path = os.path.join('data', filename)

# Download the PDF
response = requests.get(pdf_url)

# Check if the request was successful
if response.status_code == 200:
    # Write the content to a file
    with open(save_path, 'wb') as file:
        file.write(response.content)
    print(f"PDF downloaded and saved to {save_path}")
else:
    print(f"Failed to download PDF. Status code: {response.status_code}")

PDF downloaded and saved to data/view?usp=drive_link


In [7]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

In [8]:
PINECONE_API_KEY="your-api-key-here"

PINECONE_API_ENV="starter"

In [9]:
from langchain.document_loaders import PyPDFDirectoryLoader

In [12]:
loader = PyPDFDirectoryLoader("/kaggle/input/apple-orchard-res-1/Apple-Chatbot-resources-pdf")

In [13]:
extracted_data = loader.load()

In [14]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [15]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 2449


In [10]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [11]:
embeddings = download_hugging_face_embeddings()

  warn_deprecated(
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [12]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [13]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [14]:
index_name="apple-chatbot"

In [15]:
import os
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(
    api_key=PINECONE_API_KEY
)
index = pc.Index(index_name)

In [22]:
# Create embeddings for your text chunks
embedded_texts = embeddings.embed_documents([t.page_content for t in text_chunks])

# Prepare vectors for upsert
vectors_to_upsert = []
for i, (chunk, embedding) in enumerate(zip(text_chunks, embedded_texts)):
    vector = {
        "id": f"chunk_{i}",
        "values": embedding,
        "metadata": {
            "text": chunk.page_content,
            # Add any other metadata you want to include
        }
    }
    vectors_to_upsert.append(vector)

# Function to split list into chunks
def chunk_list(lst, chunk_size):
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

# Split vectors into smaller batches
batch_size = 100  # You might need to adjust this
batches = chunk_list(vectors_to_upsert, batch_size)

# Upsert batches to Pinecone
for i, batch in enumerate(batches):
    try:
        index.upsert(
            vectors=batch,
            namespace="ns1"  # Replace with your desired namespace
        )
        print(f"Batch {i+1}/{len(batches)} upserted successfully")
    except Exception as e:
        print(f"Error upserting batch {i+1}: {str(e)}")
        # You might want to implement retry logic here

print("Upsert completed")

Batch 1/25 upserted successfully
Batch 2/25 upserted successfully
Batch 3/25 upserted successfully
Batch 4/25 upserted successfully
Batch 5/25 upserted successfully
Batch 6/25 upserted successfully
Batch 7/25 upserted successfully
Batch 8/25 upserted successfully
Batch 9/25 upserted successfully
Batch 10/25 upserted successfully
Batch 11/25 upserted successfully
Batch 12/25 upserted successfully
Batch 13/25 upserted successfully
Batch 14/25 upserted successfully
Batch 15/25 upserted successfully
Batch 16/25 upserted successfully
Batch 17/25 upserted successfully
Batch 18/25 upserted successfully
Batch 19/25 upserted successfully
Batch 20/25 upserted successfully
Batch 21/25 upserted successfully
Batch 22/25 upserted successfully
Batch 23/25 upserted successfully
Batch 24/25 upserted successfully
Batch 25/25 upserted successfully
Upsert completed


In [16]:
# Your query
query = "What are different soil types for apple cultivation"

# Generate embedding for the query
query_embedding = embeddings.embed_query(query)

# Perform similarity search
search_results = index.query(
    namespace="ns1",  # Replace with your actual namespace
    vector=query_embedding,
    top_k=3,  # Number of results you want
    include_values=True,
    include_metadata=True
)

# Process and print results
print("Results:")
for match in search_results['matches']:
    print(f"ID: {match['id']}")
    print(f"Score: {match['score']}")
    print(f"Metadata: {match['metadata']}")
    print("---")

Results:
ID: chunk_422
Score: 0.744673252
Metadata: {'text': 'Apple Hi-Tech Cultivation Practices38available soil moisture(A.S.M) is considered adequate for all soil types. With'}
---
ID: chunk_303
Score: 0.732027709
Metadata: {'text': 'Apple Hi-Tech Cultivation Practices123.  soil       \nDeep loam soil is best for apples. Clay-loam soils having good drainage \ncan also be used. In sandy-loam soils, apple trees are adversely affected by'}
---
ID: chunk_2177
Score: 0.70661062
Metadata: {'text': 'apple can be grown in wide range of soil but deep fertile loamy soil (2-2.5 \nm deep) is suitable for its cultivation. the presence of lime in the soil is good for \napple cultivation. p roper drainage arrangements should be made in heavy flat \nsoil for safe drainage of excess water from the field and pH range 5.8 to 6.2 is an ideal for apple cultivation.\napple can be grown normally in areas where chilling hours varied from'}
---


In [None]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [17]:
prompt_template2="""
Helpful Answer for Farmer:
Use the following pieces of information to answer the farmer's question about apple orchard management:

    Context: {context}
    Question: {question}

If you don't know the answer, say "I'm not sure, but I can try to find more information for you."
Only return the helpful answer below and nothing else.
Helpful Answer:
"""

In [18]:
PROMPT=PromptTemplate(template=prompt_template2, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [19]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [20]:
if device == "cuda":
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

In [21]:
from accelerate import Accelerator
accelerator = Accelerator()

In [22]:
llm=CTransformers(model=model_path,
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

In [23]:
llm = accelerator.prepare(llm)

In [24]:
from langchain.vectorstores import Pinecone as LangchainPinecone
from langchain.chains import RetrievalQA
from pinecone import Pinecone

# Create a LangChain vectorstore
docsearch = LangchainPinecone(index, embeddings.embed_query, "text")

  warn_deprecated(


In [25]:
qa=RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever(search_kwargs={'k': 3}),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [26]:
while True:
    user_input=input(f"Input Prompt Stop(s):")
    if user_input == "s":
        break
    result=qa({"query": user_input})
    print("Response : ", result["result"])

Input Prompt Stop(s): apple pests


  warn_deprecated(


Response :  There are several pests that can affect apple orchards, including codling moths, apple maggots, and aphids. To manage these pests, it's important to use integrated pest management (IPM) strategies that combine cultural, biological, and chemical controls. This can include things like:

* Removing weeds and debris around the orchard to reduce hiding places for pests
* Using natural predators or parasites to control pest populations
* Applying insecticides only when necessary and using the least toxic options first
* Monitoring pest populations regularly to catch any issues early

I hope this helps! Let me know if you have any other questions.


Input Prompt Stop(s): how to save my apple from pests?


Response :  There are several methods you can use to manage pests in your apple orchard, including:

1. Integrated Pest Management (IPM) techniques, such as using a combination of chemical and biological controls, to minimize the use of chemical pesticides.
2. Practicing good orchard hygiene, such as removing weeds and debris, can help reduce the likelihood of pests being present in the orchard.
3. Using natural predators or parasites, such as lady beetles or lacewings, to control pest populations.
4. Planting resistant cultivars, which are bred to be less susceptible to certain pests.
5. Implementing cultural practices, such as thinning fruit to reduce the density of trees and increase air circulation, can help prevent pests from becoming established.
6. Using traps or barriers to keep pests away from the orchard.
7. Consider using organic pesticides, which are derived from natural sources and are considered safer for the environment and human consumption.
8. Monitoring pest populatio

Input Prompt Stop(s): s


### getting the requirements, but ineffecitve due to absence of venv

In [None]:
!pip freeze > requirements.txt