In [None]:
!pip install langchain faiss-cpu transformers accelerate sentence-transformers pypdf python-docx unstructured

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting pypdf
  Downloading pypdf-5.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting unstructured
  Downloading unstructured-0.17.2-py3-none-any.whl.metadata (24 kB)
Collecting filetype (from unstructured)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting python-magic (from unstructured)
  Downloading python_magic-0.4.27-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting emoji (from unstructured)
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Collecting dataclasses-json (from unstructured)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting python-iso639 (from unstructured)
  Downloading python_iso639-2025.2.18-py3-none-any.whl.metadata (14 kB)
Collecting langdetect (from unstructured)
  Downloading langdetect-1.0.9

In [None]:
!pip install -U langchain langchain-community


Collecting langchain
  Downloading langchain-0.3.22-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.20-py3-none-any.whl.metadata (2.4 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading langchain-0.3.22-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_community-0.3.20-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m52.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloadi

In [None]:
from google.colab import files
from langchain_community.document_loaders import PyPDFLoader, TextLoader, UnstructuredWordDocumentLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import os

# Crop-rotation question answering over uploaded documents:
# upload -> load -> chunk -> embed into FAISS -> RetrievalQA with a hosted LLM
# -> interactive question loop.

# Hugging Face token: keep a token that is already in the environment and only
# prompt when none is set. Assigning "" here would erase a valid token and leave
# the LLM client without credentials; getpass also keeps the secret out of the
# notebook source and its saved output.
from getpass import getpass

if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

# Upload multiple files
uploaded = files.upload()
file_paths = list(uploaded.keys())

# Load documents (PDF, DOCX, TXT); unsupported extensions are reported and skipped
all_docs = []
for path in file_paths:
    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        loader = PyPDFLoader(path)
    elif ext == ".txt":
        loader = TextLoader(path)
    elif ext in [".docx", ".doc"]:
        loader = UnstructuredWordDocumentLoader(path)
    else:
        print(f"Unsupported file type: {path}")
        continue
    docs = loader.load()
    # Record only the file name so the "Sources used" listing stays short
    for doc in docs:
        doc.metadata["source"] = os.path.basename(path)
    all_docs.extend(docs)

# Split text into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(all_docs)

# Fail early with a clear message instead of letting index construction fail
# on an empty corpus (nothing uploaded, or only unsupported/empty files).
if not chunks:
    raise ValueError(
        "No text could be loaded from the uploaded files; "
        "upload at least one non-empty .pdf, .txt, .docx or .doc file."
    )

# Embed chunks
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embedding_model)

# Load LLM from Hugging Face (e.g., Mistral)
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.5, "max_new_tokens": 512}
)

# Strict Prompt Template
custom_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a helpful and concise crop rotation assistant.

ONLY use the context below to answer the question.
DO NOT GUESS or add extra explanations.
If the answer isn't clearly in the context, say: "This question may require a deeper analysis by an agronomy expert."

Context:
{context}

Question: {question}
Answer:
"""
)

# Set up retrieval with more chunks
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 8})

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,  # Show sources!
    chain_type_kwargs={"prompt": custom_prompt}
)

# Ask user questions
while True:
    try:
        query = input("\n Ask your crop rotation question (or type 'exit'): ").strip()
    except (KeyboardInterrupt, EOFError):
        # Interrupting the input box ends the session without a traceback
        break
    if query.lower() == "exit":
        break
    if not query:
        # Nothing to retrieve for an empty question; ask again
        continue

    # .invoke() replaces the deprecated direct call qa_chain({...})
    result = qa_chain.invoke({"query": query})

    # The generated text can begin with an echo of the whole prompt (instructions
    # + context + question), as seen in this notebook's recorded output. Rebuild
    # the part of the prompt that follows the context and keep only what comes
    # after it; if no echo is found the text is used as returned.
    prompt_tail = custom_prompt.template.rsplit("{context}", 1)[-1].format(question=query).strip()
    _, echoed, generated = result["result"].partition(prompt_tail)
    answer = (generated if echoed else result["result"]).strip()
    print(f"\n Answer:\n{answer}\n")

    # Optional: show source file names, each file once, in retrieval order
    print(" Sources used:")
    for source in dict.fromkeys(doc.metadata["source"] for doc in result["source_documents"]):
        print(f" - {source}")

Saving researchgatepaper.docx to researchgatepaper (5).docx
Saving crop rotation qa.txt to crop rotation qa (5).txt
Saving intercropping.pdf to intercropping (4).pdf
Saving crop-rotation.pdf to crop-rotation (5).pdf

❓ Ask your crop rotation question (or type 'exit'): if i plant tomato this year what can i plant next?


  result = qa_chain({"query": query})



💡 Answer:

You are a helpful and concise crop rotation assistant.

ONLY use the context below to answer the question.
DO NOT GUESS or add extra explanations.
If the answer isn't clearly in the context, say: "This question may require a deeper analysis by an agronomy expert."

Context:
Even Star Organic Farm
Brett Grohsgal, Md.
Crimson Clover
Tomatoes OR Peppers
Red Clover Winter Brassicas Lettuce (strip crop)
Red Clover
Okra – Flowers – Basil
Winter 
Brassicas Vetch Lettuce 
(strip crop)
Vetch
Cucurbits
Crimson 
Clover
Lettuce 
(strip crop)
Winter 
Brassicas
Red Clover
Red Clover Winter 
Brassicas
Lettuce 
(strip crop)
Return to Year One
Four Winds Farm
Polly & Jay Armour, N.Y.
Oats
Potatoes OR Tomatoesa
Straw mulch Garlic
(in alternate 
beds)b
Winter Squash
(in alternate 
beds) Straw mulch
Straw mulch
Beans
Compost
Direct-Seeded Quick Crops / 
Small-Seeded Greens / Radishes
Cucumbers 
(mulched with 
straw)
Lettuce
Return to Year One
Calvert’s Gift Farm
Jack Gurley, Md.
Garlic
Winter 

KeyboardInterrupt: Interrupted by user

In [None]:
from google.colab import files
from langchain_community.document_loaders import PyPDFLoader, TextLoader, UnstructuredWordDocumentLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.retrievers.ensemble import EnsembleRetriever
from langchain.retrievers import BM25Retriever
import os

# Crop-rotation question answering over uploaded documents, using a hybrid
# retriever (BM25 keyword search + FAISS vector search) in front of a hosted LLM.

# Hugging Face token: keep a token that is already in the environment and only
# prompt when none is set. Assigning "" here would erase a valid token and leave
# the LLM client without credentials; getpass also keeps the secret out of the
# notebook source and its saved output.
from getpass import getpass

if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

# Upload files
uploaded = files.upload()
file_paths = list(uploaded.keys())

# Load files; unsupported extensions are reported and skipped
all_docs = []
for path in file_paths:
    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        loader = PyPDFLoader(path)
    elif ext == ".txt":
        loader = TextLoader(path)
    elif ext in [".docx", ".doc"]:
        loader = UnstructuredWordDocumentLoader(path)
    else:
        print(f"Unsupported file: {path}")
        continue
    docs = loader.load()
    # Record only the file name so the "Sources used" listing stays short
    for doc in docs:
        doc.metadata["source"] = os.path.basename(path)
    all_docs.extend(docs)

# Smarter chunking: smaller chunks than the first experiment (500 vs 1000 chars)
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(all_docs)

# Fail early with a clear message instead of letting index construction fail
# on an empty corpus (nothing uploaded, or only unsupported/empty files).
if not chunks:
    raise ValueError(
        "No text could be loaded from the uploaded files; "
        "upload at least one non-empty .pdf, .txt, .docx or .doc file."
    )

# Vector retriever
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embedding_model)
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 8})

# Add keyword-based BM25 retriever
bm25_retriever = BM25Retriever.from_documents(chunks)
bm25_retriever.k = 8

# Combine both into an ensemble, weighting keyword and vector results equally
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, vector_retriever], weights=[0.5, 0.5])

# Load Mistral LLM
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.4, "max_new_tokens": 512}
)

# Prompt Template
custom_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a helpful and concise crop rotation assistant.

Only use the context below to answer the question. If the answer isn't directly in the context, say:
"This question may require a deeper analysis by an agronomy expert."

Context:
{context}

Question: {question}
Answer:
"""
)

# Final QA Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=ensemble_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": custom_prompt}
)

# Ask loop with source debugging
while True:
    try:
        query = input("\n Ask your crop rotation question (or type 'exit'): ").strip()
    except (KeyboardInterrupt, EOFError):
        # Interrupting the input box ends the session without a traceback
        break
    if query.lower() == "exit":
        break
    if not query:
        # Nothing to retrieve for an empty question; ask again
        continue

    # .invoke() replaces the deprecated direct call qa_chain({...})
    result = qa_chain.invoke({"query": query})

    # The generated text can begin with an echo of the whole prompt (instructions
    # + context + question), as seen in this notebook's recorded output. Rebuild
    # the part of the prompt that follows the context and keep only what comes
    # after it; if no echo is found the text is used as returned.
    prompt_tail = custom_prompt.template.rsplit("{context}", 1)[-1].format(question=query).strip()
    _, echoed, generated = result["result"].partition(prompt_tail)
    answer = (generated if echoed else result["result"]).strip()
    print(f"\n Answer:\n{answer}\n")

    # List each contributing file once, in retrieval order
    print(" Sources used:")
    for source in dict.fromkeys(doc.metadata["source"] for doc in result["source_documents"]):
        print(f" - {source}")

Saving researchgatepaper.docx to researchgatepaper (7).docx
Saving crop rotation qa.txt to crop rotation qa (7).txt
Saving intercropping.pdf to intercropping (6).pdf
Saving crop-rotation.pdf to crop-rotation (7).pdf

❓ Ask your crop rotation question (or type 'exit'): If I plant tomato this year, what can I plant next?





💡 Answer:

You are a helpful and concise crop rotation assistant.

Only use the context below to answer the question. If the answer isn't directly in the context, say:
"This question may require a deeper analysis by an agronomy expert."

Context:
related (varieties of S. trifoliorum); each can attack a 
variety of crops, including pea, lettuce, and possibly 
other plants (36).
8 H XXXX 
8 I D, sclerotinia stalk rot (67; see appendix 3, p. 124).
8 J D, lettuce, cabbage, and cress can be symptomless 
carriers of Colletotrichum coccodes, which causes tomato 
anthracnose and black dot. C-, short-season salad 
greens act as a cover crop and are harvested in time to 
plant tomato, eggplant, or pepper (83).

accumulate organic matter and nitrogen.
• Grow winter-killed cover crops (oat-pea) before early-
season crops, so the seedbed will be easy to prepare.
• Never grow any crop after itself.
Nightshades (tomatoes, potatoes, peppers, eggplants):
• Grow tomatoes after peas, lettuce, or spinach




💡 Answer:

You are a helpful and concise crop rotation assistant.

Only use the context below to answer the question. If the answer isn't directly in the context, say:
"This question may require a deeper analysis by an agronomy expert."

Context:
Nitrogen fixing crops should alternate with nitrogen demanding crops. . . .
Wherever possible, catch crops, green manures, and undersowing techniques should be used to keep the soil covered. . . .
Crops which develop slowly and are therefore susceptible to weeds should follow weed suppressing crops. . . .
Alternate between leaf and straw crops. . . .
Where a risk of disease or soil borne pest problems exists, potential host crops should only occur in the rotation at

if it is harvested for silage, sow a winter grain cover crop 
after harvest. If the corn is harvested as grain, consider in-
terseeding a cover crop like red clover or annual ryegrass 
just after last cultivation (96). Planting the cover crop af -
ter the corn is well established

In [None]:

from google.colab import files
from langchain_community.document_loaders import PyPDFLoader, TextLoader, UnstructuredWordDocumentLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.retrievers import BM25Retriever
from langchain.retrievers.ensemble import EnsembleRetriever
import os

# Crop-rotation question answering over uploaded documents, using de-duplicated
# chunks and a hybrid retriever (BM25 keyword search + FAISS vector search) in
# front of a hosted LLM.

# Hugging Face token: keep a token that is already in the environment and only
# prompt when none is set. Assigning "" here would erase a valid token and leave
# the LLM client without credentials; getpass also keeps the secret out of the
# notebook source and its saved output.
from getpass import getpass

if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

# Upload files
uploaded = files.upload()
file_paths = list(uploaded.keys())

# Load and parse all documents; unsupported extensions are reported and skipped
all_docs = []
for path in file_paths:
    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        loader = PyPDFLoader(path)
    elif ext == ".txt":
        loader = TextLoader(path)
    elif ext in [".docx", ".doc"]:
        loader = UnstructuredWordDocumentLoader(path)
    else:
        print(f" Unsupported file type: {path}")
        continue
    docs = loader.load()
    # Record only the file name so the "Sources used" listing stays short
    for doc in docs:
        doc.metadata["source"] = os.path.basename(path)
    all_docs.extend(docs)

# Chunking + deduplication
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(all_docs)

# Deduplicate chunks by whitespace-stripped text content; the first chunk seen
# for a given text is the one that is kept.
unique_texts = {}
for doc in chunks:
    unique_texts.setdefault(doc.page_content.strip(), doc)
unique_chunks = list(unique_texts.values())
print(f" {len(unique_chunks)} unique chunks loaded (deduped from {len(chunks)} raw chunks)")

# Fail early with a clear message instead of letting index construction fail
# on an empty corpus (nothing uploaded, or only unsupported/empty files).
if not unique_chunks:
    raise ValueError(
        "No text could be loaded from the uploaded files; "
        "upload at least one non-empty .pdf, .txt, .docx or .doc file."
    )

# Embedding & vector retriever
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(unique_chunks, embedding_model)
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 8})

# BM25 keyword retriever
bm25_retriever = BM25Retriever.from_documents(unique_chunks)
bm25_retriever.k = 8

# Combine them: ensemble retriever, weighting keyword and vector results equally
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.5, 0.5]
)

# Load Mistral 7B-Instruct via Hugging Face Hub
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.4, "max_new_tokens": 512}
)

# Custom crop rotation prompt
custom_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a helpful and concise crop rotation assistant.

Use only the context below to answer the user's question.
NEVER add extra questions or answers that the user didn't ask.
If the answer isn't in the context, just say: "This question may require a deeper analysis by an agronomy expert."

Context:
{context}

Question: {question}
Answer (only answer this exact question):
"""
)


# Retrieval QA setup
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=ensemble_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": custom_prompt}
)

# Ask Questions in a Loop
while True:
    try:
        query = input("\n Ask your crop rotation question (or type 'exit'): ").strip()
    except (KeyboardInterrupt, EOFError):
        # Interrupting the input box ends the session without a traceback
        break
    if query.lower() == "exit":
        break
    if not query:
        # Nothing to retrieve for an empty question; ask again
        continue

    # .invoke() replaces the deprecated direct call qa_chain({...})
    result = qa_chain.invoke({"query": query})

    # The generated text can begin with an echo of the whole prompt (instructions
    # + context + question), as seen in this notebook's recorded output. Rebuild
    # the part of the prompt that follows the context and keep only what comes
    # after it; if no echo is found the text is used as returned.
    prompt_tail = custom_prompt.template.rsplit("{context}", 1)[-1].format(question=query).strip()
    _, echoed, generated = result["result"].partition(prompt_tail)
    answer = (generated if echoed else result["result"]).strip()
    print(f"\n  Answer:\n{answer}\n")

    # List each contributing file once, in retrieval order
    print("Sources used:")
    for source in dict.fromkeys(doc.metadata["source"] for doc in result["source_documents"]):
        print(f" - {source}")



/bin/bash: -c: line 1: syntax error near unexpected token `('
/bin/bash: -c: line 1: `pip install rank_bm25# ✅ Install required packages (run once if not installed)'


Saving researchgatepaper.docx to researchgatepaper (8).docx
Saving crop rotation qa.txt to crop rotation qa (8).txt
Saving intercropping.pdf to intercropping (7).pdf
Saving crop-rotation.pdf to crop-rotation (8).pdf
✅ 1512 unique chunks loaded (deduped from 1512 raw chunks)

❓ Ask your crop rotation question (or type 'exit'): If I plant tomato this year, what can I plant next?





💡 Answer:

You are a helpful and concise crop rotation assistant.

Use the context below to answer the user's question.
Only answer the question asked. Do not add extra examples or Q&A.
If the answer isn't in the context, just say "This question may require a deeper analysis by an agronomy expert."

Context:
related (varieties of S. trifoliorum); each can attack a 
variety of crops, including pea, lettuce, and possibly 
other plants (36).
8 H XXXX 
8 I D, sclerotinia stalk rot (67; see appendix 3, p. 124).
8 J D, lettuce, cabbage, and cress can be symptomless 
carriers of Colletotrichum coccodes, which causes tomato 
anthracnose and black dot. C-, short-season salad 
greens act as a cover crop and are harvested in time to 
plant tomato, eggplant, or pepper (83).

accumulate organic matter and nitrogen.
• Grow winter-killed cover crops (oat-pea) before early-
season crops, so the seedbed will be easy to prepare.
• Never grow any crop after itself.
Nightshades (tomatoes, potatoes, peppe




💡 Answer:

You are a helpful and concise crop rotation assistant.

Use the context below to answer the user's question.
Only answer the question asked. Do not add extra examples or Q&A.
If the answer isn't in the context, just say "This question may require a deeper analysis by an agronomy expert."

Context:
What are the best crops to include in a crop rotation system?
The best crops to rotate depend on your soil and region. Still, common systems include rotating
1. Nitrogen-fixing legumes- e.g., beans and peas can enrich the soil with nitrogen.
2. Nitrogen-demanding crops, such as corn and wheat, require higher nitrogen levels and benefit from the nutrients provided by previous
legumes.

Nitrogen fixing crops should alternate with nitrogen demanding crops. . . .
Wherever possible, catch crops, green manures, and undersowing techniques should be used to keep the soil covered. . . .
Crops which develop slowly and are therefore susceptible to weeds should follow weed suppressing crops. 




💡 Answer:

You are a helpful and concise crop rotation assistant.

Use the context below to answer the user's question.
Only answer the question asked. Do not add extra examples or Q&A.
If the answer isn't in the context, just say "This question may require a deeper analysis by an agronomy expert."

Context:
How often should I rotate my crops?
It's generally recommended to rotate crops every one to three growing seasons, depending on the crops and soil conditions. A typical rotation cycle could include legumes followed by grains, then root crops or leafy greens.
Can crop rotation be practiced on a small farm or garden?

and an aboveground crop.
• Grow legume cover crops before potatoes or corn, so 
that they can feed the crops.
• Grow potatoes before crops that are poor competitors, 
because potato production involves aggressive 
cultivation and further working of the soil during 
harvest, both of which reduce weed pressure.
• Avoid growing potatoes before corn, because both are 
heav