In [1]:
pip install gradio langchain transformers faiss-gpu sentence-transformers langchain-community wikipedia -U bitsandbytes

Collecting gradio
  Downloading gradio-5.24.0-py3-none-any.whl.metadata (16 kB)
Collecting langchain
  Downloading langchain-0.3.23-py3-none-any.whl.metadata (7.8 kB)
Collecting transformers
  Downloading transformers-4.51.1-py3-none-any.whl.metadata (38 kB)
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-4.0.2-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... done
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.

In [None]:
import json
import os
from getpass import getpass

import gradio as gr
import wikipedia
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

# === Load JSON and prepare LangChain documents ===
# Location of the disease knowledge base. The code below reads it as a JSON
# array of records, each providing "disease", "symptoms", "causes" and
# "treatment" keys.
DATA_PATH = "/kaggle/input/plant-dieases-llm/plant_disease_rag_json_data_cmpltd.json"

# Decode explicitly as UTF-8 so that parsing does not depend on the platform's
# default locale encoding.
with open(DATA_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

# One Document per record: the four fields are flattened into a labelled text
# body, and the disease name is kept in the metadata.
docs = [
    Document(
        page_content=f"Disease: {item['disease']}\n"
                     f"Symptoms: {item['symptoms']}\n"
                     f"Causes: {item['causes']}\n"
                     f"Treatment: {item['treatment']}",
        metadata={"disease": item["disease"]}
    )
    for item in data
]

# === Split, Embed, and Create Vectorstore ===
# Chunking configuration: chunk_size=512 with chunk_overlap=100; the separators
# are listed from the coarsest boundary (blank line) to the finest (single space).
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " "],
    chunk_overlap=100,
    chunk_size=512,
)
split_docs = text_splitter.split_documents(docs)

# Embed every chunk with the BGE model and index the vectors in FAISS.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
db = FAISS.from_documents(split_docs, embeddings)

# Similarity search that hands back the 5 best-matching chunks per query.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# === Authenticate with the Hugging Face Hub ===
# Never paste the access token into the notebook source: read it from the
# HF_TOKEN environment variable and fall back to a hidden interactive prompt
# when the variable is unset or empty.
# NOTE(review): authentication is presumably needed for the Mistral checkpoint
# downloaded below — confirm the model's access requirements.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

# === Load Mistral-7B-Instruct with Quantization ===
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# bitsandbytes settings: 4-bit loading with double quantization switched on.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

# Single generation pipeline, used both by the LangChain wrapper below and
# directly by the fallback answer generator. The EOS token id doubles as the
# padding id, generation is capped at 256 new tokens, and
# return_full_text=False asks for the completion without the prompt.
text_gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    pad_token_id=tokenizer.eos_token_id,
    max_new_tokens=256,
)

# LangChain-compatible wrapper around the pipeline.
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

# === Custom Prompt ===
# Template text for the answer-generation prompt. It exposes three
# placeholders: {chat_history}, {context} and {question}.
_prompt_text = """
You are an expert assistant in diagnosing and explaining plant diseases.

Instructions:
- Always respond with detailed, plant-science-based explanations.
- Include causes, symptoms, and treatment plans if relevant.
- Be concise but informative.
- When unsure, say "I don't know" rather than guessing.

Previous conversation:
{chat_history}

Relevant context:
{context}

Answer the user's question: {question}
"""

custom_prompt = PromptTemplate.from_template(_prompt_text)

# === Memory to track conversation ===
# Conversation buffer for the retrieval chain. The keys line up with the rest
# of the notebook: "chat_history" is the prompt placeholder, "question" is the
# key passed to the chain, and "answer" is the key read from its result.
# NOTE(review): this is one module-level buffer, so every chat request goes
# through the same history — confirm that is acceptable when the app is shared.
memory = ConversationBufferMemory(
    return_messages=True,
    output_key="answer",
    input_key="question",
    memory_key="chat_history",
)

# === Retrieval Chain ===
# Conversational RAG chain: the FAISS retriever supplies the context, the
# buffer memory supplies the history, and the custom prompt is used by the
# document-combining (answer) step.
combine_docs_kwargs = {"prompt": custom_prompt}
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=memory,
    retriever=retriever,
    combine_docs_chain_kwargs=combine_docs_kwargs,
)

# === Wikipedia Fallback ===
def fallback_wikipedia_search(query):
    """Look up ``query`` on Wikipedia and return a short, labelled summary.

    Args:
        query: Text handed to ``wikipedia.summary``.

    Returns:
        The two-sentence summary prefixed with a "(From Wikipedia)" line, or
        a fixed apology string when the lookup raises any exception (the
        failure is deliberately swallowed so the caller always gets text).
    """
    apology = "Sorry, I couldn't find a good explanation for that."
    try:
        return f"(From Wikipedia)\n{wikipedia.summary(query, sentences=2)}"
    except Exception:
        return apology

# === Short / Poor Answer Filter ===
def is_bad_response(response: str, question: str) -> bool:
    """Decide whether a generated answer is too weak to show to the user.

    An answer is rejected when it is empty/None, shorter than 30 characters
    after stripping, merely a (case-insensitive) fragment of the question, or
    contains one of the listed give-up phrases.

    Args:
        response: Candidate answer text.
        question: The user's question the answer is checked against.

    Returns:
        True when the answer should be discarded, False otherwise.
    """
    if not response:
        return True

    trimmed = response.strip()
    if len(trimmed) < 30:
        return True

    # An answer that only echoes (part of) the question carries no information.
    if trimmed.lower() in question.strip().lower():
        return True

    lowered = response.lower()
    give_up_phrases = (
        "i don't know", "no information", "not sure", "sorry", "unable to find", "defoliated"
    )
    return any(phrase in lowered for phrase in give_up_phrases)

# === Build recent context string ===
def get_recent_context(history, n=2):
    """Format the most recent conversation turns as a plain-text transcript.

    Args:
        history: Sequence of ``(user_message, assistant_message)`` pairs,
            oldest first. May be empty or None.
        n: Maximum number of most recent turns to include.

    Returns:
        One "User: ..." line followed by one "Assistant: ..." line per turn,
        joined by newlines. An empty string when there is no history or when
        ``n`` is not positive.
    """
    # Guard before slicing: history[-0:] would return the *entire* history
    # instead of nothing, and a None history cannot be sliced at all.
    if not history or n <= 0:
        return ""
    return "\n".join(
        f"User: {user}\nAssistant: {assistant}" for user, assistant in history[-n:]
    )

# === Fallback Answer Generator for Causal LLM ===
def fallback_llm_answer(message, history, rag_context=None):
    """Generate an answer by prompting the text-generation pipeline directly.

    Args:
        message: The user's current question.
        history: Conversation so far; its last three turns are rendered into
            the prompt via ``get_recent_context``.
        rag_context: Supporting text to ground the answer. When empty or
            None, a placeholder sentence is inserted instead.

    Returns:
        The generated text of the first pipeline result, stripped of
        surrounding whitespace.
    """
    if rag_context:
        grounding = rag_context
    else:
        grounding = "No RAG context was found."
    transcript = get_recent_context(history, n=3)

    llm_input = f"""
You are an expert plant disease assistant. A user is asking about a plant disease or a follow-up question.

Context from plant disease data or Wikipedia:
{grounding}

Conversation so far:
{transcript}

User: {message}
Assistant:"""

    outputs = text_gen_pipeline(llm_input)
    return outputs[0]['generated_text'].strip()

# === Main Chat Function ===
def chat(message, history):
    """Answer one user message for the Gradio chat interface.

    The retrieval chain is tried first. If its answer is rejected by
    ``is_bad_response``, the retrieved chunks (topped up with a Wikipedia
    summary when they amount to fewer than 100 characters) are fed to
    ``fallback_llm_answer`` instead.

    Args:
        message: The user's current question.
        history: Conversation so far, as supplied by Gradio; only used by the
            fallback path.

    Returns:
        The answer text, or an "An error occurred: ..." string when any step
        raises (errors are reported in the chat rather than propagated).
    """
    try:
        result = qa_chain.invoke({"question": message})
        response = result.get("answer", "") if isinstance(result, dict) else result

        if not is_bad_response(response, message):
            return response

        # Retrievers are runnables: `invoke` replaces the deprecated
        # `get_relevant_documents`. The local name also avoids shadowing the
        # module-level `docs` list.
        retrieved = retriever.invoke(message)
        rag_context = "\n\n".join(doc.page_content for doc in retrieved) if retrieved else ""

        # Too little retrieved text to ground an answer: add a Wikipedia summary.
        if len(rag_context.strip()) < 100:
            rag_context += "\n\n" + fallback_wikipedia_search(message)

        return fallback_llm_answer(message, history, rag_context=rag_context)
    except Exception as e:
        # UI boundary: surface the failure as a chat message.
        return f"An error occurred: {str(e)}"

# === Gradio Chat Interface ===
# Chat UI wired to the `chat` handler defined in this notebook.
demo = gr.ChatInterface(
    chat,
    description="Ask about plant diseases, symptoms, causes, and treatments.",
    title="🌿 Plant Disease Assistant (Mistral-powered)",
)

# share=True additionally requests a public share link for the app.
demo.launch(share=True)

  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
  memory = ConversationBufferMemory(
  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://a440297395e799d0da.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


