In [13]:
"""
Simplified Plant Disease Report Generator: LLM Implementation
============================================================

This implementation provides a more straightforward approach to creating an LLM-based system
for generating plant disease reports, with minimal dependencies.
"""

import os
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from PIL import Image
import json
import requests

class PlantDiseaseReportGenerator:
    """
    A simplified LLM-based model for generating plant disease reports
    that uses a pre-trained language model without complex training.
    """

    def __init__(self, model_name="gpt2-medium"):
        """
        Initialize the plant disease report generator with a specified pre-trained model.

        Args:
            model_name (str): The name of the pre-trained model to use.
                              Default is "gpt2-medium".
        """
        self.model_name = model_name

        # Check if torch is available
        if not torch.cuda.is_available():
            print("CUDA not available, using CPU. This may be slow.")

        # Initialize tokenizer and model
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(model_name)

            # For GPT-2 models, ensure padding token is set
            if "gpt2" in model_name:
                self.tokenizer.pad_token = self.tokenizer.eos_token

        except Exception as e:
            print(f"Error loading model: {e}")
            print("Try installing the required packages with:")
            print("!pip install transformers torch")
            raise

        # Load plant disease knowledge base if available
        self.knowledge_base = self._load_knowledge_base()

    def _load_knowledge_base(self, file_path="plant_diseases.json"):
        """
        Load plant disease knowledge from a JSON file.

        Args:
            file_path (str): Path to the knowledge base file.

        Returns:
            dict: The loaded knowledge base or an empty dict if file not found.
        """
        try:
            if os.path.exists(file_path):
                with open(file_path, 'r') as f:
                    return json.load(f)
            else:
                # Create a minimal default knowledge base
                return {
                    "common_diseases": [
                        {
                            "name": "Powdery Mildew",
                            "symptoms": "White powdery spots on leaves and stems",
                            "treatment": "Apply fungicide, improve air circulation",
                            "prevention": "Avoid overhead watering, use resistant varieties"
                        },
                        {
                            "name": "Leaf Spot",
                            "symptoms": "Brown or black spots on leaves",
                            "treatment": "Remove affected leaves, apply fungicide",
                            "prevention": "Avoid wetting foliage, ensure proper spacing"
                        },
                        {
                            "name": "Root Rot",
                            "symptoms": "Wilting despite adequate water, yellowing leaves, brown roots",
                            "treatment": "Improve drainage, reduce watering, apply fungicide",
                            "prevention": "Well-draining soil, proper watering schedule"
                        }
                    ]
                }
        except Exception as e:
            print(f"Error loading knowledge base: {e}. Using empty knowledge base.")
            return {}

    def create_prompt(self, symptoms, environment_info=None):
        """
        Create a prompt for the LLM based on symptoms and environment information.

        Args:
            symptoms (str): Description of plant symptoms.
            environment_info (str, optional): Environmental conditions.

        Returns:
            str: Formatted prompt for the model.
        """
        # Base prompt with instructions to the model
        prompt = (
            "You are a plant disease expert system. Generate a detailed report for a plant showing these symptoms:\n\n"
            f"SYMPTOMS: {symptoms}\n\n"
        )

        # Add environment information if provided
        if environment_info:
            prompt += f"GROWING CONDITIONS: {environment_info}\n\n"

        # Add structured output format instructions
        prompt += (
            "Provide a detailed report using this format:\n"
            "1. POSSIBLE DISEASE: [disease name]\n"
            "2. DISEASE DESCRIPTION: [brief description]\n"
            "3. SYMPTOMS ANALYSIS: [detailed analysis of symptoms]\n"
            "4. RECOMMENDED TREATMENT: [treatment options]\n"
            "5. PREVENTION MEASURES: [how to prevent future occurrences]\n"
            "6. ADDITIONAL NOTES: [any other relevant information]\n\n"
        )

        # Add knowledge context from our database if symptoms match
        relevant_diseases = []
        if self.knowledge_base and "common_diseases" in self.knowledge_base:
            for disease in self.knowledge_base["common_diseases"]:
                # Simple keyword matching (could be improved with NLP)
                if any(keyword in symptoms.lower() for keyword in disease["symptoms"].lower().split()):
                    relevant_diseases.append(disease)

        # Add up to 3 most relevant diseases as context
        if relevant_diseases:
            prompt += "REFERENCE INFORMATION:\n"
            for i, disease in enumerate(relevant_diseases[:3]):
                prompt += f"Disease {i+1}: {disease['name']}\n"
                prompt += f"Typical symptoms: {disease['symptoms']}\n"
                prompt += f"Standard treatment: {disease['treatment']}\n\n"

        return prompt

    def generate_report(self, symptoms, environment_info=None, max_length=800):
        """
        Generate a plant disease report based on the provided symptoms.

        Args:
            symptoms (str): Description of plant symptoms.
            environment_info (str, optional): Environmental conditions.
            max_length (int): Maximum length of the generated report.

        Returns:
            dict: Structured report with sections.
        """
        try:
            # Create input prompt
            prompt = self.create_prompt(symptoms, environment_info)

            # Tokenize input
            inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)

            # Generate text
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs.input_ids,
                    max_length=max_length,
                    num_return_sequences=1,
                    do_sample=True,
                    top_p=0.95,
                    temperature=0.7,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # Decode the output
            generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            # Extract the report part (after our prompt)
            report_text = generated_text[len(prompt):] if len(generated_text) > len(prompt) else generated_text

            # Parse the structured report
            return self._parse_report(report_text)

        except Exception as e:
            return {
                "error": f"Failed to generate report: {str(e)}",
                "raw_text": "Error occurred during report generation."
            }

    def _parse_report(self, report_text):
        """
        Parse the generated text into a structured report.

        Args:
            report_text (str): Raw generated text.

        Returns:
            dict: Structured report with sections.
        """
        sections = {
            "Possible Disease": "",
            "Disease Description": "",
            "Symptoms Analysis": "",
            "Recommended Treatment": "",
            "Prevention Measures": "",
            "Additional Notes": ""
        }

        # Try to identify sections based on numbered format or keywords
        lines = report_text.split('\n')
        current_section = None

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Check for numbered sections (1. SECTION NAME: )
            if line[0].isdigit() and ". " in line[:10]:
                section_header = line[line.index(". ")+2:]
                if ":" in section_header:
                    section_name = section_header[:section_header.index(":")].strip()
                    section_content = section_header[section_header.index(":")+1:].strip()

                    # Match to our predefined sections
                    for key in sections.keys():
                        if key.lower() in section_name.lower() or section_name.lower() in key.lower():
                            current_section = key
                            sections[current_section] = section_content
                            break

            # If we have an active section, add content to it
            elif current_section:
                sections[current_section] += " " + line

            # Try to detect section headers by keywords if numbering fails
            else:
                for key in sections.keys():
                    if key.lower() in line.lower():
                        current_section = key
                        content_start = line.lower().find(key.lower()) + len(key)
                        if content_start < len(line):
                            sections[current_section] = line[content_start:].strip(": ")
                        break

        # Clean up the sections
        for key in sections:
            sections[key] = sections[key].strip()

        return sections

    def save_report_to_file(self, report, filename="plant_disease_report.txt"):
        """
        Save the generated report to a text file.

        Args:
            report (dict): The structured report.
            filename (str): The output filename.

        Returns:
            str: Path to the saved file.
        """
        try:
            with open(filename, 'w') as f:
                f.write("PLANT DISEASE ANALYSIS REPORT\n")
                f.write("=============================\n\n")

                for section, content in report.items():
                    if content:
                        f.write(f"{section}:\n")
                        f.write(f"{content}\n\n")

            return os.path.abspath(filename)
        except Exception as e:
            print(f"Error saving report: {e}")
            return None


class PlantDiseaseAPI:
    """
    Alternative implementation using a REST API to a hosted LLM service
    instead of running the model locally.
    """

    def __init__(self, api_key=None, api_url=None):
        """
        Initialize the API client.

        Args:
            api_key (str): API key for the LLM service.
            api_url (str): URL of the LLM API endpoint.
        """
        self.api_key = api_key or os.environ.get("LLM_API_KEY")
        self.api_url = api_url or "https://api.example.com/v1/completions"

        if not self.api_key:
            print("Warning: No API key provided. Set the LLM_API_KEY environment variable.")

    def generate_report(self, symptoms, environment_info=None):
        """
        Generate a plant disease report using an external API.

        Args:
            symptoms (str): Description of plant symptoms.
            environment_info (str, optional): Environmental conditions.

        Returns:
            dict: Structured report with sections.
        """
        # Create the prompt
        prompt = (
            "Generate a detailed plant disease analysis report for a plant with the following symptoms:\n\n"
            f"{symptoms}\n\n"
        )

        if environment_info:
            prompt += f"Growing conditions: {environment_info}\n\n"

        prompt += (
            "Format the response as a structured report with these sections:\n"
            "- Possible Disease\n"
            "- Disease Description\n"
            "- Symptoms Analysis\n"
            "- Recommended Treatment\n"
            "- Prevention Measures\n"
            "- Additional Notes\n"
        )

        # Prepare API request
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        data = {
            "model": "gpt-3.5-turbo",  # Example model name, replace with actual model
            "prompt": prompt,
            "max_tokens": 1000,
            "temperature": 0.7
        }

        try:
            # Send API request
            response = requests.post(self.api_url, headers=headers, json=data)
            response.raise_for_status()

            # Parse API response
            result = response.json()

            if "choices" in result and len(result["choices"]) > 0:
                report_text = result["choices"][0]["text"]

                # Parse the generated text into sections
                sections = {}
                current_section = None

                for line in report_text.split("\n"):
                    line = line.strip()
                    if not line:
                        continue

                    # Check if this line is a section header
                    is_header = False
                    for section in ["Possible Disease", "Disease Description", "Symptoms Analysis",
                                   "Recommended Treatment", "Prevention Measures", "Additional Notes"]:
                        if section in line:
                            current_section = section
                            content = line.replace(section, "", 1).strip(": -")
                            sections[current_section] = content
                            is_header = True
                            break

                    # If not a header and we have a current section, add to it
                    if not is_header and current_section:
                        sections[current_section] += " " + line

                return sections

            return {"error": "No valid response from API"}

        except Exception as e:
            return {"error": f"API request failed: {str(e)}"}


# Example usage
def example_usage():
    """
    Example of how to use the plant disease report generator.
    """
    # Sample input
    symptoms = "Yellow spots on leaves, curling leaf edges, and some white powdery substance on the underside of leaves. The plant is also showing some wilting despite regular watering."
    environment = "Indoor potted plant, bright indirect light, temperature around 75°F, watered twice a week."

    print("Initializing plant disease report generator...")

    try:
        # Try the local model approach
        generator = PlantDiseaseReportGenerator(model_name="gpt2-medium")
        report = generator.generate_report(symptoms, environment)

        print("\n=== PLANT DISEASE REPORT ===")
        for section, content in report.items():
            if content:
                print(f"\n{section}:")
                print(f"{content}")

        # Save report to file
        file_path = generator.save_report_to_file(report)
        if file_path:
            print(f"\nReport saved to: {file_path}")

    except Exception as e:
        print(f"Error with local model: {e}")
        print("\nTrying API approach instead...")

        # Fallback to API approach
        try:
            api_client = PlantDiseaseAPI()
            report = api_client.generate_report(symptoms, environment)

            print("\n=== PLANT DISEASE REPORT (API) ===")
            for section, content in report.items():
                if content:
                    print(f"\n{section}:")
                    print(f"{content}")

        except Exception as api_e:
            print(f"API approach also failed: {api_e}")
            print("\nSuggestion: Try a simpler model or check your dependencies.")


if __name__ == "__main__":
    example_usage()

Initializing plant disease report generator...



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



=== PLANT DISEASE REPORT ===

Report saved to: /content/datasets/plant_disease_report.txt
