# AI PlantDocBot: Intelligent Plant Disease Diagnosis

Project notebook for Day 1 & Day 2: environment setup, dataset download, data mapping, and sample image display.


## Objective
The goal is to develop an AI-powered chatbot that diagnoses plant diseases from uploaded leaf images or from a text description of the symptoms. This notebook contains the code for Day 1 (environment setup and dataset download) and Day 2 (data mapping and visualization).


## Day 1 — Environment & Dataset Download
Create folders and download datasets (run shell git commands in a notebook environment such as Colab).


In [None]:
# Day 1
# Imports and folder creation
import os 
from pathlib import Path

# Project workspace on the notebook VM. `base` stays a plain string because
# it is joined with os.path and interpolated into the `git clone` commands.
base = "/content/PlantDocBot"

# One sub-folder per data source. parents=True also creates `base` and
# `base/data`; exist_ok=True makes the cell safe to re-run.
data_root = Path(base) / "data"
for source_name in ("plantvillage", "plantdoc", "text_corpus"):
    (data_root / source_name).mkdir(parents=True, exist_ok=True)

print("Folders created under", base)

# Download both datasets with `git clone`. The leading `!` runs the line as a
# shell command and `{base}` is expanded from the Python variable, so these
# lines only work in an IPython/Jupyter environment such as Colab.
# NOTE(review): git refuses to clone into a non-empty directory, so re-running
# this cell after a successful download presumably just prints an error.
!git clone https://github.com/spMohanty/plantvillage-Dataset.git "{base}/data/plantvillage"
!git clone https://github.com/pratikkayal/PlantDoc-Dataset.git "{base}/data/plantdoc"

import os

# Sanity check: show up to 20 top-level entries of each cloned dataset folder.
for dataset_name in ("plantvillage", "plantdoc"):
    dataset_dir = os.path.join(base, "data", dataset_name)
    print("\nContents of ", dataset_name, ":")
    top_level = os.listdir(dataset_dir)
    print(top_level[:20])

# Locate the PlantVillage images: record every directory that directly
# contains image files together with its image count, then pick an image root.
pv_base = os.path.join(base, "data", "plantvillage")
img_exts = ('.jpg', '.jpeg', '.png', '.bmp')
found_dirs = []
for root_dir, dirs, files in os.walk(pv_base):
    # Sort in place so the traversal order (and therefore the chosen root) is
    # the same on every run; os.walk otherwise follows the listing order.
    dirs.sort()
    count = sum(1 for f in files if f.lower().endswith(img_exts))
    if count > 0:
        found_dirs.append((root_dir, count))
if not found_dirs:
    print("No image files found inside PlantVillage Folder.")
else:
    print("Found image directories. Sample list(first 10):")
    for d, c in found_dirs[:10]:
        print(" ", d, "-", c, "images")
    first_image_dir = found_dirs[0][0]
    # Labels are inferred from the first sub-folder below img_root, so
    # img_root has to be the folder that CONTAINS the per-class folders. In a
    # class-per-folder layout the first image-bearing directory is itself a
    # class folder, so step up one level -- but never above pv_base.
    if os.path.normpath(first_image_dir) == os.path.normpath(pv_base):
        img_root = pv_base
    else:
        img_root = os.path.dirname(first_image_dir)
    print("\nUsing image root:", img_root)


## Day 2 — Build CSV mapping (image_path -> label)
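This cell first displays one randomly chosen sample image, then collects every image under `img_root`, infers each image's label from the first subfolder below `img_root`, and saves the mapping as `image_data.csv` in the project's `data` folder.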


In [None]:
#Day 2
import matplotlib.pyplot as plt
from PIL import Image
import random
import os
import numpy as np

# Show one randomly chosen image from the first directory under img_root that
# contains image files. img_root and img_exts come from the Day 1 cell.
if 'img_root' in globals():
    sample_file = None
    for root_dir, dirs, files in os.walk(img_root):
        img_files = [f for f in files if f.lower().endswith(img_exts)]
        if img_files:
            sample_file = os.path.join(root_dir, random.choice(img_files))
            break

    if sample_file:
        print("Displaying color image:", sample_file)
        # Image.open is lazy and keeps the file open; the context manager
        # releases the handle as soon as the pixels have been copied out.
        with Image.open(sample_file) as img:
            # Check mode
            print("Original image mode:", img.mode)

            # Convert to true RGB if not already
            if img.mode != 'RGB':
                img = img.convert('RGB')
            pixels = np.asarray(img)

        # Use NumPy + matplotlib to ensure correct color display
        plt.figure(figsize=(6, 6))
        plt.imshow(pixels)
        plt.axis('off')
        plt.show()
    else:
        print("No images found under img_root.")
else:
    print("img_root not defined-previous detection failed.")

# Build the CSV mapping image_path -> label
import pandas as pd


def infer_label(image_path, root):
    """Return the class label of *image_path* relative to *root*.

    The label is the first directory component of the path relative to
    *root*. An image stored directly in *root* has no such component, so the
    name of *root* itself is used instead of the image's file name.
    """
    parts = os.path.relpath(image_path, root).split(os.sep)
    if len(parts) > 1:
        return parts[0]
    return os.path.basename(os.path.normpath(root))


records = []
if 'img_root' in globals():
    for root_dir, dirs, files in os.walk(img_root):
        for f in files:
            if f.lower().endswith(img_exts):
                path = os.path.join(root_dir, f)
                records.append({"image_path": path,
                                "label": infer_label(path, img_root)})

    # Name the columns explicitly so an empty result still gets a CSV header.
    df = pd.DataFrame(records, columns=["image_path", "label"])
    print("Total images found:", len(df))
    print("Sample rows :")
    print(df.head())

    out_csv = os.path.join(base, "data", "image_data.csv")
    df.to_csv(out_csv, index=False)
    print("Saved mapping to", out_csv)
else:
    # Report the skipped step instead of silently producing nothing.
    print("img_root not defined-previous detection failed.")

