<a href="https://colab.research.google.com/github/GauravPathak2023/PlantDocBot/blob/main/PlantChatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Import and folder creation
import os

#create project folder
base = "/content/PlantDocBot"
os.makedirs(os.path.join(base,"data","plantvillage"),exist_ok=True)
os.makedirs(os.path.join(base,"data","plantdoc"),exist_ok=True)
os.makedirs(os.path.join(base,"data","text_corpus"),exist_ok=True)

print("Folders created under",base)

Folders created under /content/PlantDocBot


In [6]:
#Download Dataset via git clone

base = "/content/PlantDocBot"
!git clone https://github.com/spMohanty/plantvillage-Dataset.git {base}/data/plantvillage
!git clone https://github.com/pratikkayal/PlantDoc-Dataset.git {base}/data/plantdoc


Cloning into '/content/PlantDocBot/data/plantvillage'...
remote: Enumerating objects: 163235, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 163235 (delta 2), reused 1 (delta 0), pack-reused 163229 (from 1)[K
Receiving objects: 100% (163235/163235), 2.00 GiB | 21.23 MiB/s, done.
Resolving deltas: 100% (101/101), done.
Updating files: 100% (182401/182401), done.
Cloning into '/content/PlantDocBot/data/plantdoc'...
remote: Enumerating objects: 2670, done.[K
remote: Counting objects: 100% (35/35), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 2670 (delta 22), reused 22 (delta 22), pack-reused 2635 (from 1)[K
Receiving objects: 100% (2670/2670), 932.92 MiB | 15.32 MiB/s, done.
Resolving deltas: 100% (24/24), done.
Updating files: 100% (2581/2581), done.


In [7]:
# Verify Dataset directories and list top-level content

for sub in ["plantvillage","plantdoc"]:
  path = os.path.join(base,"data",sub)
  print("\nContents of",sub,":")
  print(os.listdir(path)[:20])


Contents of plantvillage :
['leaf-map.json', 'generate_data_grayscale-80-20.sh', 'generate_data_segmented-20-80.sh', 'slurm-476492.out', 'slurm-476489.out', 'data_distribution_for_SVM', 'create_data_distribution.py', 'generate_data_segmented-50-50.sh', 'slurm-476485.out', '_generate_data.sh', 'slurm-476486.out', 'slurm-476487.out', 'create_db.py', 'slurm-476481.out', 'generate_data_color-50-50.sh', 'slurm-476493.out', 'generate_data_grayscale-20-80.sh', 'utils', 'slurm-476490.out', 'generate_mapstring.py']

Contents of plantdoc :
['LICENSE.txt', 'test', 'train', 'PlantDoc_Examples.png', 'README.md', '.git']


In [8]:
# Search for image directories inside PlantVillage

import os
pv_base = os.path.join(base,"data","plantvillage")
img_exts = ('.jpg','.jpeg','.png','.bmp')

found_dirs = []
for root_dir, dirs, files in os.walk(pv_base):
  count = sum(1 for f in files if f.lower().endswith(img_exts))
  if count>0:
    found_dirs.append((root_dir,count))

if not found_dirs:
  print("No image files found inside PlantVillage Folder.")
else:
  print("Found image directories. Sample list(first 10)")
  for d,c in found_dirs[:10]:
    print(" ",d,"-",c,"image")
  # Choose first as img_root
  img_root = found_dirs[0][0]
  print("\nUsing image root:",img_root)

Found image directories. Sample list(first 10)
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/35 - 1103 image
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/16 - 440 image
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/23 - 180 image
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/36 - 78 image
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/10 - 221 image
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/21 - 204 image
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/12 - 232 image
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/18 - 129 image
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/30 - 369 image
  /content/PlantDocBot/data/plantvillage/data_distribution_for_SVM/test/15 - 1166 image

Using image root: /content/PlantDocBot/data/plantvillage/data_distribution_for_SV

In [14]:
# Robust color display
import matplotlib.pyplot as plt
from PIL import Image
import random
import os
import numpy as np

#use img_root from previous cell
if'img_root' in globals():
  sample_file = None
  for root_dir, dirs, files in os.walk(img_root):
    img_files = [f for f in files if f.lower().endswith(img_exts)]
    if img_files:
      sample_file =os.path.join(root_dir, random.choice(img_files))
      break
      if img_file:
        print("Displaying color image:",sample_file)
        img = Image.open(sample_file)

        #Check mode
        print("Original image mode:",img.mode)

        #Convert to true RGB if not already
        if img.mode != 'RGB':
          img = img.convert('RGB')
        #Use Numpy+matplotlib to ensure correct color display
        plt.figure(figsize=(6,6))
        plt.imshow(np.asarray(img))
        plt.axis('off')
        plt.show()
    else:
          print("No images found under img_root.")
  else:
          print("img_root not defined-previous detection failed.")

In [15]:
# Build CSV Mapping image path <------
import pandas as pd
records = []
if 'img_root' in globals():
  for root_dir, dirs, files in os.walk(img_root):
    for f in files:
      if f.lower().endswith(img_exts):
        path = os.path.join(root_dir,f)
        # Infer label: directory name relative to img_root
        rel = os.path.relpath(path,img_root)
        label = rel.split(os.sep)[0] # first folder after img_root
        records.append({"image_path":path, "label":label})

df = pd.DataFrame(records)
print("Total images found:",len(df))
print("Sample rows:")
print(df.head())
out_csv = os.path.join(base,"data","image_data.csv")
df.to_csv(out_csv, index=False)
print("Saved mapping to",out_csv)

Total images found: 1103
Sample rows:
                                          image_path  \
0  /content/PlantDocBot/data/plantvillage/data_di...   
1  /content/PlantDocBot/data/plantvillage/data_di...   
2  /content/PlantDocBot/data/plantvillage/data_di...   
3  /content/PlantDocBot/data/plantvillage/data_di...   
4  /content/PlantDocBot/data/plantvillage/data_di...   

                                      label  
0  f5317a39-863b-4f0e-aefa-5f7fa1fee919.JPG  
1  e6e094e7-ab1c-46d0-89e8-5748bcbaaf3d.JPG  
2  e978d986-0cd0-4271-9816-56c29d4f505b.JPG  
3  6e92b842-a86f-4b36-887b-48e5099dc20e.JPG  
4  1398f4be-cdb2-4fb7-b730-5b20df2d54a5.JPG  
Saved mapping to /content/PlantDocBot/data/image_data.csv
