## Check Python requirements
 - Python version between 3.8 and 3.11
 - urllib3==1.25.*

In [4]:
import sys

# Snapshot of the interpreter version this kernel is running on
current_version = sys.version_info

# Only the (major, minor) pair takes part in the supported-range check
major_minor = (current_version.major, current_version.minor)
version_label = f"{major_minor[0]}.{major_minor[1]}"
version_supported = (3, 8) <= major_minor <= (3, 11)

# Report whether the interpreter falls inside the inclusive 3.8 to 3.11 range
if not version_supported:
    print(f"Your Python version {version_label} is NOT within the range 3.8–3.11.")
else:
    print(f"Your Python version {version_label} is correct")


Your Python version 3.7 is NOT within the range 3.8–3.11.


## Imports

In [10]:
import kagglehub
import re
import os
import shutil
import random

ImportError: urllib3 v2.0 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with OpenSSL 1.1.0h  27 Mar 2018. See: https://github.com/urllib3/urllib3/issues/2168

In [7]:
import tensorflow as tf

## Download image dataset

In [2]:
# Download latest version
path = kagglehub.dataset_download("abdallahalidev/plantvillage-dataset")

print("Path to dataset files:", path)

# Point `path` at the colour-image sub-folder of the downloaded dataset.
# os.path.join uses the separator of the current OS, so this no longer
# depends on Windows-style backslashes.
path = os.path.join(path, "plantvillage dataset", "color")

Path to dataset files: C:\Users\herde\.cache\kagglehub\datasets\abdallahalidev\plantvillage-dataset\versions\3


## Split dataset into train, validation and test data

In [5]:

# Paths
original_dataset_dir = path  # Path to the original dataset
output_base_dir = 'split_dataset'  # Output directory for train, val, test

# Split configuration
splits = ['train', 'validation', 'test']
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1  # Not used for slicing: the test split receives whatever is left over
split_seed = 42   # Fixed seed so that re-creating the split gives the same assignment

# Catch an inconsistent edit of the ratios before any file is copied
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "Split ratios must sum to 1"


def copy_images_to_split(image_names, src_dir, dest_dir):
    """Copy every file listed in `image_names` from `src_dir` into `dest_dir`.

    Args:
        image_names: File names (not paths) located directly in `src_dir`.
        src_dir: Directory that currently holds the images.
        dest_dir: Existing directory that receives the copies.
    """
    for image_name in image_names:
        shutil.copy(os.path.join(src_dir, image_name), os.path.join(dest_dir, image_name))


# The split runs only when the output folder does not exist yet.
# NOTE: an interrupted run leaves a partial folder behind that this guard
# accepts as "already exists"; delete the folder manually before re-running.
if not os.path.isdir(output_base_dir):

    # Private generator: reproducible shuffling without touching the global random state
    split_rng = random.Random(split_seed)

    # Create train, val, test directories
    for split in splits:
        os.makedirs(os.path.join(output_base_dir, split), exist_ok=True)

    # Split images class by class; sorted() removes the OS-dependent listing
    # order so the seeded shuffle yields the same result on every machine
    for class_name in sorted(os.listdir(original_dataset_dir)):
        class_path = os.path.join(original_dataset_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Get all image files
        images = sorted(
            f for f in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, f))
        )
        split_rng.shuffle(images)

        # Calculate split sizes (int() truncates; the remainder goes to the test split)
        total_images = len(images)
        train_size = int(total_images * train_ratio)
        val_size = int(total_images * val_ratio)

        # Assign images to splits
        images_by_split = {
            'train': images[:train_size],
            'validation': images[train_size:train_size + val_size],
            'test': images[train_size + val_size:],
        }

        # Create the class directory inside each split folder and copy its images
        for split in splits:
            split_class_dir = os.path.join(output_base_dir, split, class_name)
            os.makedirs(split_class_dir, exist_ok=True)
            copy_images_to_split(images_by_split[split], class_path, split_class_dir)

    print("Dataset successfully split!")

else:
    print(f"Folder {output_base_dir} already exists")


Folder split_dataset already exists


## Load images into code

In [19]:
import sys

# Check whether the interpreter is a 64-bit build.
# This runs inside the kernel itself: a `!python` shell call would use
# whichever `python` is first on PATH, which may be a different installation
# from the one executing this notebook.
is_64bit = sys.maxsize > 2**32
print("%x" % sys.maxsize, is_64bit)

7fffffffffffffff True


ImportError: Traceback (most recent call last):
  File "c:\Python312\Lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 70, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: Proces inicializace dynamicky připojované knihovny (DLL) se nezdařil.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.

In [1]:


# Define paths
# Read from the folder written by the split cell (`output_base_dir`); the
# previous hard-coded 'dataset/...' paths did not match that output folder.
train_dir = os.path.join(output_base_dir, 'train')
validation_dir = os.path.join(output_base_dir, 'validation')

# Shared loader settings, kept in one place so both datasets stay in sync
image_size = (224, 224)  # Resize all images to this size
batch_size = 32          # Number of images per batch

# Load datasets
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=image_size,
    batch_size=batch_size
)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    validation_dir,
    image_size=image_size,
    batch_size=batch_size
)

# Optional: Prefetch for performance
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)


ImportError: Traceback (most recent call last):
  File "c:\Python312\Lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 70, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: Proces inicializace dynamicky připojované knihovny (DLL) se nezdařil.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.

## Define function to make labels readable

In [12]:
# Extracts information from the label
def parse_label(label):
    """Split a dataset label of the form ``<plant>___<condition>`` into readable parts.

    Everything after the first space in `label` is discarded before parsing,
    so ``"Tomato___Spider_mites Two-spotted_spider_mite"`` is treated as
    ``"Tomato___Spider_mites"``.

    Args:
        label: Raw label string, e.g. ``"Corn_(maize)___Northern_Leaf_Blight"``.

    Returns:
        A tuple ``(plant_name, is_healthy, disease)`` where
        - ``plant_name`` is the part before ``___`` with underscores turned
          into spaces and parentheses removed,
        - ``is_healthy`` is True when the label contains ``"healthy"``,
        - ``disease`` is ``"healthy"`` for healthy labels, the cleaned-up part
          after ``___`` otherwise, or ``"unknown"`` when the label has no
          ``___`` separator (previously this case raised ``IndexError``).
    """

    def _humanize(text):
        # Underscores become spaces; parentheses are dropped entirely
        return text.replace("_", " ").replace("(", "").replace(")", "")

    # Keep only the text before the first space, then split plant / condition
    label = label.split(" ")[0]
    parts = label.split("___")

    # Extract the plant name and make it human-readable
    flower_name = _humanize(parts[0])

    # Determine if the label indicates a healthy plant
    is_healthy = "healthy" in label

    # Extract the disease name or mark it as healthy
    if is_healthy:
        disease = "healthy"
    elif len(parts) > 1:
        disease = _humanize(parts[1])
    else:
        # No "___" separator present: there is no condition part to report
        disease = "unknown"

    return flower_name, is_healthy, disease

# Example usage: print the parsed tuple for a few representative labels

example_labels = (
    "Corn_(maize)___Northern_Leaf_Blight",
    "Cherry_(including_sour)___Powdery_mildew",
    "Blueberry___healthy",
    "Tomato___Spider_mites Two-spotted_spider_mite",
)
for example_label in example_labels:
    print(parse_label(example_label))




('Corn maize', False, 'Northern Leaf Blight')
('Cherry including sour', False, 'Powdery mildew')
('Blueberry', True, 'healthy')
('Tomato', False, 'Spider mites')
