In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import cv2

In [2]:
# Load the Excel dataset
excel_file = 'labels.csv'  # Path to your Excel file
df = pd.read_csv(excel_file)

# Preview dataset
print(df.head())

                Filename  Label       Species
0  20160928-140314-0.jpg      0  Chinee apple
1  20160928-140337-0.jpg      0  Chinee apple
2  20160928-140731-0.jpg      0  Chinee apple
3  20160928-140747-0.jpg      0  Chinee apple
4  20160928-141107-0.jpg      0  Chinee apple


In [3]:
# Set the image folder path and target image size
image_folder = 'images/'  # Folder containing images
image_size = (128, 128)  # Resize all images to 128x128

In [4]:
# Function to load and preprocess images
def load_and_preprocess_images(image_folder, filenames, image_size):
    images = [] #crate and save in this array
    for filename in filenames:
        image_path = os.path.join(image_folder, filename)
        image = cv2.imread(image_path)
        if image is not None:
            image = cv2.resize(image, image_size)  # Resize image
            images.append(image)
        else:
            print(f"Image not found: {filename}")
    return np.array(images)

In [5]:
# Load images using filenames from Excel file
images = load_and_preprocess_images(image_folder, df['Filename'], image_size)

In [6]:
# Encode the labels (weed varieties)
le = LabelEncoder()
labels = le.fit_transform(df['Species'])
labels = to_categorical(labels)  # Convert to one-hot encoding

# Split dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

In [7]:
# Normalize pixel values to [0, 1] range
X_train = X_train / 255.0
X_test = X_test / 255.0

# CNN model definition
def create_model(input_shape):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),#input layer
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(len(le.classes_), activation='softmax')  # Output layer
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [8]:
# Create the CNN model
input_shape = (image_size[0], image_size[1], 3)  # 3 channels for RGB
model = create_model(input_shape)

# Train the model
batch_size = 64
epochs = 15
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 566ms/step - accuracy: 0.1792 - loss: 1.9919 - val_accuracy: 0.3165 - val_loss: 1.7210
Epoch 2/15
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 518ms/step - accuracy: 0.3877 - loss: 1.5608 - val_accuracy: 0.3804 - val_loss: 1.5536
Epoch 3/15
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 621ms/step - accuracy: 0.4788 - loss: 1.3780 - val_accuracy: 0.4621 - val_loss: 1.3511
Epoch 4/15
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 520ms/step - accuracy: 0.5348 - loss: 1.2414 - val_accuracy: 0.4874 - val_loss: 1.3546
Epoch 5/15
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 538ms/step - accuracy: 0.6081 - loss: 1.0706 - val_accuracy: 0.5542 - val_loss: 1.2002
Epoch 6/15
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 509ms/step - accuracy: 0.6570 - loss: 0.9455 - val_accuracy: 0.5349 - val_loss: 1.2777
Epoch 7/15
[1m95/95[

In [9]:
# Save the model
model.save('weed_classification_model.h5')



In [10]:
# Predict on a test image (example)
sample_image = "images/20170711-115741-0.jpg"
#test and reshape
sample_image = X_test[0].reshape(1, image_size[0], image_size[1], 3)
#predict image
prediction = model.predict(sample_image) # precictedimage come as array
predicted_class = le.inverse_transform([np.argmax(prediction)])
print(f"Predicted weed variety: {predicted_class[0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step
Predicted weed variety: Chinee apple


In [11]:
import json

# Save the class labels (weed species) to a JSON file
class_labels = list(le.classes_)  # List of species names
with open('class_labels.json', 'w') as f:
    json.dump(class_labels, f)