In [1]:
import pandas as pd
import numpy as np
import os
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Many thanks to the dataset resource here: https://github.com/wittyicon29/WeedWatch-Weed-Detection-using-CNN/tree/main

In [3]:
import os
import pandas as pd

# Directory containing your images
image_data_dir = "/content/drive/My Drive/CNN/Dataset/train_images"
# Load image filenames
image_filenames = os.listdir(image_data_dir)

# Load the labels CSV file
labels_data = pd.read_csv("/content/drive/My Drive/CNN/Dataset/labels.csv")
# Assuming 'image_filename' is the column name in your CSV that contains the filenames
label_filenames = labels_data['image_filename'].tolist()

I add the following steps because the original dataset has some images without labels. So I deleted those unmatched images.

In [4]:
# Find images without a corresponding label
unmatched_images = [img for img in image_filenames if img not in label_filenames]
print("Unmatched images:", unmatched_images)

Unmatched images: []


In [5]:
# Proceed to delete unmatched images
for img in unmatched_images:
    os.remove(os.path.join(image_data_dir, img))
    print(f"Deleted: {img}")

# Optionally, recheck what remains in the directory
remaining_images = os.listdir(image_data_dir)
print("Remaining images count:", len(remaining_images))

Remaining images count: 916


In [6]:
# Load the image dataset (assuming it's already preprocessed)
image_data_dir = "/content/drive/My Drive/CNN/Dataset/train_images"
image_filenames = os.listdir(image_data_dir)
image_data = []
for filename in image_filenames:
    image_path = os.path.join(image_data_dir, filename)
    image = cv2.imread(image_path)
    image_data.append(image)
image_data = np.array(image_data)

# Load the labels CSV file
labels_data = pd.read_csv("/content/drive/My Drive/CNN/Dataset/labels.csv")

# Merge the image dataset with the labels based on a common key, such as the image filename
combined_data = pd.merge(labels_data, pd.DataFrame({"image_filename": image_filenames}), on="image_filename")

In [7]:
# Prepare the combined dataset for training
X = image_data
y = to_categorical(combined_data["label"])

# Split the combined dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True
)

datagen.fit(X_train)

In [8]:
# Design the CNN model
model = Sequential()
model.add(Conv2D(128, kernel_size=(3, 3), activation="relu", input_shape=(image_data.shape[1:])))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, kernel_size=(3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation="relu"))
model.add(Dense(2, activation="softmax"))

In [9]:
# Compile the model
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model on the augmented training dataset
### I pick 3 epochs just for a try. Should train more epochs!
model.fit(datagen.flow(X_train, y_train, batch_size=16), epochs=3, validation_data=(X_test, y_test))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7d48b7d73dc0>

In [10]:
# Evaluate the model's performance on the testing dataset
score = model.evaluate(X_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.6917865872383118
Test accuracy: 0.5380434989929199
