In [43]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [44]:
import numpy as np
import pandas as pd
import os
import cv2
import zipfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense

In [45]:
# Load Dataset
DATASET_PATH = "/content/drive/MyDrive/CSE475 - SEC2/MangoLeafBD Dataset.zip"
EXTRACTED_PATH = "/content/mango_leaf_dataset"

# Extract dataset if not already extracted
if not os.path.exists(EXTRACTED_PATH):
    with zipfile.ZipFile(DATASET_PATH, 'r') as zip_ref:
        zip_ref.extractall(EXTRACTED_PATH)

categories = os.listdir(EXTRACTED_PATH)
print("Categories:", categories)

data = []
labels = []
image_size = (224, 224)

Categories: ['MangoLeafBD Dataset']


In [46]:
# Read images and resize
for category in categories:
    path = os.path.join(EXTRACTED_PATH, category)
    for img in os.listdir(path):
        img_path = os.path.join(path, img)
        image = cv2.imread(img_path)
        if image is not None:  # Ensure the image is read correctly
            image = cv2.resize(image, image_size)
            data.append(image)
            labels.append(category)
        else:
            print(f"Failed to load image: {img_path}")  # Print error message for failed image loads

In [58]:
# Convert lists to arrays
data = np.array(data)
labels = np.array(labels)

print("Data shape:", data.shape)  # Print the shape of the 'data' array
print("Labels shape:", labels.shape)  # Print the shape of the 'labels' array

Data shape: (0,)
Labels shape: (0,)


In [48]:
# Encode labels
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)


In [57]:
# Normalize images
data = data / 255.0

In [59]:
# Split dataset
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(labels)

X_train, X_test, y_train, y_test = train_test_split(data, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [None]:
# Feature extraction using VGG16
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
flatten = Flatten()(base_model.output)
model = Model(inputs=base_model.input, outputs=flatten)

X_train_features = model.predict(X_train)
X_test_features = model.predict(X_test)

In [None]:
# Train Decision Tree
dt = DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=42)
dt.fit(X_train_features, y_train)
y_pred_dt = dt.predict(X_test_features)

In [None]:
# Train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_features, y_train)
y_pred_rf = rf.predict(X_test_features)

In [None]:
# Evaluate Models
def evaluate_model(y_test, y_pred, model_name):
    print(f"{model_name} Results:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("--------------------------------------")

evaluate_model(y_test, y_pred_dt, "Decision Tree")
evaluate_model(y_test, y_pred_rf, "Random Forest")