### Modeling

Lessons from first modeling notebook

Using around 3-5 diverse base models is a good starting point. (I used 3 in the first notebook and it seemed to be ok)

Including a CNN as one of the base models is a good idea, especially since CNNs are powerful for image data. (Training accuracy was always > 80% 

train and evaluate each model independently before adding it to the ensemble. (I was kindof doing this but only as it appeard when the code was running.)

### Plan

Define and Train Diverse Models

Evaluate and Select Models

Combine Models in an Ensemble

In [36]:
# Import necessary libraries
import sys
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import joblib

# Add the parent directory to the sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Import the functions from the utils script
from utils import load_images_from_folder, prepare_data

In [3]:
dataset_path = "../Raw Data/Fruit And Vegetable Diseases Dataset"
image_size = (224, 224)
batch_size = 16

In [4]:
# Prepare the data
all_images, all_labels = prepare_data("../Raw Data/Fruit And Vegetable Diseases Dataset")



In [5]:
# Split the dataset into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(all_images, all_labels, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print("Training set shape:", X_train.shape, y_train.shape)
print("Validation set shape:", X_val.shape, y_val.shape)
print("Test set shape:", X_test.shape, y_test.shape)

Training set shape: (23432, 224, 224, 3) (23432,)
Validation set shape: (2929, 224, 224, 3) (2929,)
Test set shape: (2930, 224, 224, 3) (2930,)


In [6]:
# CNN Model
def build_cnn(input_shape):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),
        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),
        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [7]:
# Prepare Image Data Generator
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [8]:
train_generator = datagen.flow(X_train, y_train, batch_size=16)

In [9]:
validation_generator = ImageDataGenerator().flow(X_val, y_val, batch_size=16)

In [10]:
# Train CNN Model
cnn_model = build_cnn((224, 224, 3))
cnn_model.fit(train_generator, epochs=10, validation_data=validation_generator)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10


  self._warn_if_super_not_called()


[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m498s[0m 338ms/step - accuracy: 0.6346 - loss: 4.0974 - val_accuracy: 0.6859 - val_loss: 0.7238
Epoch 2/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m496s[0m 338ms/step - accuracy: 0.6750 - loss: 0.6182 - val_accuracy: 0.5097 - val_loss: 0.9717
Epoch 3/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m495s[0m 337ms/step - accuracy: 0.6681 - loss: 0.6089 - val_accuracy: 0.6258 - val_loss: 1.6989
Epoch 4/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m499s[0m 340ms/step - accuracy: 0.6742 - loss: 0.6115 - val_accuracy: 0.6910 - val_loss: 0.5471
Epoch 5/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m509s[0m 347ms/step - accuracy: 0.7151 - loss: 0.5650 - val_accuracy: 0.6463 - val_loss: 0.5932
Epoch 6/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m506s[0m 345ms/step - accuracy: 0.7274 - loss: 0.5344 - val_accuracy: 0.8126 - val_loss: 0.4674
Epo

<keras.src.callbacks.history.History at 0x32e28fe90>

In [11]:
# Evaluate CNN Model
cnn_pred_val = cnn_model.predict(X_val)
cnn_pred_val = (cnn_pred_val > 0.5).astype(int).flatten()
print("CNN Model Validation Accuracy:", accuracy_score(y_val, cnn_pred_val))

[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 107ms/step
CNN Model Validation Accuracy: 0.8726527825196313


In [12]:
# Train Random Forest Model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train.reshape(X_train.shape[0], -1), y_train)

In [13]:
# Evaluate Random Forest Model
rf_pred_val = rf_model.predict(X_val.reshape(X_val.shape[0], -1))
print("Random Forest Model Validation Accuracy:", accuracy_score(y_val, rf_pred_val))

Random Forest Model Validation Accuracy: 0.9102082622055309


In [14]:
def build_mlp(input_shape):
    model = Sequential([
        Flatten(input_shape=input_shape),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(1, activation='sigmoid')  # Binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [26]:
# Reshape the data for the MLP model
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_val_flat = X_val.reshape(X_val.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

In [16]:
# Build and train the MLP model
mlp_model = build_mlp((X_train_flat.shape[1],))
mlp_model.fit(X_train_flat, y_train, epochs=10, batch_size=16, validation_data=(X_val_flat, y_val))

  super().__init__(**kwargs)


Epoch 1/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 26ms/step - accuracy: 0.5624 - loss: 0.7970 - val_accuracy: 0.6647 - val_loss: 1.7501
Epoch 2/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 26ms/step - accuracy: 0.6405 - loss: 0.6291 - val_accuracy: 0.6740 - val_loss: 0.6148
Epoch 3/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 26ms/step - accuracy: 0.6364 - loss: 0.6274 - val_accuracy: 0.6306 - val_loss: 0.6511
Epoch 4/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 26ms/step - accuracy: 0.6412 - loss: 0.6284 - val_accuracy: 0.6688 - val_loss: 0.6218
Epoch 5/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 26ms/step - accuracy: 0.6362 - loss: 0.6324 - val_accuracy: 0.6798 - val_loss: 1.0212
Epoch 6/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 26ms/step - accuracy: 0.6399 - loss: 0.6317 - val_accuracy: 0.6740 - val_loss: 0.6743
Epoc

<keras.src.callbacks.history.History at 0x3136ecd50>

In [19]:
# Evaluate the MLP model
mlp_pred_val = mlp_model.predict(X_val_flat)
mlp_pred_val = (mlp_pred_val > 0.5).astype(int).flatten()
print("MLP Model Validation Accuracy:", accuracy_score(y_val, mlp_pred_val))

[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
MLP Model Validation Accuracy: 0.6746329805394332


In [20]:
# Second CNN Model with a different architecture or parameters
def build_cnn_v2(input_shape):
    model = Sequential([
        Conv2D(64, (3, 3), activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),
        Conv2D(128, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),
        Conv2D(256, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [22]:
# Train and evaluate the second CNN model
cnn_model_v2 = build_cnn_v2((224, 224, 3))
cnn_model_v2.fit(train_generator, epochs=10, validation_data=validation_generator)
cnn_v2_pred_val = cnn_model_v2.predict(X_val)
cnn_v2_pred_val = (cnn_v2_pred_val > 0.5).astype(int).flatten()
print("Second CNN Model Validation Accuracy:", accuracy_score(y_val, cnn_v2_pred_val))

Epoch 1/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1321s[0m 900ms/step - accuracy: 0.6262 - loss: 9.2602 - val_accuracy: 0.7747 - val_loss: 0.5450
Epoch 2/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1305s[0m 890ms/step - accuracy: 0.6769 - loss: 0.6152 - val_accuracy: 0.6664 - val_loss: 0.6392
Epoch 3/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1314s[0m 896ms/step - accuracy: 0.6549 - loss: 0.6333 - val_accuracy: 0.7395 - val_loss: 0.5562
Epoch 4/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1315s[0m 897ms/step - accuracy: 0.6326 - loss: 0.6490 - val_accuracy: 0.7163 - val_loss: 0.5563
Epoch 5/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1347s[0m 919ms/step - accuracy: 0.6468 - loss: 0.6274 - val_accuracy: 0.5702 - val_loss: 0.6675
Epoch 6/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1328s[0m 906ms/step - accuracy: 0.6387 - loss: 0.6141 - val_accuracy: 0.7757 - val

In [23]:
# Second MLP Model with different parameters
def build_mlp_v2(input_shape):
    model = Sequential([
        Flatten(input_shape=input_shape),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.4),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.4),
        Dense(1, activation='sigmoid')  # Binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [25]:
# Train and evaluate the second MLP model
mlp_model_v2 = build_mlp_v2((X_train_flat.shape[1],))
mlp_model_v2.fit(X_train_flat, y_train, epochs=10, batch_size=16, validation_data=(X_val_flat, y_val))
mlp_v2_pred_val = mlp_model_v2.predict(X_val_flat)
mlp_v2_pred_val = (mlp_v2_pred_val > 0.5).astype(int).flatten()
print("Second MLP Model Validation Accuracy:", accuracy_score(y_val, mlp_v2_pred_val))

Epoch 1/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 97ms/step - accuracy: 0.5826 - loss: 0.7660 - val_accuracy: 0.6658 - val_loss: 0.6146
Epoch 2/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 96ms/step - accuracy: 0.6488 - loss: 0.6188 - val_accuracy: 0.6664 - val_loss: 0.6988
Epoch 3/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 96ms/step - accuracy: 0.6675 - loss: 0.6004 - val_accuracy: 0.5582 - val_loss: 1.1058
Epoch 4/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 96ms/step - accuracy: 0.6645 - loss: 0.6031 - val_accuracy: 0.7340 - val_loss: 0.7512
Epoch 5/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 95ms/step - accuracy: 0.6620 - loss: 0.6020 - val_accuracy: 0.6869 - val_loss: 0.8426
Epoch 6/10
[1m1465/1465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 95ms/step - accuracy: 0.6527 - loss: 0.6052 - val_accuracy: 0.7197 - val_loss: 1.489

### ENSEMBLE TIME

In [27]:
# Combine models in an ensemble
models = [cnn_model, cnn_model_v2, rf_model, mlp_model, mlp_model_v2]

In [30]:
# Generate predictions for the training set using each model
train_predictions_stack = np.zeros((len(X_train_flat), len(models)))

for i, model in enumerate(models):
    if isinstance(model, tf.keras.Model):
        if len(model.input_shape) == 2:  # MLP model
            predictions = model.predict(X_train_flat).flatten()
        else:  # CNN model
            predictions = model.predict(X_train).flatten()
    else:
        predictions = model.predict(X_train_flat).flatten()
    
    train_predictions_stack[:, i] = predictions

[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 107ms/step
[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 282ms/step
[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step
[1m733/733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step


In [31]:
# Generate predictions for the test set using each model
test_predictions_stack = np.zeros((len(X_test), len(models)))

for i, model in enumerate(models):
    if isinstance(model, tf.keras.Model):
        if len(model.input_shape) == 2:  # MLP model
            predictions = model.predict(X_test_flat).flatten()
        else:  # CNN model
            predictions = model.predict(X_test).flatten()
    else:
        predictions = model.predict(X_test_flat).flatten()
    
    test_predictions_stack[:, i] = predictions

[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 104ms/step
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 276ms/step
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


In [32]:
# Train a Gradient Boosting model as the meta-model
meta_model = GradientBoostingClassifier(random_state=42)
meta_model.fit(train_predictions_stack, y_train)

# Use the meta-model to make the final prediction
meta_predictions = meta_model.predict(test_predictions_stack)

In [35]:
# Evaluate the stacking model
stacking_accuracy = accuracy_score(y_test, meta_predictions)
stacking_conf_matrix = confusion_matrix(y_test, meta_predictions)
stacking_class_report = classification_report(y_test, meta_predictions, zero_division=0)

print(f"Stacking Test Accuracy: {stacking_accuracy * 100:.2f}%")
print("Confusion Matrix:")
print(stacking_conf_matrix)
print("Classification Report")
print(stacking_class_report)

Stacking Test Accuracy: 89.59%
Confusion Matrix:
[[1214  148]
 [ 157 1411]]
Classification Report
              precision    recall  f1-score   support

           0       0.89      0.89      0.89      1362
           1       0.91      0.90      0.90      1568

    accuracy                           0.90      2930
   macro avg       0.90      0.90      0.90      2930
weighted avg       0.90      0.90      0.90      2930



In [37]:
models_dir = os.path.join(os.path.dirname(os.getcwd()), 'models')

In [39]:
# Save Keras models
cnn_model.save(os.path.join(models_dir, 'cnn_model.keras'))
cnn_model_v2.save(os.path.join(models_dir, 'cnn_model_v2.keras'))
mlp_model.save(os.path.join(models_dir, 'mlp_model.keras'))
mlp_model_v2.save(os.path.join(models_dir, 'mlp_model_v2.keras'))

# Save non-Keras models
joblib.dump(rf_model, os.path.join(models_dir, 'rf_model.pkl'))
joblib.dump(meta_model, os.path.join(models_dir, 'meta_model.pkl'))

['/Users/jin/Desktop/Rotten Vs Fresh Fruit Prediction/models/meta_model.pkl']