<a href="https://colab.research.google.com/github/Dhrupu/CNN/blob/main/Boosted_Tree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import os
import cv2
import random
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from sklearn.decomposition import PCA
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model

In [None]:
# Define image size
# Default target size handed to cv2.resize, which takes (width, height).
IMG_SIZE = (128, 128)


# Load images and extract features
def load_images_and_labels(data_dir, classes, img_size=None):
    """Load and resize every readable image found under ``data_dir/<class>``.

    Parameters
    ----------
    data_dir : str
        Directory that contains one sub-directory per class.
    classes : sequence of str
        Sub-directory names. The position of a name in this sequence is the
        integer label assigned to every image in that sub-directory.
    img_size : tuple of int, optional
        ``(width, height)`` passed to ``cv2.resize``. Defaults to the
        module-level ``IMG_SIZE``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)``: the resized images and their integer labels, in matching
        order. Both arrays are empty when no image could be loaded.

    Notes
    -----
    Files that cannot be decoded or resized are reported with ``print`` and
    skipped; they never abort the load.
    """
    if img_size is None:
        img_size = IMG_SIZE

    X, y = [], []

    for label, class_name in enumerate(classes):
        class_dir = os.path.join(data_dir, class_name)
        # os.listdir returns entries in arbitrary order; sort so that the
        # sample order (and everything derived from it) is reproducible.
        for img_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_name)
            try:
                img = cv2.imread(img_path)
                # cv2.imread reports failure by returning None rather than
                # raising, so make the failure explicit here.
                if img is None:
                    raise ValueError("file could not be decoded as an image")
                img = cv2.resize(img, img_size)
            except Exception as e:
                # Best-effort loading: report the bad file and keep going.
                print(f"Error loading image {img_name}: {e}")
                continue
            # Only append once both steps succeeded so X and y stay aligned.
            X.append(img)
            y.append(label)

    return np.array(X), np.array(y)


In [None]:
# Set dataset paths
# The dataset root defaults to the mounted Google Drive location but can be
# overridden with the GUAVA_DATA_DIR environment variable, so the notebook
# also runs outside this particular Colab/Drive layout.
DATA_DIR = os.environ.get(
    "GUAVA_DATA_DIR",
    "/content/drive/MyDrive/guava dataset/GuavaDiseaseDataset",
)
train_dir = os.path.join(DATA_DIR, "train")
val_dir = os.path.join(DATA_DIR, "val")


In [None]:
# Get class labels
# One class per sub-directory of train_dir. The names are sorted because
# os.listdir returns them in arbitrary order, which would make the
# label -> class mapping differ between runs. Plain files and hidden
# directories (e.g. .ipynb_checkpoints) are skipped so they are not
# mistaken for classes.
classes = sorted(
    name
    for name in os.listdir(train_dir)
    if not name.startswith(".") and os.path.isdir(os.path.join(train_dir, name))
)

# Load train and validation data
# The same `classes` list is used for both splits so labels agree.
X_train, y_train = load_images_and_labels(train_dir, classes)
X_val, y_val = load_images_and_labels(val_dir, classes)

# Normalize image data
# Scale pixel values by 1/255. Casting to float32 first avoids materialising
# a float64 copy of the whole image set.
X_train = X_train.astype(np.float32) / 255.0
X_val = X_val.astype(np.float32) / 255.0


In [None]:
# Feature Extraction using VGG16 (Pretrained CNN)
# The input shape is derived from IMG_SIZE so the network always matches the
# size the images were resized to. IMG_SIZE is the (width, height) pair given
# to cv2.resize, while Keras expects (height, width, channels).
base_model = VGG16(
    weights="imagenet",
    include_top=False,  # drop the classifier head; only conv features are used
    input_shape=(IMG_SIZE[1], IMG_SIZE[0], 3),
)
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Run both image sets through the VGG16 feature extractor.
# Training images first, then validation images.
X_train_features = feature_extractor.predict(x=X_train)
X_val_features = feature_extractor.predict(x=X_val)

[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m494s[0m 6s/step
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 6s/step


In [None]:
# Collapse each sample's feature map into a single row vector so the
# features can be fed to PCA / Gradient Boosting (one row per image).
X_train_features = X_train_features.reshape(len(X_train_features), -1)
X_val_features = X_val_features.reshape(len(X_val_features), -1)


In [None]:
# Dimensionality reduction using PCA
PCA_COMPONENTS = 100  # Reduce to 100 components (can adjust)

# random_state is fixed because, for large inputs, scikit-learn's default
# 'auto' solver may select a randomized SVD; without a seed the projected
# features (and every downstream metric) would change from run to run.
pca = PCA(n_components=PCA_COMPONENTS, random_state=42)

# Fit the projection on the training features only, then apply the same
# projection to the validation features.
X_train_features_pca = pca.fit_transform(X_train_features)
X_val_features_pca = pca.transform(X_val_features)

In [None]:

# Define the hyperparameters and distributions to sample from
param_dist = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

# Initialize the model and RandomizedSearchCV
# Both the estimator and the search are seeded: the search seed fixes which
# parameter combinations are sampled, the estimator seed fixes the trees that
# are grown, so the selected model is reproducible.
gbm_random_search = RandomizedSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=10,
    cv=3,
    scoring='accuracy',
    random_state=42,
    n_jobs=-1,
)

# Fit the model
gbm_random_search.fit(X_train_features_pca, y_train)

# Best model and parameters
best_gbm_random = gbm_random_search.best_estimator_
print(f"Best Parameters (Randomized Search): {gbm_random_search.best_params_}")

# Predict on validation data
y_pred = best_gbm_random.predict(X_val_features_pca)


Best Parameters (Randomized Search): {'n_estimators': 200, 'max_depth': 3, 'learning_rate': 0.1}


In [None]:

# Score the validation predictions: plain accuracy, then precision / recall /
# F1 averaged with per-class support weights (accounts for class imbalance).
accuracy = accuracy_score(y_val, y_pred)
precision, recall, f1 = (
    scorer(y_val, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Summary metrics, four decimal places each
print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# Per-class breakdown, rows named after the entries of `classes`
print("\nClassification Report:")
per_class_report = classification_report(y_val, y_pred, target_names=classes)
print(per_class_report)

Validation Accuracy: 0.9219
Precision: 0.9218
Recall: 0.9219
F1 Score: 0.9218

Classification Report:
               precision    recall  f1-score   support

    fruit_fly       0.91      0.89      0.90       262
healthy_guava       0.90      0.92      0.91       185
  Anthracnose       0.94      0.95      0.94       308

     accuracy                           0.92       755
    macro avg       0.92      0.92      0.92       755
 weighted avg       0.92      0.92      0.92       755

