Random Forest, SVM, and KNN

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

In [2]:
dataset_dir = "D:\IIT\Subjects\(4605)IRP\Devlo\Augmented_DataSet"

In [3]:
LBP_RADIUS = 1
LBP_POINTS = 8 * LBP_RADIUS
GLCM_DISTANCES = [1, 2]
GLCM_ANGLES = [0, np.pi/4, np.pi/2, 3*np.pi/4]

In [4]:
def extract_features(image):
    features = {}
    
    image = cv2.resize(image, (128, 128))
    
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Color Features
    features['mean_r'] = np.mean(image[:, :, 0])
    features['mean_g'] = np.mean(image[:, :, 1])
    features['mean_b'] = np.mean(image[:, :, 2])
    features['std_r'] = np.std(image[:, :, 0])
    features['std_g'] = np.std(image[:, :, 1])
    features['std_b'] = np.std(image[:, :, 2])
    
    # Grayscale histogram
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256]).flatten()
    features['hist_mean'] = np.mean(hist)
    features['hist_std'] = np.std(hist)
    
    # Texture Features (LBP)
    lbp = local_binary_pattern(gray, LBP_POINTS, LBP_RADIUS, method="uniform")
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, LBP_POINTS + 3), range=(0, LBP_POINTS + 2))
    lbp_hist = lbp_hist.astype("float")
    lbp_hist /= (lbp_hist.sum() + 1e-6)
    features['lbp_mean'] = np.mean(lbp_hist)
    features['lbp_std'] = np.std(lbp_hist)
    
    # Texture Features (GLCM)
    glcm = graycomatrix(gray, distances=GLCM_DISTANCES, angles=GLCM_ANGLES, symmetric=True, normed=True)
    features['glcm_contrast'] = np.mean(graycoprops(glcm, 'contrast'))
    features['glcm_energy'] = np.mean(graycoprops(glcm, 'energy'))
    features['glcm_homogeneity'] = np.mean(graycoprops(glcm, 'homogeneity'))
    
    return features

In [5]:
def process_dataset(image_dir):
    feature_list = []
    labels = []
    
    for label in os.listdir(image_dir):
        class_dir = os.path.join(image_dir, label)
        if not os.path.isdir(class_dir):
            continue
        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            try:
                image = cv2.imread(img_path)
                if image is None:
                    continue
                features = extract_features(image)
                features['label'] = label
                feature_list.append(features)
            except Exception as e:
                print(f"Error processing {img_path}: {e}")
                continue
    
    return pd.DataFrame(feature_list)

In [6]:
print("Extracting features...")
features_df = process_dataset(dataset_dir)

features_df.to_csv("tree_features.csv", index=False)
print("Feature extraction completed.")

Extracting features...
Feature extraction completed.


In [7]:
# Classification
print("Starting classification...")
# Separate features and labels
X = features_df.drop(columns=["label"])
y = features_df["label"]

y = y.astype('category').cat.codes

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Starting classification...


In [8]:
# Initialize models
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": SVC(kernel="rbf", random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=5)
}

In [9]:
for model_name, model in models.items():
    print(f"\nTraining and evaluating {model_name}...")
    
    if model_name in ["SVM", "KNN"]:
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
    
    # Metrics
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=features_df['label'].unique()))


Training and evaluating Random Forest...
Accuracy: 0.6510
Classification Report:
              precision    recall  f1-score   support

     healthy       0.64      0.63      0.64       500
   live_wood       0.78      0.75      0.77       473
    pink_wax       0.65      0.68      0.66       471
 stem_canker       0.54      0.54      0.54       470

    accuracy                           0.65      1914
   macro avg       0.65      0.65      0.65      1914
weighted avg       0.65      0.65      0.65      1914


Training and evaluating SVM...
Accuracy: 0.6870
Classification Report:
              precision    recall  f1-score   support

     healthy       0.69      0.63      0.66       500
   live_wood       0.82      0.81      0.81       473
    pink_wax       0.71      0.72      0.72       471
 stem_canker       0.54      0.59      0.56       470

    accuracy                           0.69      1914
   macro avg       0.69      0.69      0.69      1914
weighted avg       0.69      0.