In [2]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib 

In [3]:
# Dataset root folder; it holds one sub-folder per entry in `classes`.
# Raw string so the Windows backslashes are kept literally instead of being
# parsed as (invalid, deprecated) escape sequences such as "\I" or "\(".
root_folder = r"D:\IIT\Subjects\(4605)IRP\Devlo\Augmented_DataSet"
# The position of each name in this list is used as its integer class label.
classes = ['Healthy', 'Pink_Wax', 'Live_Wood', 'Stem_Canker'] # [0, 1, 2, 3]

In [4]:
# Target size handed to cv2.resize for every image before texture extraction.
IMG_SIZE = (224, 224)

# Extract the texture features and save them to CSV

In [5]:
# Initialize feature storage
features = []
labels = []
skipped = 0  # number of files cv2 could not decode

# GLCM properties extracted per image, in the order they appear in each feature vector
glcm_props = ('contrast', 'homogeneity', 'energy', 'correlation')

# Preprocessing and Feature Extraction
for label, class_name in enumerate(classes):
    class_path = os.path.join(root_folder, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the row order
    # (and therefore any later seeded split of these rows) reproducible.
    for img_name in tqdm(sorted(os.listdir(class_path)), desc=f"Processing {class_name}"):
        img_path = os.path.join(class_path, img_name)

        # Load and preprocess image
        img = cv2.imread(img_path)
        if img is None:
            skipped += 1
            continue  # Skip unreadable files
        img_resized = cv2.resize(img, IMG_SIZE)  # Resize
        img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)  # Convert to grayscale

        # Feature extraction: Texture features using GLCM.
        # The matrix is built directly from the integer grayscale image
        # (levels=256); no 0-1 normalisation is applied because graycomatrix
        # expects integer intensities.
        glcm = graycomatrix(img_gray, distances=[5], angles=[0], levels=256, symmetric=True, normed=True)

        # Single distance/angle, so each property is the [0, 0] entry
        feature_vector = [graycoprops(glcm, prop)[0, 0] for prop in glcm_props]
        features.append(feature_vector)
        labels.append(label)

# Surface skipped files instead of dropping them silently
if skipped:
    print(f"Skipped {skipped} unreadable file(s).")

Processing Healthy: 100%|██████████| 2500/2500 [00:41<00:00, 60.20it/s]
Processing Pink_Wax: 100%|██████████| 2355/2355 [00:42<00:00, 56.03it/s]
Processing Live_Wood: 100%|██████████| 2365/2365 [00:43<00:00, 54.21it/s]
Processing Stem_Canker: 100%|██████████| 2350/2350 [00:48<00:00, 48.54it/s]


In [6]:
# Tabulate the texture features (one row per image) and attach the class index
feature_columns = ['Contrast', 'Homogeneity', 'Energy', 'Correlation']
feature_df = pd.DataFrame(features, columns=feature_columns).assign(Label=labels)

In [7]:
# Write the feature table to a CSV in the current working directory
# (index=False keeps the row index out of the file).
feature_df.to_csv("ml_features.csv", index=False)

print("Feature extraction completed. Features saved to 'ml_features.csv'.")

Feature extraction completed. Features saved to 'ml_features.csv'.


# Train only on Texture Features

In [8]:
# Reload the feature table from the CSV written by the extraction step.
data = pd.read_csv("ml_features.csv")

In [9]:
# Separate the class-index target from the feature columns
y = data["Label"]
X = data.drop(columns=["Label"])

In [10]:
# Hold out 20% of the rows for testing; stratify on the label and fix the seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [11]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
# Persist the fitted scaler so it can be reloaded later.
# Raw string: keeps the Windows backslashes literal instead of relying on
# invalid (deprecated) escape sequences such as "\I" or "\m".
scaler_path = r"D:\IIT\Subjects\(4605)IRP\Devlo\models\scaler.joblib"
# Make sure the output folder exists before writing into it.
os.makedirs(os.path.dirname(scaler_path), exist_ok=True)
joblib.dump(scaler, scaler_path)

['D:\\IIT\\Subjects\\(4605)IRP\\Devlo\\models\\scaler.joblib']

In [17]:
# Classifiers to compare, keyed by the display name used when training and saving
models = dict([
    ("Random Forest", RandomForestClassifier(random_state=42)),
    ("SVM", SVC(random_state=42)),
    ("KNN", KNeighborsClassifier()),
])

In [18]:
# Folder that receives one .joblib file per trained model.
# Raw string: keeps the Windows backslashes literal instead of relying on
# invalid (deprecated) escape sequences.
models_dir = r"D:\IIT\Subjects\(4605)IRP\Devlo\models"
os.makedirs(models_dir, exist_ok=True)  # make sure the output folder exists before saving

# Fit each candidate on the scaled training split, report its metrics on the
# scaled test split, then save it to disk.
for model_name, model in models.items():
    print(f"Training {model_name}...")
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Evaluate the model
    print(f"Results for {model_name}:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("-" * 50)

    # Save the trained model, e.g. "Random Forest" -> random_forest_model.joblib
    model_filename = os.path.join(models_dir, f"{model_name.replace(' ', '_').lower()}_model.joblib")
    joblib.dump(model, model_filename)
    print(f"Saved {model_name} model to {model_filename}.")

Training Random Forest...
Results for Random Forest:
Accuracy: 0.42
Classification Report:
              precision    recall  f1-score   support

           0       0.43      0.50      0.46       500
           1       0.43      0.45      0.44       471
           2       0.41      0.38      0.39       473
           3       0.40      0.33      0.36       470

    accuracy                           0.42      1914
   macro avg       0.42      0.42      0.41      1914
weighted avg       0.42      0.42      0.42      1914

Confusion Matrix:
[[251  69  74 106]
 [ 78 210 118  65]
 [108 119 182  64]
 [145  93  75 157]]
--------------------------------------------------
Saved Random Forest model to D:\IIT\Subjects\(4605)IRP\Devlo\models\random_forest_model.joblib.
Training SVM...
Results for SVM:
Accuracy: 0.42
Classification Report:
              precision    recall  f1-score   support

           0       0.40      0.57      0.47       500
           1       0.46      0.45      0.46       47