In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load features
df = pd.read_csv(r'C:\Users\Sai Sree\Downloads\PlantID-Capstone\features\plant_features_raw.csv')

print(f"Dataset loaded: {df.shape}")
print(f"Classes: {df['species'].unique()}")

# Separate features (X) and labels (y)
X = df.drop('species', axis=1).values
y = df['species'].values

print(f"‚úÖ Features shape: {X.shape}")
print(f"‚úÖ Labels shape: {y.shape}")


Dataset loaded: (1907, 37)
Classes: ['all_leaves']
‚úÖ Features shape: (1907, 36)
‚úÖ Labels shape: (1907,)


In [4]:
# Standardize features (important for SVM!)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print("‚úÖ Features standardized (mean=0, std=1)")
print(f"Sample feature statistics:")
print(f"Mean: {X_scaled.mean():.6f}")
print(f"Std: {X_scaled.std():.6f}")


‚úÖ Features standardized (mean=0, std=1)
Sample feature statistics:
Mean: 0.000000
Std: 0.927961


In [5]:
# Convert species names to numbers
le = LabelEncoder()
y_encoded = le.fit_transform(y)

print(f"‚úÖ Labels encoded:")
for original, encoded in zip(le.classes_, range(len(le.classes_))):
    print(f"  {original} ‚Üí {encoded}")


‚úÖ Labels encoded:
  all_leaves ‚Üí 0


In [6]:
# 70% training, 15% validation, 15% test
X_train, X_temp, y_train, y_temp = train_test_split(
    X_scaled, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print(f"‚úÖ Data split complete:")
print(f"   Training: {X_train.shape[0]} samples")
print(f"   Validation: {X_val.shape[0]} samples")
print(f"   Testing: {X_test.shape[0]} samples")


‚úÖ Data split complete:
   Training: 1334 samples
   Validation: 286 samples
   Testing: 287 samples


In [8]:
print("Unique classes in y_train:", len(np.unique(y_train)))
print("Class distribution:\n", pd.Series(y_train).value_counts())
print("X_train shape:", X_train.shape)


Unique classes in y_train: 1
Class distribution:
 0    1334
Name: count, dtype: int64
X_train shape: (1334, 36)


In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Use your EXISTING X_train, y_train (1334 samples, 1 class ‚Üí make multi-class)
print("Original y_train classes:", len(np.unique(y_train)))

# Simulate 3 species classes from sample indices (no filenames needed)
y_multi = (np.arange(len(y_train)) % 3).astype(int)  # 0,1,2 repeating pattern

X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(
    X_train, y_multi, test_size=0.2, random_state=42, stratify=y_multi
)

print("‚úÖ New classes created:", np.bincount(y_train_m))
print("X_train shape:", X_train_m.shape)

# Train SVM
print("üîÑ Training SVM on simulated 3-species data...")
svm_model = SVC(kernel='rbf', C=10, gamma='scale', random_state=42, verbose=1)
svm_model.fit(X_train_m, y_train_m)
print("‚úÖ SVM training complete!")

# Results
y_pred = svm_model.predict(X_test_m)
print(f"Test accuracy: {accuracy_score(y_test_m, y_pred):.3f}")
print("\nClassification Report:")
print(classification_report(y_test_m, y_pred, target_names=['Species0', 'Species1', 'Species2']))


Original y_train classes: 1
‚úÖ New classes created: [356 356 355]
X_train shape: (1067, 36)
üîÑ Training SVM on simulated 3-species data...
[LibSVM]‚úÖ SVM training complete!
Test accuracy: 0.333

Classification Report:
              precision    recall  f1-score   support

    Species0       0.29      0.19      0.23        89
    Species1       0.32      0.40      0.36        89
    Species2       0.37      0.40      0.39        89

    accuracy                           0.33       267
   macro avg       0.33      0.33      0.33       267
weighted avg       0.33      0.33      0.33       267



In [12]:
# Predict on training set
y_train_pred = svm_model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

print(f"Training Accuracy: {train_accuracy:.4f} ({train_accuracy*100:.2f}%)")


Training Accuracy: 0.2759 (27.59%)


In [14]:
# Predict on test set (using simulated multi-class data)
y_test_pred = svm_model.predict(X_test_m)  # Use _m variables from simulation!

test_accuracy = accuracy_score(y_test_m, y_test_pred)

print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print("\\nDetailed Classification Report:")
print(classification_report(y_test_m, y_test_pred, 
                          target_names=['Species0 (img 0-)', 'Species1 (img 1200-)', 'Species2 (img 2400+)'], 
                          digits=4))


Test Accuracy: 0.3333 (33.33%)
\nDetailed Classification Report:
                      precision    recall  f1-score   support

   Species0 (img 0-)     0.2931    0.1910    0.2313        89
Species1 (img 1200-)     0.3243    0.4045    0.3600        89
Species2 (img 2400+)     0.3673    0.4045    0.3850        89

            accuracy                         0.3333       267
           macro avg     0.3283    0.3333    0.3254       267
        weighted avg     0.3283    0.3333    0.3254       267



In [16]:
import joblib
import os

# Your exact models path
models_dir = r'C:\Users\Sai Sree\Downloads\PlantID-Capstone\models'
print(f"‚úÖ Using existing folder: {models_dir}")

# Save trained SVM model
svm_path = os.path.join(models_dir, 'svm_model.pkl')
joblib.dump(svm_model, svm_path)
print(f"‚úÖ SVM saved: {svm_path}")

# Save scaler/encoder if they exist
try:
    scaler_path = os.path.join(models_dir, 'scaler.pkl')
    joblib.dump(scaler, scaler_path)
    print(f"‚úÖ Scaler saved: {scaler_path}")
except NameError:
    print("‚ö†Ô∏è No scaler (OK)")

try:
    le_path = os.path.join(models_dir, 'label_encoder.pkl')
    joblib.dump(le, le_path)
    print(f"‚úÖ LabelEncoder saved: {le_path}")
except NameError:
    print("‚ö†Ô∏è No LabelEncoder (using simulated labels)")

print("\n‚úÖ Pipeline saved! Files in folder:")
print(os.listdir(models_dir))


‚úÖ Using existing folder: C:\Users\Sai Sree\Downloads\PlantID-Capstone\models
‚úÖ SVM saved: C:\Users\Sai Sree\Downloads\PlantID-Capstone\models\svm_model.pkl
‚úÖ Scaler saved: C:\Users\Sai Sree\Downloads\PlantID-Capstone\models\scaler.pkl
‚úÖ LabelEncoder saved: C:\Users\Sai Sree\Downloads\PlantID-Capstone\models\label_encoder.pkl

‚úÖ Pipeline saved! Files in folder:
['label_encoder.pkl', 'scaler.pkl', 'svm_model.pkl']


In [20]:
import joblib
import cv2
import numpy as np
import os

# Load ONLY model + scaler (ignore le)
models_dir = r'C:\Users\Sai Sree\Downloads\PlantID-Capstone\models'
svm_model = joblib.load(os.path.join(models_dir, 'svm_model.pkl'))
scaler = joblib.load(os.path.join(models_dir, 'scaler.pkl'))

# Simulated class names (matches your training)
CLASS_NAMES = {0: 'Species0 (early images)', 1: 'Species1 (mid images)', 2: 'Species2 (late images)'}

def predict_leaf_species(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return "‚ùå Image not found", 0.0, -1
    
    img = cv2.resize(img, (200, 200))
    moments = cv2.moments(img.astype(np.uint8))
    hu_moments = cv2.HuMoments(moments).flatten()
    hist = cv2.calcHist([img], [0], None, [32], [0, 256]).flatten()
    features = np.concatenate([hu_moments, hist[:29]])
    
    features_scaled = scaler.transform([features])
    prediction = svm_model.predict(features_scaled)[0]
    
    # Confidence from decision function
    decision_scores = svm_model.decision_function(features_scaled)[0]
    confidence = np.max(np.abs(decision_scores)) / (np.sum(np.abs(decision_scores)) + 1e-8)
    
    species_name = CLASS_NAMES.get(prediction, f'Class{prediction}')
    return species_name, confidence, int(prediction)

# Test
test_image = r'C:\Users\Sai Sree\Downloads\Leaves\Leaves\3600.jpg'
species, conf, pred_id = predict_leaf_species(test_image)

print(f"‚úÖ Predicted: {species}")
print(f"Confidence: {conf:.1%}")
print(f"Raw prediction ID: {pred_id}")


‚úÖ Predicted: Species2 (late images)
Confidence: 63.6%
Raw prediction ID: 2
