In [1]:
import os
import pandas as pd
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import joblib  # Import joblib directly


# Define paths
# NOTE(review): both locations are absolute, drive-letter paths, so the notebook
# only runs unchanged on a machine that has this exact D:/Project layout.
dataset_path = "D:/Project/dataset.xlsx"  # Path to the dataset Excel file
image_dir = "D:/Project/images"  # Directory containing the images

# Step 1: Load the dataset
# The spreadsheet is read into a DataFrame here, but nothing else in this cell
# refers to `dataset`; the training labels below are taken from the image
# folder names instead.
dataset = pd.read_excel(dataset_path)

# Step 2: Read images and extract features
# Every sub-folder of `image_dir` is treated as one class. Each readable image
# inside it is loaded as grayscale, resized to IMAGE_SIZE and flattened into a
# 1-D pixel vector appended to `features`; the folder name is appended to
# `labels` at the same position. Unreadable files are reported and skipped.
IMAGE_SIZE = (100, 100)  # (width, height) handed to cv2.resize

features = []
labels = []

# os.listdir() makes no promise about ordering. Sorting keeps the sample order
# stable, so the seeded train/test split that follows is reproducible across
# runs and machines.
for folder_name in sorted(os.listdir(image_dir)):
    folder_path = os.path.join(image_dir, folder_name)
    if not os.path.isdir(folder_path):
        continue  # only directories define classes
    for filename in sorted(os.listdir(folder_path)):
        image_path = os.path.join(folder_path, filename)
        try:
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Load as grayscale
            if image is None:
                # cv2.imread reports an unreadable/non-image file by returning None.
                print("Error: Unable to read image at path:", image_path)
                continue
            resized_image = cv2.resize(image, IMAGE_SIZE)
        except cv2.error as e:
            # Skip only OpenCV decoding/resizing failures, and say which file
            # caused them; any other exception is a real bug and should surface.
            print("Error:", e, "while processing", image_path)
            continue
        features.append(resized_image.flatten())
        labels.append(folder_name)

# Fail early with a clear message instead of an obscure error further down.
if not features:
    raise ValueError("No readable images found under: " + image_dir)

# Step 3: Encode labels
# Learn the class vocabulary from the folder-name labels, then map every
# label to its integer id.
label_encoder = LabelEncoder().fit(labels)
labels_encoded = label_encoder.transform(labels)

# Step 4: Split data into training and testing sets
# 20% of the samples are held out for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels_encoded,
    random_state=42,
    test_size=0.2,
)

# Step 5: Define a pipeline
# Standardise the pixel features, project them onto 100 principal components,
# then classify with an RBF-kernel SVM. The C/gamma given here are only
# starting values; the grid search below overrides them.
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Scale features
    # random_state pins the randomized SVD solver that PCA's default 'auto'
    # mode may select for wide inputs like these, so repeated runs give the
    # same components. Note that n_components must not exceed the number of
    # samples in any cross-validation training fold.
    ('pca', PCA(n_components=100, random_state=42)),  # Apply PCA for dimensionality reduction
    ('svm', SVC(kernel='rbf', C=10, gamma=0.001))  # SVM with RBF kernel
])

# Step 6: Grid search for hyperparameter tuning
# 3 values of C x 4 values of gamma = 12 candidates, each scored with 5-fold CV.
param_grid = {
    'svm__C': [1, 10, 100],
    'svm__gamma': [0.001, 0.01, 0.1, 1]
}

# The 60 fits are independent of each other; n_jobs=-1 spreads them over all
# available CPU cores instead of running them one after another.
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Step 7: Evaluate the model
# Predict on both partitions with the fitted search object, then compare the
# predictions against the true encoded labels.
train_predictions, test_predictions = (
    grid_search.predict(partition) for partition in (X_train, X_test)
)

train_accuracy, test_accuracy = (
    accuracy_score(y_train, train_predictions),
    accuracy_score(y_test, test_predictions),
)

print("Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)

# Step 8: Save the trained model
# Persist only the best pipeline found by the grid search, not the search object.
# NOTE(review): this path is relative to the kernel's working directory, while
# the prediction cell later loads 'D:/Project/models/best_model.pkl' — confirm
# both resolve to the same file.
model_path = 'models/best_model.pkl'
# joblib.dump does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(grid_search.best_estimator_, model_path)



KeyboardInterrupt: 

In [None]:
# Step 8: Save the trained model
# This cell repeats the save step that also ends the training cell; it relies on
# `grid_search`, `joblib` and `os` already being defined by that cell.
model_path = 'models/best_model.pkl'
# joblib.dump does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(grid_search.best_estimator_, model_path)

['models/best_model.pkl']

In [None]:
import os
import cv2
import joblib

# Load the trained model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load model files you created or otherwise trust.
model_path = 'D:/Project/models/best_model.pkl'
model = joblib.load(model_path)

# Sample image path
sample_image_path = 'E:/Project/python/images/Alstonia scholaris/Image_2.jpg'  # Change to your sample image path

# Read and preprocess the sample image
sample_image = cv2.imread(sample_image_path, cv2.IMREAD_GRAYSCALE)
if sample_image is None:
    # cv2.imread returns None for a missing or undecodable file; stop here
    # with a clear message instead of letting cv2.resize fail obscurely.
    raise FileNotFoundError("Unable to read image at path: " + sample_image_path)
sample_image = cv2.resize(sample_image, (100, 100))  # Resize image to match training size
sample_image = sample_image.flatten().reshape(1, -1)  # Flatten and reshape for prediction

# Predict the class of the sample image
# NOTE(review): the value printed is whatever label the model was trained on.
# The training cell in this notebook fits on LabelEncoder integers, so this is
# presumably an encoded class id, not a species name — confirm, and decode it
# with the matching encoder if a name is needed.
predicted_class = model.predict(sample_image)[0]

print("Predicted class:", predicted_class)


Predicted class: 14


In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import joblib

# Load your dataset
dataset_path = "D:/Project/dataset.xlsx"  # Path to your dataset Excel file
dataset = pd.read_excel(dataset_path)

# Extract the target labels from your dataset
# Blank cells in the column are read as NaN (a float). They are dropped here
# because LabelEncoder cannot fit on a mix of strings and floats.
target_labels = dataset['Scientific_name'].dropna().tolist()

# Remove duplicates and preserve the order
unique_labels = list(dict.fromkeys(target_labels))

# Create and fit the label encoder
# NOTE(review): the classifier in this notebook was trained with an encoder
# fitted on the image folder names. This encoder only decodes that model's
# predictions correctly if the 'Scientific_name' values are exactly the same
# set of strings as those folder names — confirm before using it.
label_encoder = LabelEncoder()
label_encoder.fit(unique_labels)

# Save the label encoder to a file
label_encoder_path = "D:/Project/models/label_encoder.pkl"
joblib.dump(label_encoder, label_encoder_path)

print("Label Encoder saved successfully to:", label_encoder_path)


Label Encoder saved successfully to: D:/Project/models/label_encoder.pkl
