In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,confusion_matrix, precision_score, recall_score
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler



In [2]:
X_train, X_test, y_train, y_test  = 0,0,0,0
df = None

In [20]:
def preprocess(df):
    df = df.dropna(inplace=True)


def get_train_and_test(df):
    # Prepare the data
    X = df[['area', 'width', 'circularity', 'ellipticity', 'aspect ratio', 'form factor', 'perimeter',
            'perimeter ratio of diameter', 'perimeter Ratio of Physiological Length and Physiological Width']]
    y = df['Species']

    # Perform Standard Scaling
    standard_scaler = StandardScaler()
    X_standard_scaled = standard_scaler.fit_transform(X)
    # Split the data into training and testing sets
    global X_train, X_test, y_train, y_test
    X_train, X_test, y_train, y_test = train_test_split(
        X_standard_scaled, y, test_size=0.1, random_state=42)

    return standard_scaler.fit(X)


def set_optimal_perimeters():
    # Define the hyperparameter grid for tuning
    # Define the range and step size
    start = 1
    stop = 100
    step = 50

    # Create the array using numpy's arange function
    arr = np.arange(start, stop + step, step)
    gamma = list(np.logspace(-3, 3, 20))
    # param_grid = {
    #   'C':arr,
    #   'kernel': ['rbf'],
    #   'degree': [2],
    #   'gamma': ['scale', 'auto'] +gamma
    #   #['scale', 'auto'] +gamma
    # }

    param_grid = {
        'C': [101],
        'kernel': ['rbf'],
        'degree': [2],
        'gamma': [2.976351441631316]
        # ['scale', 'auto'] +gamma
    }

    return param_grid


# Create a dataframe with the dependent and independent variables
def print_metrics(y_test, y_pred):
    # Evaluate the model on the test set
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')

    print('Accuracy:', accuracy)
    print('Precision:', precision)
    print('Recall:', recall)


def train(param_grid):
    global X_train, y_train
    # Define the SVM classifier
    svm = SVC()

    # Perform Grid Search Cross Validation
    grid_search = GridSearchCV(svm, param_grid, cv=5)
    grid_search.fit(X_train, y_train)

    # Print the best hyperparameters and corresponding accuracy
    print("Best Hyperparameters: ", grid_search.best_params_)
    print("Best Accuracy: ", grid_search.best_score_)

    # Train SVM with the best hyperparameters on the entire training set
    best_svm = grid_search.best_estimator_

    best_svm.fit(X_train, y_train)
    # Evaluate the model on the test set
    accuracy = best_svm.score(X_test, y_test)
    # Predict labels for test data
    y_pred = best_svm.predict(X_test)

    # Calculate confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    print_metrics(y_test, y_pred)

    return best_svm


In [10]:
file_path = '../../csv/features_data-Sheet1.csv'

In [21]:
df = pd.read_csv(file_path) 
preprocess(df)

X_standard_scaled = get_train_and_test(df)
param_grid = set_optimal_perimeters()

best_svm = train(param_grid)


Best Hyperparameters:  {'C': 101, 'degree': 2, 'gamma': 2.976351441631316, 'kernel': 'rbf'}
Best Accuracy:  0.9619273592656599
Accuracy: 0.9808612440191388
Precision: 0.9812104091765108
Recall: 0.9808612440191388


In [22]:
import joblib

# Save the best SVM model
joblib.dump(best_svm, '../../models/plant_prediction_model_svm.pkl')

# Save standard scaler
joblib.dump(X_standard_scaled, '../../models/X_standard_scaler.pkl')

['../../models/X_standard_scaler.pkl']

In [23]:
df = pd.read_csv(file_path) 
preprocess(df)
X_standard_scaled = get_train_and_test(df)

In [24]:
import joblib
from sklearn import svm

# Load the saved SVM model
loaded_model = joblib.load('../../models/plant_prediction_model_svm.pkl')

# Make predictions on the test data
y_pred = loaded_model.predict(X_test)

# Evaluate the model on the test set
print_metrics(y_test, y_pred)

Accuracy: 0.9808612440191388
Precision: 0.9812104091765108
Recall: 0.9808612440191388


In [26]:
X_standard_scaler = joblib.load('../../models/X_standard_scaler.pkl')
new_scaled_data = X_standard_scaler.transform(np.array([[70748.5,305.0,0.5519020363926462,1.908372278751408,1.2459016393442623,0.5519020363926462,1269.207272648811,3.067476552718655,1.8528573323340312]]))

# Make predictions
predictions = best_svm.predict(new_scaled_data)

# Print the predicted class labels
print("Predicted Class Labels: ", predictions)

Predicted Class Labels:  [0.]


