In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,confusion_matrix, precision_score, recall_score
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [87]:
X_train, X_test, y_train, y_test  = 0,0,0,0

def preprocess(df, axis=0):
    # Drop null values
    df.dropna(inplace=True)
    # Drop duplicate rows
    df.drop_duplicates(inplace=True)
    # Reset index after dropping rows
    df.reset_index(drop=True, inplace=True)
    
    return df


def get_train_and_test(df):
    # Prepare the data
    X = df[['area', 'width', 'circularity', 'ellipticity', 'aspect ratio', 'form factor', 'perimeter',
            'perimeter ratio of diameter', 'perimeter Ratio of Physiological Length and Physiological Width']]
    y = df['Species']

    # Perform Standard Scaling
    standard_scaler = StandardScaler()
    X_standard_scaled = standard_scaler.fit_transform(X)

    # Split the data into training and testing sets
    global X_train, X_test, y_train, y_test
    X_train, X_test, y_train, y_test = train_test_split(X_standard_scaled, y, test_size=0.1, random_state=42)

    return standard_scaler.fit(X)


def set_optimal_perimeters():
    # Define the hyperparameter grid for tuning
    # Define the range and step size
    start = 1
    stop = 100
    step = 50

    # Create the array using numpy's arange function
    arr = np.arange(start, stop + step, step)
    gamma = list(np.logspace(-3, 3, 20))
    param_grid = {
      'C':arr,
      'kernel': ['rbf'],
      'degree': [2],
      'gamma': ['scale', 'auto'] + gamma
      #['scale', 'auto'] +gamma
    }

    # param_grid = {
    #     'C': [101],
    #     'kernel': ['rbf'],
    #     'degree': [2],
    #     'gamma': [2.976351441631316]
    #     # ['scale', 'auto'] +gamma
    # }

    return param_grid


# Create a dataframe with the dependent and independent variables
def print_metrics(y_test, y_pred):
    # Evaluate the model on the test set
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')

    print('Accuracy:', accuracy)
    print('Precision:', precision)
    print('Recall:', recall)


In [91]:
def svm_train(param_grid):
    global X_train, y_train
    # Define the SVM classifier
    svm = SVC()

    # Perform Grid Search Cross Validation
    grid_search = GridSearchCV(svm, param_grid, cv=5)
    grid_search.fit(X_train, y_train)

    # Print the best hyperparameters and corresponding accuracy
    print("Best Hyperparameters: ", grid_search.best_params_)
    print("Best Accuracy: ", grid_search.best_score_)

    # Train SVM with the best hyperparameters on the entire training set
    best_svm = grid_search.best_estimator_

    best_svm.fit(X_train, y_train)
    # Evaluate the model on the test set
    accuracy = best_svm.score(X_test, y_test)
    # Predict labels for test data
    y_pred = best_svm.predict(X_test)

    # Calculate confusion matrix
    cm = confusion_matrix(y_test, y_pred)

    print_metrics(y_test, y_pred)

    return best_svm

In [88]:
file_path = '../../csv/features_data-Sheet1.csv'
df_ = pd.read_csv(file_path)
df_

Unnamed: 0,length,width,diameter,area,perimeter,rectangularity,circularity,ellipticity,aspect ratio,form factor,narrow factor,perimeter ratio of diameter,perimeter Ratio of Physiological Length and Physiological Width,Texture,Species
0,380.0,305.0,413.762665,70748.5,1269.207273,1.245902,0.551902,1.908372,1.245902,0.551902,1.088849,3.067477,1.852857,7.0,0.0
1,377.0,208.0,373.708954,56552.0,1043.234627,1.812500,0.652971,1.858469,1.812500,0.652971,0.991270,2.791570,1.783307,1.0,0.0
2,386.0,146.0,393.509216,33447.0,975.352375,2.643836,0.441819,3.276360,2.643836,0.441819,1.019454,2.478601,1.833369,8.0,0.0
3,387.0,317.0,408.597290,70679.0,1191.768679,1.220820,0.625340,1.613881,1.220820,0.625340,1.055807,2.916732,1.692853,9.0,0.0
4,387.0,182.0,384.071136,54103.0,1032.038666,0.470284,0.638321,2.142146,2.126374,0.638321,0.992432,2.687103,1.813776,9.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4173,217.0,108.0,226.773682,8754.0,536.014280,2.009259,0.382881,4.527912,2.009259,0.382881,1.045040,2.363653,1.649275,9.0,3.0
4174,385.0,147.0,387.669403,32310.5,952.222429,0.381818,0.447792,3.336835,2.619048,0.447792,1.006934,2.456274,1.789892,1.0,3.0
4175,379.0,107.0,352.927338,23339.0,903.754395,3.542056,0.359080,4.398380,3.542056,0.359080,0.931207,2.560738,1.859577,1.0,3.0
4176,387.0,137.0,392.361908,36179.0,975.470123,0.354005,0.477792,3.154620,2.824818,0.477792,1.013855,2.486149,1.861584,1.0,3.0


In [89]:
df = preprocess(df_)
df

Unnamed: 0,length,width,diameter,area,perimeter,rectangularity,circularity,ellipticity,aspect ratio,form factor,narrow factor,perimeter ratio of diameter,perimeter Ratio of Physiological Length and Physiological Width,Texture,Species
0,380.0,305.0,413.762665,70748.5,1269.207273,1.245902,0.551902,1.908372,1.245902,0.551902,1.088849,3.067477,1.852857,7.0,0.0
1,377.0,208.0,373.708954,56552.0,1043.234627,1.812500,0.652971,1.858469,1.812500,0.652971,0.991270,2.791570,1.783307,1.0,0.0
2,386.0,146.0,393.509216,33447.0,975.352375,2.643836,0.441819,3.276360,2.643836,0.441819,1.019454,2.478601,1.833369,8.0,0.0
3,387.0,317.0,408.597290,70679.0,1191.768679,1.220820,0.625340,1.613881,1.220820,0.625340,1.055807,2.916732,1.692853,9.0,0.0
4,387.0,182.0,384.071136,54103.0,1032.038666,0.470284,0.638321,2.142146,2.126374,0.638321,0.992432,2.687103,1.813776,9.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3063,217.0,108.0,226.773682,8754.0,536.014280,2.009259,0.382881,4.527912,2.009259,0.382881,1.045040,2.363653,1.649275,9.0,3.0
3064,385.0,147.0,387.669403,32310.5,952.222429,0.381818,0.447792,3.336835,2.619048,0.447792,1.006934,2.456274,1.789892,1.0,3.0
3065,379.0,107.0,352.927338,23339.0,903.754395,3.542056,0.359080,4.398380,3.542056,0.359080,0.931207,2.560738,1.859577,1.0,3.0
3066,387.0,137.0,392.361908,36179.0,975.470123,0.354005,0.477792,3.154620,2.824818,0.477792,1.013855,2.486149,1.861584,1.0,3.0


# SVM

In [90]:
X_standard_scaled = get_train_and_test(df_)
param_grid = set_optimal_perimeters()

best_svm = svm_train(param_grid)

Best Hyperparameters:  {'C': 101, 'degree': 2, 'gamma': 1.438449888287663, 'kernel': 'rbf'}
Best Accuracy:  0.9355301779490002
Accuracy: 0.9576547231270358
Precision: 0.9584822036256159
Recall: 0.9576547231270358


In [22]:
import joblib

# Save the best SVM model
joblib.dump(best_svm, '../../models/plant_prediction_model_svm.pkl')
# Save standard scaler
joblib.dump(X_standard_scaled, '../../models/X_standard_scaler.pkl')

['../../models/X_standard_scaler.pkl']

In [8]:
df = pd.read_csv(file_path) 
preprocess(df)
X_standard_scaled = get_train_and_test(df)

In [24]:
import joblib
from sklearn import svm

# Load the saved SVM model
loaded_model = joblib.load('../../models/plant_prediction_model_svm.pkl')

# Make predictions on the test data
y_pred = loaded_model.predict(X_test)

# Evaluate the model on the test set
print_metrics(y_test, y_pred)

Accuracy: 0.9808612440191388
Precision: 0.9812104091765108
Recall: 0.9808612440191388


In [26]:
X_standard_scaler = joblib.load('../../models/X_standard_scaler.pkl')
new_scaled_data = X_standard_scaler.transform(np.array([[70748.5,305.0,0.5519020363926462,1.908372278751408,1.2459016393442623,0.5519020363926462,1269.207272648811,3.067476552718655,1.8528573323340312]]))

# Make predictions
predictions = best_svm.predict(new_scaled_data)

# Print the predicted class labels
print("Predicted Class Labels: ", predictions)

Predicted Class Labels:  [0.]


