In [42]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,confusion_matrix, precision_score, recall_score
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [44]:
X_train, X_test, y_train, y_test  = 0,0,0,0
df = None

In [67]:
def preprocess(df):
  df = df.dropna(inplace=True)

def get_train_and_test(df):
  # Prepare the data
  X = df[['area','width','circularity', 'ellipticity', 'form factor','perimeter' ,'perimeter ratio of diameter', 'perimeter Ratio of Physiological Length and Physiological Width']]
  y = df['Species']
  # Perform Min-Max Scaling
  min_max_scaler = MinMaxScaler()
  X_minmax_scaled = min_max_scaler.fit_transform(X)

  # Perform Standard Scaling
  standard_scaler = StandardScaler()
  X_standard_scaled = standard_scaler.fit_transform(X)
  # Split the data into training and testing sets
  global X_train, X_test, y_train, y_test
  X_train, X_test, y_train, y_test = train_test_split(X_standard_scaled, y, test_size=0.1, random_state=42)

def set_optimal_perimeters():
    # Define the hyperparameter grid for tuning
  # Define the range and step size
  start = 1
  stop = 50
  step = 0.5

  # Create the array using numpy's arange function
  arr = np.arange(start, stop + step, step)
  gamma =  list(np.logspace(-3, 3, 20))
  param_grid = {
    'C':[48.2],
    'kernel': ['rbf'],
    'degree': [2],
    'gamma': [6.158482110660261]
    #['scale', 'auto'] +gamma
  }
  return param_grid

def train(param_grid):
  global X_train, y_train
    # Define the SVM classifier
  svm = SVC()

  # Perform Grid Search Cross Validation
  grid_search = GridSearchCV(svm, param_grid, cv=5)
  grid_search.fit(X_train, y_train)

  # Print the best hyperparameters and corresponding accuracy
  print("Best Hyperparameters: ", grid_search.best_params_)
  print("Best Accuracy: ", grid_search.best_score_)

  # Train SVM with the best hyperparameters on the entire training set
  best_svm = grid_search.best_estimator_
  
  best_svm.fit(X_train, y_train)
  # Evaluate the model on the test set
  accuracy = best_svm.score(X_test, y_test)
  # Predict labels for test data
  y_pred = best_svm.predict(X_test)

  # Calculate confusion matrix
  cm = confusion_matrix(y_test, y_pred)

  # Extract true positives, false positives, true negatives, and false negatives from confusion matrix


  # Calculate recall
  recall = recall_score(y_test, y_pred, average='weighted')

  # Calculate precision
  precision = precision_score(y_test, y_pred, average='weighted')



  # Print the calculated metrics
  print("Recall: {:.4f}".format(recall))
  print("Precision: {:.4f}".format(precision))
  print("Test Accuracy: ", accuracy)

  

In [68]:
file_path = 'sheet.csv'
df = pd.read_csv(file_path) 
preprocess(df)
get_train_and_test(df)
param_grid = set_optimal_perimeters()
best_svm = train(param_grid)



Best Hyperparameters:  {'C': 48.2, 'degree': 2, 'gamma': 6.158482110660261, 'kernel': 'rbf'}
Best Accuracy:  0.9154970224461751
Recall: 0.9758
Precision: 0.9780
Test Accuracy:  0.9757575757575757


In [None]:
# Input new data for prediction
new_data = np.array([[ ]])  # Replace this with your new data

# Make predictions
predictions = best_svm.predict(new_data)

# Print the predicted class labels
print("Predicted Class Labels: ", predictions)