In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from scipy.stats import uniform, randint

# Function to input the dataset from an Excel file
def input_dataset(path, target_column=1, feature_start=2):
    """Load an Excel sheet and split it into features and targets.

    Args:
        path: Location of the Excel file; passed unchanged to
            ``pd.read_excel``.
        target_column: Zero-based position of the column that holds the
            labels. Defaults to 1, i.e. the second column of the sheet.
        feature_start: Zero-based position of the first feature column.
            Every column from this position to the end of the sheet is
            used as a feature. Defaults to 2, i.e. the third column onward.

    Returns:
        A tuple ``(inputs, targets)`` of NumPy arrays: ``inputs`` holds the
        selected feature columns and ``targets`` the selected label column.

    Note:
        With the defaults, column 0 is ignored entirely. Callers that
        override the positions are responsible for keeping the target
        column outside the feature range.
    """
    dataset = pd.read_excel(path)
    # Positional (iloc) selection: features are a column range, the target a single column.
    inputs = dataset.iloc[:, feature_start:].values
    targets = dataset.iloc[:, target_column].values
    return inputs, targets

# Function to split the dataset into training and testing sets
def divide_dataset(inputs, targets, test_size=0.2, random_state=40):
    """Split features and targets into train and test partitions.

    Args:
        inputs: Feature rows to split.
        targets: Labels aligned row-for-row with ``inputs``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        The 4-tuple ``(X_train, X_test, y_train, y_test)``.
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    train_inputs, test_inputs, train_targets, test_targets = train_test_split(
        inputs, targets, **split_options
    )
    return train_inputs, test_inputs, train_targets, test_targets

# Function to define the hyperparameter space for tuning the SVM model
def set_hyperparameters():
    """Build the search space sampled during hyperparameter tuning.

    Returns:
        A dict with three entries:
            'C': a frozen ``scipy.stats.uniform`` distribution with loc 0.1
                and scale 10, i.e. values drawn from [0.1, 10.1].
            'kernel': the candidate kernels, 'linear' and 'rbf'.
            'gamma': the candidate kernel-coefficient settings, 'scale'
                and 'auto'.
    """
    # Regularization strength: larger C means weaker regularization.
    regularization = uniform(0.1, 10)
    kernel_choices = ['linear', 'rbf']
    gamma_choices = ['scale', 'auto']

    parameter_space = {}
    parameter_space['C'] = regularization
    parameter_space['kernel'] = kernel_choices
    parameter_space['gamma'] = gamma_choices
    return parameter_space

# Function to train the SVM model using RandomizedSearchCV for hyperparameter tuning
def train_classifier_model(X_train, y_train, param_dist, n_iterations=5, cv=3):
    """Tune an SVC with a randomized search and return the best estimator.

    Args:
        X_train: Training feature rows.
        y_train: Training labels aligned with ``X_train``.
        param_dist: Search space passed as ``param_distributions``.
        n_iterations: Number of sampled parameter settings (``n_iter``).
        cv: Cross-validation setting passed as ``cv``.

    Returns:
        The search's ``best_estimator_`` on success, or ``None`` when
        fitting raises ``ValueError`` (the error message is printed).
    """
    # Fixed seed and two worker jobs, so repeated runs sample the same candidates.
    search = RandomizedSearchCV(
        SVC(),
        param_distributions=param_dist,
        n_iter=n_iterations,
        cv=cv,
        random_state=42,
        n_jobs=2,
    )

    try:
        search.fit(X_train, y_train)
        print(f"Best parameters found: {search.best_params_}")
        print(f"Best cross-validation score: {search.best_score_}")
        winner = search.best_estimator_
        return winner
    except ValueError as e:
        print(f"Error during model training: {e}")
        return None

# Function to test the model on the test set and print the results
def test_model(model, X_test, y_test):
    if model is None:
        print("Model training was unsuccessful. No testing performed.")
        return

    y_predicted = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_predicted)
    print(f"Test accuracy: {accuracy}")

    # Display the first 15 predictions with their corresponding inputs
    for i, input_data in enumerate(X_test[:15]):
        prediction = model.predict([input_data])
        print(f"Input: {input_data} - Predicted Output: {prediction[0]} - Expected Output: {y_test[i]}")

# Main script logic: load the data, split it, tune an SVM on a subsample of
# the training split, then report results on the held-out test split.
path = "/content/bird_species_features (1).xlsx"  # Update with your dataset path
inputs, targets = input_dataset(path)  # Load features and labels from the Excel file
X_train, X_test, y_train, y_test = divide_dataset(inputs, targets)  # Split using divide_dataset's default test_size and random_state

# Reduce dataset size for faster processing: test_size=0.8 discards 80% of the
# training split (the "_" outputs), so only about 20% of it is used for tuning.
X_train_sample, _, y_train_sample, _ = train_test_split(X_train, y_train, test_size=0.8, random_state=42)

param_dist = set_hyperparameters()  # Get the hyperparameter space
best_model = train_classifier_model(X_train_sample, y_train_sample, param_dist)  # Best estimator, or None if fitting raised ValueError
test_model(best_model, X_test, y_test)  # Evaluate on the full test split; prints a notice instead when best_model is None


Best parameters found: {'C': 3.845401188473625, 'gamma': 'scale', 'kernel': 'linear'}
Best cross-validation score: 0.24641167269054934
Test accuracy: 0.24635669673837612
Input: [ 0.11023741 -0.10089833  0.13097738  0.76555126  0.11272754 -0.29706726
 -1.04207544  0.25039758  0.93715677  0.06312116  0.22645647  0.32325675
  0.25399843  0.05977403  0.0288605  -1.25141933] - Predicted Output: Black Francolin - Expected Output: Painted Francolin
Input: [-0.22337695 -0.28277013 -0.41301861 -0.22569126  1.36599427  0.68984069
 -0.95045707  0.4466341   0.57649733 -0.64171796 -0.9993563  -0.98722529
 -1.61311656 -1.12000393 -0.57436886 -1.32612282] - Predicted Output: Black Francolin - Expected Output: Black Francolin
Input: [-0.06657304  1.20153394 -0.54960494  0.56718821 -0.68725944 -1.0953616
  0.66267038  0.76900913  0.1685736  -1.04028922  1.34611344  1.85958356
 -0.45212484 -0.54183473  1.61239475  1.8983187 ] - Predicted Output: Black Francolin - Expected Output: Grey Francolin
Input: [