In [86]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import ast

# Load the dataset
data = pd.read_csv('leafsnap_data.csv')

hv = [d.replace("'", "") for d in data['hist_values']]
hv = [ast.literal_eval(d) for d in data['hist_values']]

X = hv
y = data['plant']

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [109]:
# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [110]:
# Create the k-nearest neighbors model
k = 6 # Set the value of k
knn = KNeighborsClassifier(n_neighbors=k)

# Train the model on the training set
knn.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn.predict(X_test)

# Evaluate the model performance using accuracy score
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.25


In [111]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,confusion_matrix, precision_score, recall_score
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def set_optimal_perimeters():
    # Define the hyperparameter grid for tuning
    # Define the range and step size
    start = 1
    stop = 100
    step = 50

    # Create the array using numpy's arange function
    arr = np.arange(start, stop + step, step)
    gamma = list(np.logspace(-3, 3, 30))
    param_grid = {
      'C':arr,
      'kernel': ['rbf'],
      'degree': [2],
      'gamma': ['scale', 'auto'] + gamma
      #['scale', 'auto'] +gamma
    }

    # param_grid = {
    #     'C': [101],
    #     'kernel': ['rbf'],
    #     'degree': [2],
    #     'gamma': [2.976351441631316]
    #     # ['scale', 'auto'] +gamma
    # }
    return param_grid


def svm_train(param_grid, X_train, X_test, y_train, y_test):
    # Define the SVM classifier
    svm = SVC()
    # Perform Grid Search Cross Validation
    grid_search = GridSearchCV(svm, param_grid, cv=5)
    grid_search.fit(X_train, y_train)
    # Print the best hyperparameters and corresponding accuracy
    print("Best Hyperparameters: ", grid_search.best_params_)
    print("Best Accuracy: ", grid_search.best_score_)
    # Train SVM with the best hyperparameters on the entire training set
    best_svm = grid_search.best_estimator_
    best_svm.fit(X_train, y_train)
    # Evaluate the model on the test set
    accuracy = best_svm.score(X_test, y_test)
    # Predict labels for test data
    y_pred = best_svm.predict(X_test)
    return best_svm


param_grid = set_optimal_perimeters()
svm = svm_train(param_grid, X_train_scaled, X_test, y_train, y_test)



In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,confusion_matrix, precision_score, recall_score
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def set_optimal_perimeters():
    # Define the hyperparameter grid for tuning
    # Define the range and step size
    start = 1
    stop = 100
    step = 50

    # Create the array using numpy's arange function
    arr = np.arange(start, stop + step, step)
    gamma = list(np.logspace(-3, 3, 30))
    param_grid = {
      'C':arr,
      'kernel': ['rbf'],
      'degree': [2],
      'gamma': ['scale', 'auto'] + gamma
      #['scale', 'auto'] +gamma
    }

    # param_grid = {
    #     'C': [101],
    #     'kernel': ['rbf'],
    #     'degree': [2],
    #     'gamma': [2.976351441631316]
    #     # ['scale', 'auto'] +gamma
    # }
    return param_grid


# Create a dataframe with the dependent and independent variables
def print_metrics(y_test, y_pred):
    # Evaluate the model on the test set
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')

    print('Accuracy:', accuracy)
    print('Precision:', precision)
    print('Recall:', recall)
    

def rf_train(X_train, X_test, y_train, y_test):
    # Define the NB classifier
    rf = RandomForestClassifier()
    # Define hyperparameter grid to search over
    param_grid = {
        'n_estimators': [10, 50, 100],  # Number of trees in the forest
        'max_depth': [None, 10, 20],  # Maximum depth of the trees
        'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split a node
        'min_samples_leaf': [1, 2, 4],  # Minimum number of samples required to be at a leaf node
        'max_features': ['sqrt', 'log2']  # Number of features to consider for the best split
    }
    # Create GridSearchCV object with Random Forest Classifier and hyperparameter grid
    grid_search = GridSearchCV(rf, param_grid, cv=5)
    grid_search.fit(X_train, y_train)
    # Get the best hyperparameters found by GridSearchCV
    best_params = grid_search.best_params_
    print("Best Parameters: ", best_params)
    print("Best Accuracy: ", grid_search.best_score_)
    # Train Random Forest Classifier with the best hyperparameters on the entire training data
    best_rf = RandomForestClassifier(**best_params)
    best_rf.fit(X_train, y_train)
    # Evaluate the model on the test set
    accuracy = best_rf.score(X_test, y_test)
    # Predict labels for test data
    y_pred = best_rf.predict(X_test)
    print_metrics(y_test, y_pred)
    return best_rf


rf = rf_train(X_train, X_test, y_train, y_test)



Best Parameters:  {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'min_samples_split': 5, 'n_estimators': 100}
Best Accuracy:  0.7515151515151515
Accuracy: 0.7037037037037037
Precision: 0.7530864197530863
Recall: 0.7037037037037037


  _warn_prf(average, modifier, msg_start, len(result))
