In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score

In [2]:
data = np.load('Apple_15.npy', allow_pickle= True)
data.shape

(420, 301)

In [3]:
X= data[:, :-1]
y= data[:, -1]  

X.shape, y.shape

((420, 300), (420,))

In [4]:
scaler = StandardScaler()
scaler.fit(X)

Xn = scaler.transform(X)

In [6]:
# Split data into training, validation, and testing sets
X_train, X_test, y_train, y_test = train_test_split(Xn, y, test_size=0.25, random_state=42, stratify=y)

X_train.shape,y_train.shape,X_test.shape

((315, 300), (315,), (105, 300))

In [7]:
param_grid = {
    
    'n_neighbors': [3, 5],  # Use fewer values to test
    'weights': ['uniform'],  # Try with just one weight for now
    'metric': ['euclidean']  # Test one distance metric at a time
}

# KNN classifier
knn_classifier = KNeighborsClassifier()

# Initialize GridSearchCV
grid_search_knn = GridSearchCV(estimator=knn_classifier, param_grid=param_grid, cv=5, scoring='accuracy')

# Training
grid_search_knn.fit(X_train, y_train)


In [8]:
# test Accuracy
y_test_pred = grid_search_knn.predict(X_test)

accuracy = np.mean(y_test_pred == y_test) * 100
print("Test Accuracy:", accuracy)

Test Accuracy: 76.19047619047619


In [9]:
print(y_test_pred)
print(y_test)

['Pesticide_low' 'Fresh' 'Fresh' 'Fresh' 'Fungicide_low' 'Fresh' 'Fresh'
 'Fungicide_low' 'Fresh' 'Fresh' 'Fungicide_high' 'Fresh' 'Fresh' 'Fresh'
 'Fungicide_low' 'Fungicide_low' 'Fungicide_low' 'Fresh' 'Fresh'
 'Pesticide_high' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Pesticide_low' 'Fresh'
 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh'
 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh'
 'Pesticide_high' 'Pesticide_low' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh'
 'Fresh' 'Pesticide_low' 'Fresh' 'Fungicide_high' 'Fresh' 'Fresh' 'Fresh'
 'Fresh' 'Fungicide_low' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh'
 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Pesticide_high' 'Fresh' 'Fungicide_low'
 'Fresh' 'Fresh' 'Pesticide_low' 'Fresh' 'Fresh' 'Fungicide_low' 'Fresh'
 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh'
 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh'
 'Fungicide_low' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fresh' 'Fres

In [17]:
# Initialize the encoder
le= LabelEncoder()
y_encoded= le.fit_transform(y)

X_train2, X_test2, y_train2, y_test2 = train_test_split(Xn, y_encoded, test_size=0.25, random_state=42)

X_train2.shape,y_train2.shape

((315, 300), (315,))

In [19]:
unique_classes = np.unique(y)
print(unique_classes)

unique_classes_encoded = np.unique(y_encoded)
print(unique_classes_encoded)


['Fresh' 'Fungicide_high' 'Fungicide_low' 'Pesticide_high' 'Pesticide_low']
[0 1 2 3 4]


In [20]:
# Train XGBoost
'''
model = XGBClassifier()
model.fit(X_train2, y_train2)

# Check predictions
predictions = model.predict(X_test2)
print("Predictions:", predictions)
'''

xgb_classifier = XGBClassifier()

'''
# Define parameter grid for GridSearchCV
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.1, 0.01, 0.001],
    'n_estimators': [100, 200, 250, 300]
}
'''

# Define parameter grid for GridSearchCV
param_grid = {
    'max_depth': [7],
    'learning_rate': [0.01],
    'n_estimators': [100]
}


# GridSearchCV for XGBoost Classifier
xgb_grid_search = GridSearchCV(estimator=xgb_classifier, param_grid=param_grid, cv=5, scoring='accuracy')
xgb_grid_search.fit(X_train2, y_train2)

# Get best parameters for XGBoost Classifier
best_params = xgb_grid_search.best_params_
print("Best Parameters:", best_params)

Best Parameters: {'learning_rate': 0.01, 'max_depth': 7, 'n_estimators': 100}


In [21]:
# Get the best estimator
best_xgb_model = xgb_grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_xgb_model.predict(X_test2)

# Calculate the accuracy
accuracy = accuracy_score(y_test2, y_pred)
print("Accuracy:", accuracy*100)

Accuracy: 75.23809523809524


In [None]:
'''
y_pred = model.predict(X_test2)

accuracy = np.mean(y_pred == y_test2) * 100
print("Test Accuracy:", accuracy)
'''

In [22]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

param_dist = {
    
    'C': [0.1, 1, 10, 100],         
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],  
    'gamma': ['scale', 'auto'],
    'degree': [2, 3, 4, 5], 
    'coef0': [0.0, 0.1, 0.5, 1.0] 
    
}
# SVM classifier
svc = SVC()

# Instantiate the grid search
random_search = RandomizedSearchCV(svc, param_distributions=param_dist, n_iter=10, cv=5, verbose=2, random_state=42, n_jobs=-1)

# Training
random_search.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [23]:

# Best parameters found by RandomizedSearchCV
print("Best parameters:", random_search.best_params_)

# Use the best model to predict on the test data
y_pred = random_search.best_estimator_.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy*100)


Best parameters: {'kernel': 'linear', 'gamma': 'scale', 'degree': 4, 'coef0': 0.1, 'C': 10}
Accuracy: 89.52380952380953


In [24]:
from sklearn.naive_bayes import GaussianNB

# Initialize Gaussian Naive Bayes classifier
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

In [25]:
# test Accuracy
y_test_pred = nb_classifier.predict(X_test)

accuracy = np.mean(y_test_pred == y_test) * 100
print("Test Accuracy:", accuracy)

Test Accuracy: 63.8095238095238
