#Bukenya Andrew
#2021/HD05/2288U
#2100702288

## Feature Extraction and Classification with K-Nearest Neighbors (KNN)

In [1]:
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [2]:
%cd /content/gdrive/MyDrive/AndrewsProject1/project_files/crop_classisfication

/content/gdrive/MyDrive/AndrewsProject1/project_files/crop_classisfication


In [3]:
# Importing required modules
import pickle
import numpy as np
import matplotlib.pyplot as plt
import cv2
from time import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn import svm
import joblib

def data(X_path,Y_path):
  """Restore the X and Y objects from their pickle files.

  Args:
    X_path: Path of the pickle file holding the X data.
    Y_path: Path of the pickle file holding the Y data.

  Returns:
    A tuple ``(X, Y)`` with the two unpickled objects.

  Raises:
    OSError: If either file cannot be opened.
    pickle.UnpicklingError: If a file does not contain a valid pickle.
  """
  # NOTE: pickle.load can execute arbitrary code while loading; only point
  # this at pickle files that come from a trusted source.
  # The context managers guarantee both handles are closed, even when
  # unpickling raises.
  with open(X_path, "rb") as x_file:
    X = pickle.load(x_file)
  with open(Y_path, "rb") as y_file:
    Y = pickle.load(y_file)
  return X, Y

def sift(img):
  """Detect SIFT keypoints on an image and compute their descriptors.

  Args:
    img: Image passed straight to ``detectAndCompute``.

  Returns:
    A tuple ``(kp, des)``: the detected keypoints and their descriptors.

  Raises:
    AttributeError: If the installed OpenCV build exposes SIFT in neither
      the main module nor ``cv2.xfeatures2d``.
  """
  # Newer OpenCV builds expose SIFT in the main module; older contrib builds
  # only have it under cv2.xfeatures2d. Prefer the former, fall back to the
  # latter so the function works with either kind of build.
  create_sift = getattr(cv2, "SIFT_create", None)
  if create_sift is None:
    create_sift = cv2.xfeatures2d.SIFT_create
  detector = create_sift()
  # No mask: keypoints are searched for over the whole image.
  kp, des = detector.detectAndCompute(img, None)
  return kp, des

def surf(img):
  """Detect SURF keypoints on an image and compute their descriptors.

  Args:
    img: Image passed straight to ``detectAndCompute``.

  Returns:
    A tuple ``(kp, des)``: the detected keypoints and their descriptors.
  """
  # NOTE(review): SURF lives in the contrib "xfeatures2d" module and is
  # presumably unavailable in OpenCV builds without the non-free algorithms;
  # confirm against the installed build.
  keypoints, descriptors = cv2.xfeatures2d.SURF_create().detectAndCompute(img, None)
  return keypoints, descriptors

#Added ORB to compare its features with those of sift and surf
def orb(img):
  """Detect ORB keypoints on an image and compute their descriptors.

  Args:
    img: Image passed straight to ``detectAndCompute``.

  Returns:
    A tuple ``(kp, des)``: the detected keypoints and their descriptors.
  """
  # No mask: keypoints are searched for over the whole image.
  keypoints, descriptors = cv2.ORB_create().detectAndCompute(img, None)
  return keypoints, descriptors

def feature_number(feature, images=None, min_features=20):
  """Build one fixed-length descriptor vector per image.

  Each image is run through ``feature``; images that yield fewer than
  ``min_features`` keypoints are skipped. For the remaining images the first
  ``min_features`` descriptor rows are flattened into a single row vector so
  that every image is represented by the same number of values.

  Args:
    feature: Callable taking an image and returning ``(kp, des)``, e.g.
      ``sift``, ``surf`` or ``orb``.
    images: Sequence of images to process. When omitted, the module-level
      ``X`` is used, which keeps existing ``feature_number(feature)`` calls
      working.
    min_features: Minimum number of keypoints an image needs to qualify, and
      the number of descriptor rows kept per image. Defaults to 20.

  Returns:
    A tuple ``(list_data, ind)``: ``list_data`` holds one array of shape
    ``(1, min_features * descriptor_length)`` per kept image, ``ind`` holds
    the indices of the skipped images.
  """
  if images is None:
    # Backward-compatible default: fall back to the global image list.
    images = X
  # Indices of images that did not produce enough features
  ind = []
  # Flattened descriptor vectors of the images that did
  list_data = []
  t0 = time()
  for i, img in enumerate(images):
    kp, des = feature(img)
    # Skip images without descriptors or with too few of them; the explicit
    # check on des keeps every kept vector at exactly the same length.
    if des is None or len(kp) < min_features or len(des) < min_features:
      ind.append(i)
      continue
    # Keep the first min_features descriptors and flatten them to one row
    vector_data = des[:min_features, :].reshape(1, -1)
    list_data.append(vector_data)
  print("Algorithm time: %0.3fs" % (time() - t0))
  return list_data, ind

#def knn_save(path):
 #"""Saving KNN model"""
#joblib.dump(path)

if __name__ == '__main__':
  # Compare SIFT, SURF and ORB descriptors as input to a KNN classifier.
  # For each extractor: build fixed-length feature vectors, drop the images
  # that did not yield enough features, split 70/30, train KNN and report.

  # Imported once here rather than on every loop iteration.
  from sklearn.neighbors import KNeighborsClassifier

  # List of categories
  categories = ["banana", "cassava"]
  # The directory where the X and Y data is located
  X_path = "/content/gdrive/MyDrive/AndrewsProject1/project_files/X.pickle"
  Y_path = "/content/gdrive/MyDrive/AndrewsProject1/project_files/Y.pickle"

  # Image width
  IMG_W = int(161)
  # Image height
  IMG_H = int(150)
  # Load the images (X) and their labels (Y)
  X, Y = data(X_path, Y_path)
  # Names of the feature extraction methods, in the same order as the functions below
  features = ['sift', 'surf', 'orb']
  # Iterate through the individual feature extractors
  for feature_name, feature in zip(features, [sift, surf, orb]):
    t1 = time()
    print("\n Feature extractor: %s" % feature_name)
    # Fresh, mutable copy of the labels for this extractor
    # (list() also works when Y is not a plain list)
    labels = list(Y)
    # An extractor may be missing from the installed OpenCV build; skip it
    # instead of aborting the remaining comparisons.
    try:
      list_data, ind = feature_number(feature)
    except (cv2.error, AttributeError) as exc:
      print(" Skipping %s: feature extractor unavailable (%s)" % (feature_name, exc))
      continue
    # Delete the labels of images that did not yield enough features.
    # Highest index first, so earlier deletions do not shift later ones.
    for i in sorted(ind, reverse=True):
      del labels[i]

    if not list_data:
      print(" Skipping %s: no image produced enough features" % feature_name)
      continue

    # Stack the per-image vectors into a (len(labels), vector_length) matrix.
    # Named feature_matrix so it does not overwrite the data() loader.
    feature_matrix = np.array(list_data).reshape(len(labels), len(list_data[0][0]))
    # Encode the labels as integers
    le = LabelEncoder()
    labels = le.fit_transform(labels)

    # Division of dataset into train and test data: 70% training and 30% test
    X_train, X_test, y_train, y_test = train_test_split(
        feature_matrix, labels, test_size=0.3, random_state=42)

    print("\n Output samples after splitting the data")
    print("\nX_train")
    print(X_train)

    print("\n X_test")
    print(X_test)

    print("\n y_train")
    print(y_train)

    print("\n y_test")
    print(y_test)

    # Instantiate the KNN model with 5 neighbors
    model = KNeighborsClassifier(n_neighbors = 5)

    # Train the model on training data
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    print("\n Accuracy:",metrics.accuracy_score(y_test, y_pred))
    # NOTE: with average='micro' on a single-label problem, precision and
    # recall both equal the accuracy; the per-class TP / (TP + FP) and
    # TP / (TP + FN) figures are in the classification report below.
    print("\n Precision:",metrics.precision_score(y_test, y_pred, average='micro'))
    print("\n Recall:",metrics.recall_score(y_test, y_pred, average='micro'))

    # Table of results obtained
    # NOTE(review): target_names assumes the encoded labels sort in the same
    # order as `categories` -- confirm against the contents of Y.
    print("\n CLASSIFICATION REPORT \n")
    print(classification_report(y_test, y_pred, target_names=categories))

    print("\nThe program executed in: %0.3fs" % (time() - t1))

  

Algorithm time: 2.300s

 Output samples after splitting the data

X_train
[[  0.   0.   0. ...   0.   0.   0.]
 [ 41. 139.  91. ...   7.   2.   2.]
 [ 16.   3.   0. ...   0.   0.  78.]
 ...
 [  3.   0.   0. ...   1.   2.   2.]
 [105.  62.  25. ...  15.   5.  40.]
 [  0.   0.   0. ...   0.   0.   0.]]

 X_test
[[ 0.  0.  0. ... 69.  6. 18.]
 [14.  2.  0. ... 25. 10.  0.]
 [39. 33. 29. ...  0.  0.  1.]
 ...
 [ 1.  0.  0. ...  0.  0.  0.]
 [93.  1.  0. ...  0.  0.  0.]
 [ 0. 64. 61. ...  0.  0.  0.]]

 y_train
[0 1 0 0 1 0 0 0 1 0 1 0 1 1 0 1 1 1 1 1 0 0 0 1 0 1 1 0 1 1 1 1 1 1 0 0 0
 0 1 1 1 1 0 1 0 0 1 1 1 0 0 1 1 0 1 0 0 0 1 0 1 1 0 1 0 0 0 1 1 0 1 0 0 0
 0 1 0 0 0 1 1]

 y_test
[1 1 0 1 1 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 1 1 0 0 0 0 1 1 0 1 0 1 0 0 0]

 Accuracy: 0.8611111111111112

 Precision: 0.8611111111111112

 Recall: 0.8611111111111112

 CLASSIFICATION REPORT 

              precision    recall  f1-score   support

      banana       0.93      0.76      0.84        17
     cassav

error: ignored