# Classification of Agricultural Pests using different ML models

In [1]:
from PIL import Image
import numpy as np
import pandas as pd
import csv
import os
from skimage.feature import graycomatrix, graycoprops

In [2]:
# Root of the Kaggle dataset. The feature-extraction loop lists the entries
# of this directory and uses each sub-folder name as the class label.
inputDirectory = "/kaggle/input/agricultural-pests-image-dataset/"

In [3]:
# Start the feature file from scratch with only the header row.
# Mode 'w' (rather than 'a') keeps this cell idempotent: re-running the
# notebook no longer appends a second header line or leaves rows from an
# earlier run in the file, both of which would corrupt the later read_csv.
with open("/kaggle/working/features.csv", 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Image','red_sum', 'green_sum', 'blue_sum', 'correlation', 'energy', 'contrast', 'homogeneity','label'])

In [4]:
def analyze_image(image_path):
    """Extract colour and texture features from one image file.

    Parameters
    ----------
    image_path : str
        Path of the image to analyse.

    Returns
    -------
    list
        ``[image_array, red_sum, green_sum, blue_sum, correlation, energy,
        contrast, homogeneity]`` where ``image_array`` is the RGB pixel array,
        the ``*_sum`` values are the per-channel pixel sums, and the last four
        values are GLCM texture properties of the grayscale image.

    Notes
    -----
    The channel sums are not normalised by image size, so they scale with
    the number of pixels. ``image_array`` is returned as-is; writing it to a
    CSV cell stores only NumPy's (truncated) string representation.
    """
    # Open inside a context manager so the file handle is released right
    # away; convert() yields a new, fully loaded image that stays valid
    # after the source file has been closed.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    image_array = np.array(image)

    # Sum over rows and columns, leaving one total per colour channel.
    red_sum, green_sum, blue_sum = image_array.sum(axis=(0, 1))

    # Texture features are computed on the grayscale version of the image.
    grayscale_array = np.array(image.convert("L"))

    # Gray-level co-occurrence matrix for distance 1 and angle 0, made
    # symmetric and normalised, followed by its summary properties.
    glcm = graycomatrix(grayscale_array, distances=[1], angles=[0], symmetric=True, normed=True)
    correlation = graycoprops(glcm, 'correlation')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]

    # Return the calculated features as a list (callers append the label).
    features = [image_array, red_sum, green_sum, blue_sum, correlation, energy, contrast, homogeneity]
    return features

In [5]:
# Walk every class folder, extract the features of each image and append
# one row per image to the feature file. The file is opened once for the
# whole run instead of being reopened for every single image.
with open("/kaggle/working/features.csv", 'a', newline='') as file:
    writer = csv.writer(file)
    for folder in os.listdir(inputDirectory):
        folderPath = os.path.join(inputDirectory, folder)
        # Skip stray files at the dataset root; only folders hold images.
        if not os.path.isdir(folderPath):
            continue
        print("Inside folder {}".format(folder))
        for img in os.listdir(folderPath):
            imgPath = os.path.join(folderPath, img)
            features = analyze_image(imgPath)
            # The folder name is the class label of the image.
            features.append(folder)
            writer.writerow(features)
            print("Processed Image: {}".format(imgPath))

Inside folder beetle
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (219).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (285).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (124).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (342).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (320).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (186).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (145).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (88).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (355).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (171).jpg
Processed Image: /kaggle/input/agricultural-pests-image-dataset/beetle/beetle (203).jpg
Processed Im

## Load the data from the CSV file and perform data standardization

In [6]:
# Read back the per-image feature rows written by the extraction loop.
df = pd.read_csv("/kaggle/working/features.csv")

In [7]:
# Preview the first rows to sanity-check the extracted columns.
df.head()

Unnamed: 0,Image,red_sum,green_sum,blue_sum,correlation,energy,contrast,homogeneity,label
0,[[[119 124 128]\n [116 121 124]\n [137 138 1...,8541019,8546918,8231608,0.906282,0.012002,323.347946,0.087765,beetle
1,[[[240 249 255]\n [240 249 255]\n [240 249 2...,13530466,13407083,13381422,0.950554,0.1911,142.611037,0.546303,beetle
2,[[[237 237 235]\n [237 237 235]\n [237 237 2...,12309764,11363292,10872450,0.965044,0.063243,212.055403,0.496337,beetle
3,[[[ 23 44 1]\n [ 23 44 1]\n [ 23 44 ...,5571475,7941488,2867010,0.98697,0.043663,45.376496,0.486561,beetle
4,[[[141 178 98]\n [142 179 99]\n [143 180 1...,5733322,7479809,2718236,0.971701,0.032272,107.877303,0.451339,beetle


In [8]:
# The 'Image' column only holds the string form of the pixel array and is not
# a usable feature. errors='ignore' makes the cell safe to re-run: without it
# a second execution raises KeyError because the column is already gone.
df = df.drop(columns=['Image'], errors='ignore')

In [9]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [10]:
# Hold out 20% of the rows for evaluation; every column except 'label' is a
# feature. The fixed seed keeps the split reproducible.
xTrain, xTest, yTrain, yTest = train_test_split(
    df.drop(columns=['label']),
    df['label'],
    train_size=0.8,
    random_state=42,
)

In [11]:
# Learn the class-name -> integer mapping from the training labels only and
# encode them in the same step, then reuse that mapping for the test labels.
labelEncoder = LabelEncoder()
yTrain = labelEncoder.fit_transform(yTrain)
print(labelEncoder.classes_)
yTest = labelEncoder.transform(yTest)

['ants' 'bees' 'beetle' 'catterpillar' 'earthworms' 'earwig' 'grasshopper'
 'moth' 'slug' 'snail' 'wasp' 'weevil']


In [12]:
from sklearn.preprocessing import StandardScaler

In [13]:
# Standardise the features using statistics from the training split only,
# so nothing about the test split leaks into the scaling.
scaler = StandardScaler()
xTrain = scaler.fit_transform(xTrain)
xTest = scaler.transform(xTest)

In [14]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, cohen_kappa_score

In [15]:
# Start the metrics file from scratch with only the header row.
# Mode 'w' (rather than 'a') keeps this cell idempotent: re-running the
# notebook no longer adds a second header line or keeps results from an
# earlier run in the table that is read back at the end.
with open("/kaggle/working/metrics.csv", 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Model','Accuracy','Precision','Senstivity','F1 Score','MCC Score','Kappa Coeff'])

### Support Vector Machine

In [16]:
from sklearn.svm import SVC

In [17]:
# Support vector classifier with scikit-learn's default hyper-parameters.
modelSVC = SVC()

In [18]:
# Train on the standardised training features and encoded labels.
modelSVC.fit(xTrain, yTrain)

In [19]:
# Predict encoded class labels for the held-out test features.
yPred = modelSVC.predict(xTest)

In [20]:
# Score the SVM predictions. Precision, recall and F1 are micro-averaged,
# which for single-label multiclass predictions makes them equal to accuracy.
accuracy = accuracy_score(yTest, yPred)
precision, senstivity, f1 = (
    scorer(yTest, yPred, average='micro')
    for scorer in (precision_score, recall_score, f1_score)
)
mcc = matthews_corrcoef(yTest, yPred)
kappa = cohen_kappa_score(yTest, yPred)

In [21]:
# Append the SVM scores as one row of the shared metrics table.
with open("/kaggle/working/metrics.csv", 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow([
        'SVM',
        accuracy,
        precision,
        senstivity,
        f1,
        mcc,
        kappa,
    ])

### Decision Tree 

In [22]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree with default hyper-parameters. random_state is pinned because
# the tree's feature permutation at each split is random; without a seed the
# reported scores change from run to run.
modelDecisionTree = DecisionTreeClassifier(random_state=42)
modelDecisionTree.fit(xTrain, yTrain)
yPred = modelDecisionTree.predict(xTest)

# Precision, recall and F1 are micro-averaged, which for single-label
# multiclass predictions makes them equal to accuracy.
accuracy = accuracy_score(yTest, yPred)
precision = precision_score(yTest, yPred, average='micro')
senstivity = recall_score(yTest, yPred, average='micro')
f1 = f1_score(yTest, yPred, average='micro')
mcc = matthews_corrcoef(yTest, yPred)
kappa = cohen_kappa_score(yTest, yPred)

# Append the scores as one row of the shared metrics table.
with open("/kaggle/working/metrics.csv", 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Decision Tree',accuracy,precision,senstivity,f1,mcc,kappa])

### Logistic Regression

In [23]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with default hyper-parameters; fit() returns the
# estimator itself, so construction and training are chained.
modelLogisticRegression = LogisticRegression().fit(xTrain, yTrain)
yPred = modelLogisticRegression.predict(xTest)

# Score the predictions (micro-averaged precision / recall / F1).
accuracy = accuracy_score(yTest, yPred)
precision, senstivity, f1 = (
    scorer(yTest, yPred, average='micro')
    for scorer in (precision_score, recall_score, f1_score)
)
mcc = matthews_corrcoef(yTest, yPred)
kappa = cohen_kappa_score(yTest, yPred)

# Append the scores as one row of the shared metrics table.
with open("/kaggle/working/metrics.csv", 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow([
        'Logistic Regression',
        accuracy, precision, senstivity, f1, mcc, kappa,
    ])

### Random Forest Classifier

In [24]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with default hyper-parameters. random_state is pinned because
# bootstrap sampling and per-split feature selection are random; without a
# seed the reported scores change from run to run.
modelRandomForestClassifier = RandomForestClassifier(random_state=42)
modelRandomForestClassifier.fit(xTrain, yTrain)
yPred = modelRandomForestClassifier.predict(xTest)

# Precision, recall and F1 are micro-averaged, which for single-label
# multiclass predictions makes them equal to accuracy.
accuracy = accuracy_score(yTest, yPred)
precision = precision_score(yTest, yPred, average='micro')
senstivity = recall_score(yTest, yPred, average='micro')
f1 = f1_score(yTest, yPred, average='micro')
mcc = matthews_corrcoef(yTest, yPred)
kappa = cohen_kappa_score(yTest, yPred)

# Append the scores as one row of the shared metrics table.
with open("/kaggle/working/metrics.csv", 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Random Forest',accuracy,precision,senstivity,f1,mcc,kappa])

### XGBClassifier

In [25]:
from xgboost import XGBClassifier
# Gradient-boosted trees (XGBoost) with default hyper-parameters.
modelXGBClassifier = XGBClassifier()
modelXGBClassifier.fit(xTrain, yTrain)
yPred = modelXGBClassifier.predict(xTest)

# Score the predictions (micro-averaged precision / recall / F1).
accuracy = accuracy_score(yTest, yPred)
precision, senstivity, f1 = (
    scorer(yTest, yPred, average='micro')
    for scorer in (precision_score, recall_score, f1_score)
)
mcc = matthews_corrcoef(yTest, yPred)
kappa = cohen_kappa_score(yTest, yPred)

# Append the scores as one row of the shared metrics table.
with open("/kaggle/working/metrics.csv", 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow([
        'XGBoost',
        accuracy, precision, senstivity, f1, mcc, kappa,
    ])

### KNN Model


In [26]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier with default hyper-parameters; fit()
# returns the estimator itself, so construction and training are chained.
modelKNeighborsClassifier = KNeighborsClassifier().fit(xTrain, yTrain)
yPred = modelKNeighborsClassifier.predict(xTest)

# Score the predictions (micro-averaged precision / recall / F1).
accuracy = accuracy_score(yTest, yPred)
precision, senstivity, f1 = (
    scorer(yTest, yPred, average='micro')
    for scorer in (precision_score, recall_score, f1_score)
)
mcc = matthews_corrcoef(yTest, yPred)
kappa = cohen_kappa_score(yTest, yPred)

# Append the scores as one row of the shared metrics table.
with open("/kaggle/working/metrics.csv", 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow([
        'KNN',
        accuracy, precision, senstivity, f1, mcc, kappa,
    ])

## Model Accuracy

In [27]:
# Load the per-model scores that the cells above appended to metrics.csv.
result = pd.read_csv("/kaggle/working/metrics.csv")

In [28]:
# Display the comparison table (one row per model).
result

Unnamed: 0,Model,Accuracy,Precision,Senstivity,F1 Score,MCC Score,Kappa Coeff
0,SVM,0.2202,0.2202,0.2202,0.2202,0.148648,0.145967
1,Decision Tree,0.124659,0.124659,0.124659,0.124659,0.043905,0.043898
2,Logistic Regression,0.192903,0.192903,0.192903,0.192903,0.119376,0.116423
3,Random Forest,0.207461,0.207461,0.207461,0.207461,0.134454,0.133955
4,XGBoost,0.207461,0.207461,0.207461,0.207461,0.134474,0.13412
5,KNN,0.182894,0.182894,0.182894,0.182894,0.109594,0.10794
