In [1]:
#Importing required libraries
import os
import cv2
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import joblib
import time

## Loding the dataset

In [2]:
#load Dataset & analyse
data_dir="Dataset"

#List all classes(disease categories)
classes=os.listdir(data_dir)
print(f"Number of classes:{len(classes)}")
print("Classes:",classes[:4])

#count images in each class (display first few)
for cls in classes[:5]:
    print(f"{cls}:{len(os.listdir(os.path.join(data_dir,cls)))} images")

Number of classes:5
Classes: ['Potato___healthy', 'Potato___Late_blight', 'Tomato___Bacterial_spot', 'Tomato___Early_blight']
Potato___healthy:152 images
Potato___Late_blight:1000 images
Tomato___Bacterial_spot:2127 images
Tomato___Early_blight:1000 images
Tomato___healthy:1591 images


## load and Preprocess Image(Encode Labels)

In [3]:
classes

['Potato___healthy',
 'Potato___Late_blight',
 'Tomato___Bacterial_spot',
 'Tomato___Early_blight',
 'Tomato___healthy']

In [4]:
cls_path=os.path.join(data_dir,cls)
cls_path

'Dataset\\Tomato___healthy'

In [5]:
X=[]
y=[]
s=time.time()
j=0
for cls in classes:
    cls_path=os.path.join(data_dir,cls)
    j+=1
    i=0
    for img_name in os.listdir(cls_path)[:500]: #Limit for faster processing
        img_path=os.path.join(cls_path,img_name)
        img=cv2.imread(img_path)
        if img is not None:
            X.append(img)
            y.append(cls)
            i+=1
    print(f'Loaded {i} images of class {j}-{cls} in {np.round((time.time()-s)/60,2)}minutes')
    

Loaded 152 images of class 1-Potato___healthy in 0.0minutes
Loaded 500 images of class 2-Potato___Late_blight in 0.01minutes
Loaded 500 images of class 3-Tomato___Bacterial_spot in 0.01minutes
Loaded 500 images of class 4-Tomato___Early_blight in 0.02minutes
Loaded 500 images of class 5-Tomato___healthy in 0.03minutes


In [6]:
#prepare a numpy 
type(X),type(X[0]),X[0].shape

(list, numpy.ndarray, (256, 256, 3))

In [7]:
X=np.array(X)
y=np.array(y)
print("Image data shape:",X.shape)

Image data shape: (2152, 256, 256, 3)


In [13]:
#Encode Labels
encoder=LabelEncoder()
y_encoder=encoder.fit_transform(y)
print("Encoded classes:",encoder.classes_)

Encoded classes: ['Potato___Late_blight' 'Potato___healthy' 'Tomato___Bacterial_spot'
 'Tomato___Early_blight' 'Tomato___healthy']


In [14]:
#save the encoded Labels
joblib.dump(encoder,"label_encoder.pkl")

['label_encoder.pkl']

# ML(Color Histogram)

## Extracting Features(Feature Engineering)

In [15]:
#Extract Color Histogram Features
features=[]
for img in X:
    hsv_img=cv2.cvtColor((img).astype('uint8'),cv2.COLOR_BGR2HSV)
    hist=cv2.calcHist([hsv_img],[0,1,2],None,[8,8,8],[0,256,0,256,0,256])
    hist=cv2.normalize(hist,hist).flatten()
    features.append(hist)

features=np.array(features)
print("Features shape:",features.shape)

Features shape: (2152, 512)


### Split for Train and Test sets

In [18]:
#splt the data
X_train,X_test,y_train,y_test=train_test_split(features,y_encoder,test_size=0.2,random_state=42,stratify=y)

In [19]:
#save as pkl files for deployment
joblib.dump(X_test,"X_test.pkl")
joblib.dump(y_test,"y_test.pkl")


['y_test.pkl']

In [21]:
print('X train',X_train.shape,X_train[0].shape)
print('y train',y_train.shape,end='\n'*2)
print('X test',X_test.shape,X_test[0].shape)
print('Y test',y_test.shape)

X train (1721, 512) (512,)
y train (1721,)

X test (431, 512) (512,)
Y test (431,)


## Train the model

In [22]:
#train Logistic Regression Model
from sklearn.linear_model import LogisticRegression

model= LogisticRegression(max_iter=500)
model.fit(X_train,y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,500


In [23]:
#save the model for deployment
joblib.dump(model,"model.pkl")

['model.pkl']

## Prediction Result and Model Evaluation

In [25]:
#predict
y_pred=model.predict(X_test)

#Evaluation
print("Accuracy:",np.round(accuracy_score(y_test,y_pred)*100,2))
print(classification_report(y_test,y_pred))

Accuracy: 89.56
              precision    recall  f1-score   support

           0       0.86      0.83      0.84       100
           1       0.83      0.77      0.80        31
           2       0.94      0.94      0.94       100
           3       0.86      0.88      0.87       100
           4       0.94      0.97      0.96       100

    accuracy                           0.90       431
   macro avg       0.89      0.88      0.88       431
weighted avg       0.89      0.90      0.90       431



In [26]:
#single prediction
#test_img=np.array(np.arange(512)) 
test_img=X_test[46]
model.predict(test_img.reshape(1,-1))

array([3])

## Confusion Matrix