In [16]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, accuracy_score
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelBinarizer, StandardScaler

In [17]:
# Root folder of the dataset; load_images() expects one sub-folder per entry in `classes`.
# Set the PLANTVILLAGE_DIR environment variable to run the notebook on another machine;
# when it is unset the original local path is used, so existing setups keep working.
dataset_dir = os.environ.get("PLANTVILLAGE_DIR", r"C:\Users\HP\Desktop\ml\PlantVillage")
# Sub-folder names to load; each folder name is also used as the label of its images.
classes = ['Tomato_healthy', 'Tomato_Early_blight', 'Tomato_Late_blight', 'Pepper__bell___healthy']

In [18]:
# Every image is resized to a square of this many pixels per side before it is flattened
# into one feature row.
side_px = 64
image_size = (side_px, side_px)
# Neighbour counts (n_neighbors) tried for the KNN classifier.
k_values = [3, 5, 7]

In [19]:
def load_images(dataset_dir, classes, image_size):
    """Load every readable image of the requested classes into NumPy arrays.

    Parameters
    ----------
    dataset_dir : str
        Folder that contains one sub-folder per class.
    classes : iterable of str
        Sub-folder names to read; each name is also used as the label of its images.
    image_size : tuple of int
        Target size passed to the image's ``resize`` method.

    Returns
    -------
    images : numpy.ndarray
        The float32 RGB pixel arrays of all images that loaded, stacked in load order.
    labels : numpy.ndarray
        Class name of each entry in ``images``, in the same order.

    Notes
    -----
    Files that cannot be opened or decoded are reported with ``print`` and skipped
    (best effort). A missing class folder is not handled here: ``os.listdir`` raises.
    """
    images, labels = [], []
    for class_name in classes:
        class_dir = os.path.join(dataset_dir, class_name)
        # os.listdir() returns entries in arbitrary order; sorting makes the sample order,
        # and therefore any seeded train/test split built on it, reproducible.
        for file in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, file)
            # Nested folders are not images; skip them instead of failing in Image.open.
            if not os.path.isfile(img_path):
                continue
            try:
                with Image.open(img_path) as img:
                    # Convert first: Pillow resamples palette ("P") and bilevel ("1")
                    # images with nearest-neighbour only, so resizing before the RGB
                    # conversion would degrade those files.
                    rgb = img.convert('RGB').resize(image_size)
                    images.append(np.asarray(rgb, dtype=np.float32))
                    labels.append(class_name)
            except Exception as e:
                # Deliberately broad: one unreadable file must not abort the whole load.
                print(f"Error loading {img_path}: {e}")
    return np.array(images), np.array(labels)


In [None]:
# Read the images once, then flatten each one into a single feature row.
images, labels = load_images(dataset_dir, classes, image_size)
n_samples = images.shape[0]
X = images.reshape(n_samples, -1)
# One-hot encode the class names; column i of y corresponds to lb.classes_[i].
lb = LabelBinarizer()
y = lb.fit(labels).transform(labels)

In [None]:
# Hold out 20% of the samples for testing. Stratifying on the class names keeps the class
# proportions the same in the train and test parts; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=labels
)

In [None]:
# Fit the scaler on the training rows only, then apply that same transform to both splits
# so no statistics from the test set enter the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
losses = []
accuracies = []
iterations = [10, 50, 100, 200, 500, 1000]

for max_iter in iterations:
    lr_model = LogisticRegression(max_iter=max_iter, solver='lbfgs', random_state=42)
    lr_model.fit(X_train, np.argmax(y_train, axis=1))
    y_pred_lr = lr_model.predict(X_test)
    acc = accuracy_score(np.argmax(y_test, axis=1), y_pred_lr)
    losses.append(lr_model.n_iter_[0])  
    accuracies.append(acc)

In [None]:
plt.figure()
plt.plot(iterations, losses, marker='o', label='Loss (approximation)')
plt.title("Loss Curve (Logistic Regression)")
plt.xlabel("Iterations")
plt.ylabel("Loss (number of iterations)")
plt.legend()
plt.show()

In [None]:
# Test-set accuracy reached under each iteration budget.
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(iterations, accuracies, marker='o', label='Accuracy')
ax_acc.set_title("Accuracy Curve (Logistic Regression)")
ax_acc.set_xlabel("Iterations")
ax_acc.set_ylabel("Accuracy")
ax_acc.legend()
plt.show()

In [None]:
# lr_model is the last model fitted by the iteration sweep (largest max_iter).
y_pred_lr = lr_model.predict(X_test)
# Targets are column indices of the one-hot labels, so row/column i of the matrix is
# lb.classes_[i]. Passing labels= keeps the matrix square and aligned with the tick labels
# even if some class does not occur in the test split.
class_ids = np.arange(len(lb.classes_))
conf_matrix = confusion_matrix(np.argmax(y_test, axis=1), y_pred_lr, labels=class_ids)
plt.figure()
# Tick labels must come from lb.classes_: LabelBinarizer orders the classes itself (sorted),
# which is not the order of the hand-written `classes` list, so using `classes` here would
# attach the wrong names to the rows and columns.
sns.heatmap(conf_matrix, annot=True, fmt='d', xticklabels=lb.classes_, yticklabels=lb.classes_)
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion Matrix (Logistic Regression)")
plt.show()

In [None]:
# Micro-averaged ROC: the one-hot truth matrix and the probability matrix are flattened so
# every (sample, class) pair is treated as one binary decision.
# NOTE(review): this relies on predict_proba columns lining up with the columns of y_test;
# confirm every class is present in the training split.
y_score = lr_model.predict_proba(X_test)
fpr, tpr, _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc = auc(fpr, tpr)

fig_roc, ax_roc = plt.subplots()
ax_roc.plot(fpr, tpr, color='blue', lw=2, label=f'Logistic Regression (AUC = {roc_auc:.2f})')
# Diagonal reference line: the ROC of a random classifier.
ax_roc.plot([0, 1], [0, 1], color='gray', linestyle='--')
ax_roc.set_title("ROC Curve (Logistic Regression)")
ax_roc.set_xlabel("False Positive Rate")
ax_roc.set_ylabel("True Positive Rate")
ax_roc.legend(loc="lower right")
plt.show()

In [None]:
# Integer class index of every sample (column position in the one-hot matrices); the same
# for every k, so compute it once outside the loop.
y_train_idx = np.argmax(y_train, axis=1)
y_test_idx = np.argmax(y_test, axis=1)
# Row/column i of each confusion matrix is lb.classes_[i]; fixing labels= keeps the matrix
# square and aligned with the tick labels even if a class is missing from the test split.
class_ids = np.arange(len(lb.classes_))

# Fit and evaluate one KNN classifier per neighbour count.
for k in k_values:
    knn_model = KNeighborsClassifier(n_neighbors=k)
    knn_model.fit(X_train, y_train_idx)
    y_pred_knn = knn_model.predict(X_test)
    accuracy = accuracy_score(y_test_idx, y_pred_knn)
    print(f"KNN (k={k}) Accuracy: {accuracy:.2f}")

    conf_matrix_knn = confusion_matrix(y_test_idx, y_pred_knn, labels=class_ids)
    # Start a fresh figure so the heatmap never draws onto a previous plot's axes.
    plt.figure()
    # Tick labels come from lb.classes_ (the encoder's own, sorted class order); the
    # hand-written `classes` list is in a different order and would mislabel the axes.
    sns.heatmap(conf_matrix_knn, annot=True, fmt='d', xticklabels=lb.classes_, yticklabels=lb.classes_)
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.title(f"Confusion Matrix (KNN, k={k})")
    plt.show()

    # Micro-averaged ROC over all (sample, class) pairs, using neighbour vote fractions
    # as scores.
    y_score_knn = knn_model.predict_proba(X_test)
    fpr, tpr, _ = roc_curve(y_test.ravel(), y_score_knn.ravel())
    roc_auc_knn = auc(fpr, tpr)

    plt.figure()
    plt.plot(fpr, tpr, lw=2, label=f'KNN (k={k}) AUC = {roc_auc_knn:.2f}')
    plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
    plt.title(f"ROC Curve (KNN, k={k})")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc="lower right")
    plt.show()