In [2]:
from tqdm import tqdm
import os
import numpy as np
from PIL import Image

#1. Define const
TS = (64, 64)

#2. Function for loading data
def Load_Data(directory):
    X = []
    y = []

    # collect all image paths first
    image_paths = []
    labels = []

    for folder_name in os.listdir(directory):
        class_path = os.path.join(directory, folder_name)
        if not os.path.isdir(class_path):
            continue

        for image_name in os.listdir(class_path):
            image_paths.append(os.path.join(class_path, image_name))
            labels.append(folder_name)

    # single progress bar
    for image_path, label in tqdm(
        zip(image_paths, labels),
        total=len(image_paths),
        desc="Loading images"
    ):
        img = Image.open(image_path).convert("RGB")
        img = img.resize(TS)

        img_array = np.array(img)
        vector = img_array.flatten()

        X.append(vector)
        y.append(label)

    return np.array(X), np.array(y)


In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score
from tqdm import tqdm

#3. Main execution :
 #step A: Prepare Data
X, y = Load_Data("/kaggle/input/plantvillage-dataset/color")
scaler = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)


from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
pca = PCA(n_components=256, random_state=42)
X_train = pca.fit_transform(X_train)
X_test  = pca.transform(X_test)
rf = RandomForestClassifier(
    n_estimators=300,
    n_jobs=-1,
    verbose=1,
    random_state=42
)

rf.fit(X_train, y_train)
Predictions = rf.predict(X_test)
accuracy = accuracy_score(y_test, Predictions)
print("random forest:", accuracy)

Loading images: 100%|██████████| 54305/54305 [09:50<00:00, 91.95it/s] 
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   36.6s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  4.2min finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.6s


SVM Accuracy: 0.6473621213516251


[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:    0.8s finished


In [None]:
 # Step B: Level 1 Baseline (Sanity Check)
DM = DummyClassifier(strategy = "most_frequent")
DM.fit(X_train, y_train)
y_pred = DM.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Dummy Accuracy:", accuracy)

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score
from tqdm import tqdm

#3. Main execution :
 #step A: Prepare Data
X, y = Load_Data("/kaggle/input/plantvillage-dataset/color")
scaler = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score

# Step C: Level 2 Baseline (The Real Work)
pca = PCA(n_components=256, random_state=42)
X_train = pca.fit_transform(X_train)
X_test  = pca.transform(X_test)
My_Model = SVC(kernel="linear", C=1.0)
# Train
My_Model.fit(X_train, y_train)

# Predict
Predictions = My_Model.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, Predictions)
print("SVM Accuracy:", accuracy)

Loading images: 100%|██████████| 54305/54305 [08:04<00:00, 112.08it/s]


SVM Accuracy: 0.7490102200534021


In [5]:
from sklearn.metrics import confusion_matrix

# true labels and predicted labels
y_true = y_test
y_pred = My_model.predict(X_test)

cm = confusion_matrix(y_true, y_pred)
print(cm)

NameError: name 'My_model' is not defined