In [11]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import xgboost as xgb # pip install xgboost

In [12]:
def extract_glcm_features(image):
    # Ubah ke grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Hitung GLCM (jarak 1 pixel, sudut 0, 45, 90, 135 derajat)
    # levels=256 karena citra 8-bit standard
    glcm = graycomatrix(gray, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], 
                        levels=256, symmetric=True, normed=True)
    
    features = []
    # Properti yang ingin diambil
    props = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']
    
    for prop in props:
        # Karena kita punya 4 sudut, kita ambil rata-ratanya agar fitur invarian terhadap rotasi
        val = graycoprops(glcm, prop).mean()
        features.append(val)
        
    return np.array(features)

In [13]:
def extract_lbp_features(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Parameter LBP
    radius = 1
    n_points = 8 * radius
    
    # Hitung LBP (method 'uniform' bagus untuk rotasi invarian)
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
    
    # Hitung histogram dari hasil LBP
    # n_points + 2 adalah jumlah bin untuk method 'uniform'
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    
    # Normalisasi histogram agar tidak terpengaruh ukuran gambar
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)
    
    return hist

In [14]:
# load data 

def load_and_extract_features(dataset_path):
    data_features = []
    labels = []
    
    for folder_name in os.listdir(dataset_path):
        folder_path = os.path.join(dataset_path, folder_name)
        
        if os.path.isdir(folder_path):
            print(f"Processing class: {folder_name}")
            
            for filename in os.listdir(folder_path):
                img_path = os.path.join(folder_path, filename)
                
                img = cv2.imread(img_path)
                if img is None:
                    continue
                
                # Resize gambar untuk mempercepat komputasi (opsional, tapi disarankan)
                img = cv2.resize(img, (256, 256))
                
                # --- EKSTRAKSI FITUR GABUNGAN ---
                feat_glcm = extract_glcm_features(img)
                feat_lbp = extract_lbp_features(img)
                feat_hsv = extract_hsv_features(img)
                
                # Gabungkan semua fitur menjadi satu vector panjang
                global_feature = np.concatenate([feat_glcm, feat_lbp, feat_hsv])
                
                data_features.append(global_feature)
                labels.append(folder_name)
                
    return np.array(data_features), np.array(labels)

In [15]:
def extract_hsv_features(image):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    
    # Hitung histogram untuk setiap channel (Hue, Saturation, Value)
    # bins=8 per channel agar fitur tidak terlalu banyak (total 24 fitur)
    bins = 8
    hist_h = cv2.calcHist([hsv], [0], None, [bins], [0, 180])
    hist_s = cv2.calcHist([hsv], [1], None, [bins], [0, 256])
    hist_v = cv2.calcHist([hsv], [2], None, [bins], [0, 256])
    
    # Normalisasi
    cv2.normalize(hist_h, hist_h)
    cv2.normalize(hist_s, hist_s)
    cv2.normalize(hist_v, hist_v)
    
    # Gabungkan menjadi satu array datar
    return np.concatenate([hist_h.flatten(), hist_s.flatten(), hist_v.flatten()])

In [None]:
# ex
dataset_folder = 'dataset/' 

if os.path.exists(dataset_folder):
    X, y = load_and_extract_features(dataset_folder)
    
    # Encoding label string (misal: 'Candida') menjadi angka
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    
    print(f"\nJumlah Data: {len(X)}")
    print(f"Dimensi Fitur: {X.shape[1]}") # Cek berapa total fitur yang didapat
    
    # Split Data 80% Train, 20% Test
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
    
    # Random Forest
    print("\nSedang melatih model Random Forest...")
    rf_model = RandomForestClassifier()
    rf_model.fit(X_train, y_train)
    
    # Prediksi
    y_pred = rf_model.predict(X_test)
    
    # Evaluasi
    print("\n=== HASIL EVALUASI ===")
    print(f"Akurasi: {accuracy_score(y_test, y_pred) * 100:.2f}%")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=le.classes_))

    print("\n=== 2. SUPPORT VECTOR MACHINE (SVM) ===")
    print("Sedang melatih model SVM...")
    # kernel='linear' is often best for HOG features. 
    # If accuracy is low, try kernel='rbf'
    svm_model = SVC(kernel='linear', C=1.0, random_state=42) 
    svm_model.fit(X_train, y_train) # Must use Scaled data

    y_pred_svm = svm_model.predict(X_test)

    print(f"Akurasi SVM: {accuracy_score(y_test, y_pred_svm) * 100:.2f}%")
    print("Classification Report SVM:")
    print(classification_report(y_test, y_pred_svm, target_names=le.classes_))


    print("\n=== 3. K-NEAREST NEIGHBORS (KNN) ===")
    print("Sedang melatih model KNN...")
    # n_neighbors=5 is standard. You can tune this (e.g., 3, 5, 7, 9)
    knn_model = KNeighborsClassifier(n_neighbors=5)
    knn_model.fit(X_train, y_train) # Must use Scaled data

    y_pred_knn = knn_model.predict(X_test)

    print(f"Akurasi KNN: {accuracy_score(y_test, y_pred_knn) * 100:.2f}%")
    print("Classification Report KNN:")
    print(classification_report(y_test, y_pred_knn, target_names=le.classes_))

    print("\n=== 4. XGBOOST ===")
    print("Sedang melatih model XGBoost...")
    # XGBoost requires labels to be integers (0, 1, 2...), which LabelEncoder provides
    xgb_model = xgb.XGBClassifier(
        objective='multi:softprob', 
        random_state=42,
        eval_metric='mlogloss'
    )
    xgb_model.fit(X_train, y_train) # XGBoost handles unscaled data well

    y_pred_xgb = xgb_model.predict(X_test)

    print(f"Akurasi XGBoost: {accuracy_score(y_test, y_pred_xgb) * 100:.2f}%")
    print("Classification Report XGBoost:")
    print(classification_report(y_test, y_pred_xgb, target_names=le.classes_))
    
else:
    print("Path dataset tidak ditemukan. Harap ubah variabel 'dataset_folder'.")
    print("Pastikan struktur folder adalah: root/Kelas_Jamur/image.jpg")

Processing class: H1
Processing class: H2
Processing class: H3
Processing class: H5
Processing class: H6

Jumlah Data: 9114
Dimensi Fitur: 40

Sedang melatih model Random Forest...

=== HASIL EVALUASI ===
Akurasi: 89.96%

Classification Report:
              precision    recall  f1-score   support

          H1       0.88      0.97      0.92       884
          H2       0.91      0.80      0.85       486
          H3       0.89      0.72      0.80       155
          H5       0.96      0.92      0.94       157
          H6       0.96      0.96      0.96       141

    accuracy                           0.90      1823
   macro avg       0.92      0.88      0.90      1823
weighted avg       0.90      0.90      0.90      1823


=== 2. SUPPORT VECTOR MACHINE (SVM) ===
Sedang melatih model SVM...
Akurasi SVM: 65.77%
Classification Report SVM:
              precision    recall  f1-score   support

          H1       0.66      0.94      0.78       884
          H2       0.53      0.27      0.

In [17]:
# import joblib

# model_name = "rfmodel_n100.joblib"

# joblib.dump(rf_model, model_name)