In [2]:
import os
import json
import cv2
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from skimage.feature import hog
import albumentations as A

IMG_SIZE = 128  # Smaller size for HOG efficiency
DATA_FOLDER = '/content/drive/MyDrive/Colab Notebooks/Labeled'

# --- Augmentation ---
augment = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Rotate(limit=15, p=0.3),
])

# --- Load and extract HOG features ---
from skimage.color import rgb2gray

def extract_hog_features(img):
    gray = rgb2gray(img)  # Convert RGB to grayscale
    features = hog(
        gray,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        visualize=False  # No need for visualization here
    )
    return features


def load_images(data_folder):
    X, y = [], []
    for file in os.listdir(data_folder):
        if file.endswith('.json'):
            with open(os.path.join(data_folder, file)) as f:
                label_data = json.load(f)
            label = label_data['shapes'][0]['label']
            image_path = os.path.join(data_folder, label_data['imagePath'])

            if os.path.exists(image_path):
                img = cv2.imread(image_path)
                img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # Original
                X.append(extract_hog_features(img))
                y.append(label)

                # Augmented
                img_aug = augment(image=img)['image']
                X.append(extract_hog_features(img_aug))
                y.append(label)

    return np.array(X), np.array(y)

# Load data
X, y = load_images(DATA_FOLDER)

# Label encoding
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)

# Hyperparameter tuning
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'max_features': ['sqrt', 'log2']
}

rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=3, scoring='accuracy', n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# Best model
best_rf = grid_search.best_estimator_

# Predict and evaluate
y_pred = best_rf.predict(X_test)

print("Classification Report (Random Forest + HOG + Augmentation):")
print(classification_report(y_test, y_pred, target_names=le.classes_))


Fitting 3 folds for each of 48 candidates, totalling 144 fits
Classification Report (Random Forest + HOG + Augmentation):
              precision    recall  f1-score   support

  exc_solder       0.71      0.67      0.69        33
        good       0.55      0.73      0.63        66
     no_good       0.68      0.68      0.68        47
 poor_solder       1.00      0.38      0.56        13
       spike       1.00      0.27      0.42        15

    accuracy                           0.64       174
   macro avg       0.79      0.55      0.59       174
weighted avg       0.69      0.64      0.63       174



In [1]:
import os
import json
import cv2
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from skimage.feature import hog
import albumentations as A

IMG_SIZE = 224 # Smaller size for HOG efficiency
DATA_FOLDER = '/content/drive/MyDrive/Colab Notebooks/Labeled'

# --- Augmentation ---
augment = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.Rotate(limit=15, p=0.3),
])

# --- Load and extract HOG features ---
from skimage.color import rgb2gray

def extract_hog_features(img):
    gray = rgb2gray(img)  # Convert RGB to grayscale
    features = hog(
        gray,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        visualize=False  # No need for visualization here
    )
    return features


def load_images(data_folder):
    X, y = [], []
    for file in os.listdir(data_folder):
        if file.endswith('.json'):
            with open(os.path.join(data_folder, file)) as f:
                label_data = json.load(f)
            label = label_data['shapes'][0]['label']
            image_path = os.path.join(data_folder, label_data['imagePath'])

            if os.path.exists(image_path):
                img = cv2.imread(image_path)
                img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

                # Original
                X.append(extract_hog_features(img))
                y.append(label)

                # Augmented
                img_aug = augment(image=img)['image']
                X.append(extract_hog_features(img_aug))
                y.append(label)

    return np.array(X), np.array(y)

# Load data
X, y = load_images(DATA_FOLDER)

# Label encoding
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)

# Hyperparameter tuning
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'max_features': ['sqrt', 'log2']
}

rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=3, scoring='accuracy', n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# Best model
best_rf = grid_search.best_estimator_

from sklearn.metrics import average_precision_score
from sklearn.preprocessing import label_binarize

# Predict class labels
y_pred = best_rf.predict(X_test)

# Classification report
print("Classification Report (Random Forest + HOG + Augmentation):")
print(classification_report(y_test, y_pred, target_names=le.classes_))

# Binarize true labels
y_test_bin = label_binarize(y_test, classes=np.arange(len(le.classes_)))

# Predict probabilities for mAP
y_prob = best_rf.predict_proba(X_test)

# Compute Average Precision (AP) for each class
ap_per_class = []
for i in range(len(le.classes_)):
    ap = average_precision_score(y_test_bin[:, i], y_prob[:, i])
    ap_per_class.append(ap)

# Compute mean Average Precision (mAP)
mean_ap = np.mean(ap_per_class)

# Print AP per class and mAP
print("\nAverage Precision (per class):")
for label, ap in zip(le.classes_, ap_per_class):
    print(f"{label}: {ap:.4f}")

print(f"\nMean Average Precision (mAP): {mean_ap:.4f}")



Fitting 3 folds for each of 48 candidates, totalling 144 fits
Classification Report (Random Forest + HOG + Augmentation):
              precision    recall  f1-score   support

  exc_solder       0.71      0.52      0.60        33
        good       0.52      0.76      0.62        66
     no_good       0.58      0.55      0.57        47
 poor_solder       1.00      0.15      0.27        13
       spike       1.00      0.47      0.64        15

    accuracy                           0.59       174
   macro avg       0.76      0.49      0.54       174
weighted avg       0.65      0.59      0.57       174


Average Precision (per class):
exc_solder: 0.7974
good: 0.7992
no_good: 0.7463
poor_solder: 0.3169
spike: 0.8026

Mean Average Precision (mAP): 0.6925
