In [3]:
import os
import json
import numpy as np
import pandas as pd
from PIL import Image

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, average_precision_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import label_binarize

from imblearn.over_sampling import SMOTE
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
import time

# Load JSON annotations and labels
labelled_path = "/content/drive/MyDrive/Colab Notebooks/Labeled"
image_paths, labels = [], []

for filename in os.listdir(labelled_path):
    if filename.endswith(".json"):
        with open(os.path.join(labelled_path, filename), 'r') as f:
            data = json.load(f)
            if data['shapes']:
                label = data['shapes'][0]['label']
                image_file = data['imagePath']
                image_paths.append(os.path.join(labelled_path, image_file))
                labels.append(label)

df = pd.DataFrame({'image': image_paths, 'label': labels})
print("Label distribution:\n", df['label'].value_counts())

# Encode labels
label_to_idx = {label: idx for idx, label in enumerate(sorted(df['label'].unique()))}
df['label_idx'] = df['label'].map(label_to_idx)
idx_to_label = {v: k for k, v in label_to_idx.items()}
num_classes = len(label_to_idx)

# Load ResNet50 model
IMG_SIZE = 224
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features
X, y = [], []
for _, row in df.iterrows():
    try:
        img = Image.open(row['image']).convert('RGB').resize((IMG_SIZE, IMG_SIZE))
        img_array = np.expand_dims(preprocess_input(np.array(img)), axis=0)
        features = model.predict(img_array, verbose=0)
        X.append(features.flatten())
        y.append(row['label_idx'])
    except Exception as e:
        print(f"Error processing {row['image']}: {e}")

X = np.array(X)
y = np.array(y)

# Standardize before PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dimensionality reduction
pca = PCA(n_components=200)
X_pca = pca.fit_transform(X_scaled)

# SMOTE for class balancing
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_pca, y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, stratify=y_resampled, random_state=42
)

# Train Random Forest
clf = RandomForestClassifier(
    n_estimators=300,
    max_depth=30,
    class_weight='balanced_subsample',
    random_state=42,
    n_jobs=-1
)
clf.fit(X_train, y_train)
start_time = time.time()
y_pred = clf.predict(X_test)
end_time = time.time()
total_inference_time = end_time - start_time
avg_inference_time = total_inference_time / len(X_test)
y_prob = clf.predict_proba(X_test)

# Evaluation
target_names = [idx_to_label[i] for i in sorted(idx_to_label)]
print("\nFinal Random Forest Results with PCA + ResNet50 + SMOTE + Scaling:\n")
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=target_names))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# mAP calculation
y_test_bin = label_binarize(y_test, classes=list(range(num_classes)))
mAP = average_precision_score(y_test_bin, y_prob, average="macro")
print(f"\nMean Average Precision (mAP): {mAP:.4f}")

# Inference time results
print(f"\nTotal Inference Time: {total_inference_time:.4f} seconds")
print(f"Average Inference Time per Image: {avg_inference_time:.6f} seconds")

Label distribution:
 label
good           162
no_good        116
exc_solder      81
spike           38
poor_solder     31
Name: count, dtype: int64

Final Random Forest Results with PCA + ResNet50 + SMOTE + Scaling:

Classification Report:
               precision    recall  f1-score   support

  exc_solder       0.74      0.97      0.84        33
        good       0.62      0.41      0.49        32
     no_good       0.68      0.72      0.70        32
 poor_solder       0.97      0.97      0.97        33
       spike       0.97      0.94      0.95        32

    accuracy                           0.80       162
   macro avg       0.80      0.80      0.79       162
weighted avg       0.80      0.80      0.79       162

Confusion Matrix:
 [[32  0  0  0  1]
 [ 7 13 11  1  0]
 [ 1  8 23  0  0]
 [ 1  0  0 32  0]
 [ 2  0  0  0 30]]

Mean Average Precision (mAP): 0.8683

Total Inference Time: 0.0959 seconds
Average Inference Time per Image: 0.000592 seconds
