In [None]:
import os
import pandas as pd
import cv2
import numpy as np

# Input CSVs; the preprocessing step reads their 'image_path' column
train_csv, test_csv = "train_dataset.csv", "test_dataset.csv"

# Folders that receive the processed copies of the images
output_train_dir, output_test_dir = "processed_train_images", "processed_test_images"

# Inclusive lower/upper HSV bounds handed to cv2.inRange for the skin mask
lower_skin = np.asarray((0, 20, 70), dtype=np.uint8)
upper_skin = np.asarray((20, 255, 255), dtype=np.uint8)

# Create both output folders up front; exist_ok makes re-running the cell harmless
os.makedirs(output_train_dir, exist_ok=True)
os.makedirs(output_test_dir, exist_ok=True)

def preprocess_and_save_image(img_path, output_dir):
    """Skin-mask an image, convert it to grayscale and save it in ``output_dir``.

    The image is thresholded in HSV space with the module-level ``lower_skin``
    and ``upper_skin`` bounds, the mask is cleaned with two erosion passes
    followed by two dilation passes, and the masked pixels are written as a
    grayscale image under the same file name as the input.

    Note: the output name is only the basename of ``img_path``, so two inputs
    that share a file name map to the same output file.

    Args:
        img_path: Path of the source image (``str`` or ``os.PathLike``).
        output_dir: Directory that receives the processed image.

    Returns:
        The path of the written file, or ``None`` when ``img_path`` is not a
        usable path, the image cannot be read, or the result cannot be written.
    """
    try:
        img_path = os.fspath(img_path)
    except TypeError:
        # Non-path values (e.g. None, or NaN from an empty CSV cell) would
        # otherwise raise inside cv2.imread; report them like any other failure.
        print(f"Invalid image path: {img_path!r}")
        return None

    image = cv2.imread(img_path)
    if image is None:
        print(f"Image not found: {img_path}")
        return None

    # Skin detection and preprocessing
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower_skin, upper_skin)
    # Erode then dilate (kernel=None -> OpenCV's default structuring element)
    # to drop small speckles from the mask.
    mask = cv2.erode(mask, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)
    masked = cv2.bitwise_and(image, image, mask=mask)
    gray = cv2.cvtColor(masked, cv2.COLOR_BGR2GRAY)

    # Save the processed image; imwrite signals failure through its return
    # value, so a False result must not be reported as a saved path.
    output_path = os.path.join(output_dir, os.path.basename(img_path))
    if not cv2.imwrite(output_path, gray):
        print(f"Failed to write processed image: {output_path}")
        return None
    return output_path

def process_dataset(csv_file, output_dir):
    """Preprocess every image listed in a CSV and point the CSV rows at the results.

    Args:
        csv_file: CSV file with an 'image_path' column.
        output_dir: Directory handed to ``preprocess_and_save_image``.

    Returns:
        The loaded DataFrame with 'image_path' replaced by the processed path,
        or left at the original path where preprocessing returned nothing.
    """
    df = pd.read_csv(csv_file)

    def _resolved_path(original_path):
        # Fall back to the untouched source path when no processed file exists.
        saved_path = preprocess_and_save_image(original_path, output_dir)
        return saved_path if saved_path else original_path

    df['image_path'] = [_resolved_path(row['image_path']) for _, row in df.iterrows()]
    return df

# Process train and test datasets
# Each call loads the CSV, runs the skin-mask preprocessing on every listed
# image, and returns the frame with 'image_path' pointing at the processed
# copy (rows whose image could not be processed keep their original path).
train_df = process_dataset(train_csv, output_train_dir)
test_df = process_dataset(test_csv, output_test_dir)

# Save updated CSVs
# These file names are the ones the ORB feature-extraction cell reads back;
# index=False keeps the pandas row index out of the files.
train_df.to_csv("train_processed.csv", index=False)
test_df.to_csv("test_processed.csv", index=False)

print("Preprocessing complete. Processed images saved in train and test folders.")

In [None]:
import cv2
import os
import pandas as pd
import numpy as np

# CSVs written by the preprocessing cell (image paths + labels)
train_csv, test_csv = "train_processed.csv", "test_processed.csv"

# ORB detector/descriptor, limited to 500 features per image
orb = cv2.ORB_create(nfeatures=500)

# Accumulators filled after both datasets are processed; the three lists stay index-aligned
all_descriptors, labels, image_paths = [], [], []

def process_images_from_csv(csv_file):
    """
    Extract ORB descriptors for every readable image listed in a CSV file.

    Images that cannot be read are reported and skipped; images for which ORB
    yields no descriptors are reported as failed and left out of the results.

    Args:
    - csv_file (str): CSV file with 'image_path' and 'label' columns.

    Returns:
    - descriptors (list): One ORB descriptor array per retained image.
    - labels (list): Value of the 'label' column for each retained image.
    - paths (list): Path of each retained image.

    The three lists are index-aligned; all are empty when the CSV is missing.
    """
    feature_sets, targets, kept_paths = [], [], []

    if os.path.exists(csv_file):
        for _, record in pd.read_csv(csv_file).iterrows():
            img_path, label = record['image_path'], record['label']

            # Load directly as single-channel; None signals an unreadable file
            gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if gray is None:
                print(f"Image not found or invalid: {img_path}")
                continue

            keypoints, desc = orb.detectAndCompute(gray, None)
            if desc is not None:
                feature_sets.append(desc)
                targets.append(label)
                kept_paths.append(img_path)

            if keypoints:
                print(f"Processed: {img_path} | Keypoints: {len(keypoints)}")
            else:
                print(f"Failed: {img_path}")
    else:
        print(f"CSV file not found: {csv_file}")

    return feature_sets, targets, kept_paths

# Process train and test datasets
print("Processing train dataset...")
train_descriptors, train_labels, train_paths = process_images_from_csv(train_csv)

print("Processing test dataset...")
test_descriptors, test_labels, test_paths = process_images_from_csv(test_csv)

# Combine all data (train first, then test; the three lists stay index-aligned)
all_descriptors.extend(train_descriptors + test_descriptors)
labels.extend(train_labels + test_labels)
image_paths.extend(train_paths + test_paths)

# Save ORB results to a .npz file for later use.
# The per-image descriptor arrays are stored in a 1-D object array that is
# filled element by element. np.array(list, dtype=object) would only give that
# layout when the arrays differ in shape; if every image produced the same
# number of keypoints it would instead expand into one (N, K, D) object array
# of Python ints, so the saved structure would depend on the data.
descriptor_store = np.empty(len(all_descriptors), dtype=object)
for _slot, _image_descriptors in enumerate(all_descriptors):
    descriptor_store[_slot] = _image_descriptors

np.savez(
    "orb_features_results.npz",
    descriptors=descriptor_store,
    labels=np.array(labels),
    image_paths=np.array(image_paths)
)
print("ORB features, labels, and image paths saved to orb_features_results.npz")
# Debugging output
print("Processing complete.")
print(f"Total images processed: {len(image_paths)}")
print(f"Number of descriptors: {len(all_descriptors)}")
print(f"Unique labels: {len(set(labels))}")


In [3]:
from sklearn.cluster import KMeans
# Combine all descriptors into one array (one row per descriptor).
# Fail with an actionable message instead of np.vstack's generic error when
# the feature-extraction cell has not produced anything.
if not all_descriptors:
    raise ValueError(
        "No ORB descriptors available: run the feature-extraction cell and "
        "check that it retained at least one image."
    )
all_descriptors_stacked = np.vstack(all_descriptors)

# Choose number of clusters (size of the visual vocabulary)
num_clusters = 500

# Fit KMeans; random_state fixes the initialisation so reruns are repeatable
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(all_descriptors_stacked)



In [None]:
import joblib

# Persist the fitted vocabulary so later sessions can reuse it without refitting
kmeans_model_path = "kmeans_model.joblib"
joblib.dump(kmeans, kmeans_model_path)
print(f"KMeans model saved to {kmeans_model_path}")

In [5]:
def build_histogram(descriptors, kmeans_model, num_clusters):
    """Count how many descriptors are assigned to each cluster.

    Args:
        descriptors: Feature rows, passed unchanged to ``kmeans_model.predict``.
        kmeans_model: Object whose ``predict`` returns one integer cluster
            index per descriptor.
        num_clusters: Length of the returned histogram.

    Returns:
        Float array of length ``num_clusters``; entry ``k`` is the number of
        descriptors assigned to cluster ``k``.
    """
    word_counts = np.zeros(num_clusters)
    assignments = kmeans_model.predict(descriptors)
    # Unbuffered in-place add: repeated indices accumulate, exactly like a
    # per-element ``+= 1``.
    np.add.at(word_counts, assignments, 1)
    return word_counts

# One visual-word histogram per image, stacked row-wise into the feature matrix
X = np.array([
    build_histogram(image_descriptors, kmeans, num_clusters)
    for image_descriptors in all_descriptors
])

# Labels are looked up by position so they stay aligned with the rows of X
y = np.array([labels[position] for position in range(len(all_descriptors))])


In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# 5-fold split, shuffled with a fixed seed so the folds are repeatable
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Linear-kernel SVM; every sample is predicted by a model fitted on the other folds
clf = SVC(kernel='linear', random_state=42)
y_pred = cross_val_predict(clf, X, y, cv=kf)

# Evaluation metrics (precision, recall and F1 are macro-averaged over classes)
acc = accuracy_score(y, y_pred)
prec, rec, f1 = (
    score_fn(y, y_pred, average='macro')
    for score_fn in (precision_score, recall_score, f1_score)
)
cm = confusion_matrix(y, y_pred)

# Summary scores
print(
    "📊 Evaluation Metrics:",
    f"Accuracy:  {acc:.4f}",
    f"Precision: {prec:.4f}",
    f"Recall:    {rec:.4f}",
    f"F1-score:  {f1:.4f}\n",
    sep="\n",
)

# Per-class breakdown
print("🧾 Classification Report:", classification_report(y, y_pred), sep="\n")

# Rows/columns as returned by sklearn's confusion_matrix
print("📉 Confusion Matrix:", cm, sep="\n")
