# In [None]:
import cv2
import numpy as np
from skimage.feature import hog
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import matplotlib.pyplot as plt
from skimage import io

# Function to extract features from an image
def extract_features(image, img_size=128):
    """Build a 1-D feature vector describing an image.

    The vector is the concatenation, in this order, of:
      1. a 3-D colour histogram with 8 bins per channel (8*8*8 = 512 values),
         normalized with ``cv2.normalize`` defaults and flattened;
      2. the HOG descriptor of the grayscale version of the image;
      3. the per-channel mean of the three colour channels.

    Args:
        image: 3-channel image array; it is converted with
            ``cv2.COLOR_BGR2GRAY``, so BGR channel order is expected
            (as produced by ``cv2.imread(..., cv2.IMREAD_COLOR)``).
        img_size: Side length, in pixels, of the square the image is resized
            to before any feature is computed.

    Returns:
        1-D numpy array holding the combined features.
    """
    # Resize first so every image yields a feature vector of the same length
    image = cv2.resize(image, (img_size, img_size))

    # Colour histogram over all three channels, normalized and flattened
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8],
                        [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()

    # HOG operates on a single-channel image
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Only the descriptor is needed; the visualisation image was never used,
    # so it is no longer requested (it is costly to render).
    hog_features = hog(gray_image, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2), feature_vector=True)

    # cv2.mean returns a 4-element scalar; keep the three colour channels
    mean_color = cv2.mean(image)[:3]

    return np.hstack([hist, hog_features, mean_color])

# Function to load dataset
def load_data(data_dir, img_size=128):
    """Load every image below ``data_dir`` and turn it into features + label.

    Each directory found under ``data_dir`` (at any depth) is treated as a
    category: its name becomes the label of the image files located directly
    inside it. Files sitting directly in ``data_dir`` itself are not read.
    Only files ending in .jpg, .jpeg or .png (case-insensitive) are considered.

    Directories and files are visited in sorted order. ``os.walk`` and
    ``os.listdir`` otherwise return entries in arbitrary order, which would
    make the sample order (and thus any seeded train/test split) differ
    between machines.

    Images that cannot be decoded, or whose feature extraction raises, are
    reported on stdout and skipped (best-effort loading).

    Args:
        data_dir: Root directory of the dataset.
        img_size: Side length passed through to ``extract_features``.

    Returns:
        Tuple ``(data, labels)`` of numpy arrays: one feature vector and one
        label string per successfully loaded image. Both are empty when no
        image was loaded.
    """
    data = []
    labels = []
    valid_extensions = {'.jpg', '.jpeg', '.png'}  # Valid image extensions

    for root, dirs, _ in os.walk(data_dir):
        # In-place sort: fixes the labelling order here and the order in
        # which os.walk descends into the sub-directories.
        dirs.sort()
        for dir_name in dirs:
            category_path = os.path.join(root, dir_name)
            for img in sorted(os.listdir(category_path)):
                # Bound before the try so the error handler can always use it
                img_path = os.path.join(category_path, img)

                # Skip non-image files
                if os.path.splitext(img_path)[1].lower() not in valid_extensions:
                    continue

                try:
                    img_array = cv2.imread(img_path, cv2.IMREAD_COLOR)
                    if img_array is None:
                        print(f"Warning: Failed to load image {img_path}")
                        continue
                    features = extract_features(img_array, img_size)
                except Exception as e:
                    # Deliberately broad: one bad image must not abort the load
                    print(f"Error loading image {img_path}: {e}")
                    continue

                data.append(features)
                labels.append(dir_name)  # Folder name used as label
    return np.array(data), np.array(labels)

# Load data
data_dir = r"D:\Summer Intern VNIT\dataset"
X, y = load_data(data_dir)

# Fail early with a clear message instead of a confusing error further down
# (load_data returns empty arrays when the path is wrong or holds no images).
if len(X) == 0:
    raise ValueError(f"No images could be loaded from {data_dir}")

# Encode labels (folder names -> integer class ids)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features; the scaler is fitted on the training set only and then
# applied unchanged to the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Naive Bayes model
nb_model = GaussianNB()
nb_model.fit(X_train_scaled, y_train)

# Evaluate the model on both splits
y_train_pred = nb_model.predict(X_train_scaled)
y_test_pred = nb_model.predict(X_test_scaled)

train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Weighted averaging: per-class scores weighted by class support
train_precision = precision_score(y_train, y_train_pred, average='weighted')
test_precision = precision_score(y_test, y_test_pred, average='weighted')

train_recall = recall_score(y_train, y_train_pred, average='weighted')
test_recall = recall_score(y_test, y_test_pred, average='weighted')

train_f1 = f1_score(y_train, y_train_pred, average='weighted')
test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print(f'Training accuracy: {train_accuracy*100:.2f}%')
print(f'Test accuracy: {test_accuracy*100:.2f}%')

print(f'Training precision: {train_precision*100:.2f}%')
print(f'Test precision: {test_precision*100:.2f}%')

print(f'Training recall: {train_recall*100:.2f}%')
print(f'Test recall: {test_recall*100:.2f}%')

print(f'Training F1 score: {train_f1*100:.2f}%')
print(f'Test F1 score: {test_f1*100:.2f}%')

# Print classification report.
# The split is not stratified, so a subset may not contain every class.
# Passing the full list of class ids keeps `labels` and `target_names` the
# same length; otherwise classification_report rejects the mismatch.
class_ids = np.arange(len(label_encoder.classes_))

print("Training Classification Report:")
print(classification_report(y_train, y_train_pred, labels=class_ids,
                            target_names=label_encoder.classes_))

print("Testing Classification Report:")
print(classification_report(y_test, y_test_pred, labels=class_ids,
                            target_names=label_encoder.classes_))

# Function to predict the class of a sample image
def predict_sample_image(image_path, model, label_encoder, img_size=128,
                         feature_scaler=None):
    """Predict the class label of a single image file.

    Args:
        image_path: Path of the image to classify.
        model: Fitted classifier exposing ``predict``.
        label_encoder: Fitted encoder used to map the predicted class id back
            to its original label via ``inverse_transform``.
        img_size: Side length passed to ``extract_features``; it must match
            the value used when the training features were extracted so the
            feature vector has the length the scaler and model expect.
        feature_scaler: Fitted scaler exposing ``transform``. When omitted,
            the module-level ``scaler`` is used, which preserves the original
            behaviour for existing callers.

    Returns:
        The predicted label, or ``None`` (after printing an error) when the
        image cannot be read.
    """
    img_array = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img_array is None:
        print(f"Error: Unable to load image at {image_path}")
        return None

    if feature_scaler is None:
        # Backward-compatible default: the scaler fitted at module level
        feature_scaler = scaler

    features = extract_features(img_array, img_size)
    # The scaler expects a 2-D array: one row per sample
    features_scaled = feature_scaler.transform(features.reshape(1, -1))

    prediction = model.predict(features_scaled)
    predicted_label = label_encoder.inverse_transform(prediction)

    return predicted_label[0]