# In [None]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
import matplotlib.pyplot as plt
from skimage import io

# Function to extract features from an image
def extract_features(image, img_size=128):
    """Build a 1-D feature vector for one image.

    The vector is the concatenation of a normalized 3-D colour histogram,
    a HOG descriptor of the grayscale image, and the per-channel mean colour.

    Args:
        image: Image array in OpenCV channel order (BGR), e.g. the result of
            ``cv2.imread(path, cv2.IMREAD_COLOR)``. 2-D (grayscale) and
            4-channel (BGRA) arrays are converted to 3-channel BGR first.
        img_size: Side length, in pixels, of the square the image is resized
            to before any feature is computed.

    Returns:
        A 1-D NumPy array: 512 histogram bins (8 x 8 x 8), followed by the
        HOG descriptor, followed by the 3 channel means.
    """
    # The histogram and the grayscale conversion below both require exactly
    # three channels, so bring other layouts to BGR up front.
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.ndim == 3 and image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

    # A fixed size keeps the HOG descriptor length identical for every image.
    image = cv2.resize(image, (img_size, img_size))

    # Joint colour histogram with 8 bins per channel, normalized in place.
    hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8],
                        [0, 256, 0, 256, 0, 256])
    hist = cv2.normalize(hist, hist).flatten()

    # HOG operates on a single-channel image.
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Only the descriptor is needed; the visualization image is not requested
    # because rendering it is extra work whose result was never used.
    hog_features = hog(gray_image, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2), feature_vector=True)

    # cv2.mean always returns four values; keep the three channel means.
    mean_color = cv2.mean(image)[:3]

    return np.hstack([hist, hog_features, mean_color])

# Function to load and preprocess data
def load_data(data_dir, img_size=128):
    """Walk ``data_dir`` and turn every readable image into a feature vector.

    Every directory found below ``data_dir`` (at any depth) is treated as a
    category: the files directly inside it are loaded and labelled with that
    directory's name. Files directly inside ``data_dir`` itself are not read.

    Args:
        data_dir: Root directory of the dataset.
        img_size: Forwarded to ``extract_features``.

    Returns:
        A tuple ``(data, labels)`` of NumPy arrays: one feature vector and
        one label string per successfully loaded image. Both are empty when
        no image could be loaded.
    """
    data = []
    labels = []
    for root, dirs, _files in os.walk(data_dir):
        # Sorting in place fixes the traversal order, so the sample order
        # (and therefore any seeded train/test split) does not depend on
        # the order in which the filesystem lists entries.
        dirs.sort()
        for dir_name in dirs:
            category_path = os.path.join(root, dir_name)
            for entry in sorted(os.listdir(category_path)):
                img_path = os.path.join(category_path, entry)
                # Nested sub-directories are visited by os.walk on their own;
                # passing them to cv2.imread only produces a bogus warning.
                if not os.path.isfile(img_path):
                    continue
                try:
                    img_array = cv2.imread(img_path, cv2.IMREAD_COLOR)
                    if img_array is None:
                        print(f"Warning: Failed to load image {img_path}")
                        continue
                    features = extract_features(img_array, img_size)
                except Exception as e:
                    # Best effort: one bad file must not abort the whole load.
                    print(f"Error loading image {img_path}: {e}")
                    continue
                data.append(features)
                labels.append(dir_name)  # Folder name used as label
    return np.array(data), np.array(labels)

# Load data
data_dir = r"D:\Summer Intern VNIT\dataset"
X, labels = load_data(data_dir)

# Check if data is loaded correctly
if len(X) == 0 or len(labels) == 0:
    raise ValueError("No data found. Please check the data directory and ensure it contains images.")

# Encode the folder-name labels as integers 0..n_classes-1
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)

# Split data into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build and train the Random Forest model
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Evaluate the model on the training set
y_train_pred = rf.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f'Training accuracy: {train_accuracy*100:.2f}%')

# Evaluate the model on the test set
y_pred = rf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {test_accuracy*100:.2f}%')

# Calculate and print the classification report.
# The full list of encoded classes is passed explicitly: without it the
# report infers the classes from y_test/y_pred, and if any class is missing
# from the test split the number of inferred classes no longer matches
# target_names and classification_report raises ValueError.
class_indices = np.arange(len(label_encoder.classes_))
print(classification_report(y_test, y_pred, labels=class_indices,
                            target_names=label_encoder.classes_,
                            zero_division=0))

# Calculate overall (support-weighted) precision, recall, and F1-score.
# zero_division=0 keeps the default value for undefined metrics but without
# emitting a warning for classes that were never predicted.
precision, recall, f1_score, _ = precision_recall_fscore_support(
    y_test, y_pred, average='weighted', zero_division=0)
print(f'Overall Precision: {precision:.2f}')
print(f'Overall Recall: {recall:.2f}')
print(f'Overall F1-Score: {f1_score:.2f}')

# Function to predict the class of a new sample image
def predict_sample_image(image_path, model, label_encoder, img_size=128):
    """Classify a single image file and display it with the predicted label.

    Args:
        image_path: Path of the image file to classify.
        model: Fitted classifier exposing ``predict``.
        label_encoder: Fitted encoder whose ``inverse_transform`` maps the
            model's output back to the original label.
        img_size: Forwarded to ``extract_features``; should match the value
            used when the training features were extracted.

    Returns:
        The predicted label, or ``None`` when the image cannot be loaded
        (an error message is printed in that case).
    """
    img_array = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img_array is None:
        print(f"Error: Unable to load image at {image_path}")
        return None

    # The model expects a 2-D array with one row per sample.
    features = np.asarray(extract_features(img_array, img_size)).reshape(1, -1)

    prediction = model.predict(features)
    predicted_label = label_encoder.inverse_transform(prediction)[0]

    # Show the array that was actually classified instead of decoding the
    # file a second time with another library. OpenCV loads pixels as BGR,
    # while matplotlib expects RGB, hence the conversion.
    plt.imshow(cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB))
    plt.title(f'Classified as: {predicted_label} using Random Forest')
    plt.axis('off')
    plt.show()

    return predicted_label

# Image file to run through the trained classifier
external_image_path = r'D:\Summer Intern VNIT\dataset\Apple_Bad\IMG_20190910_172802_1.jpg'

# Classify the image with the trained forest and the fitted label encoder
predicted_class = predict_sample_image(
    external_image_path,
    model=rf,
    label_encoder=label_encoder,
)

# Nothing is reported when the prediction came back empty
if predicted_class:
    print(f'The external image is classified as: {predicted_class} using Random Forest')