In [None]:
pip install numpy opencv-python scikit-learn tensorflow keras scipy
!pip install PyWavelets
pip install torch torchvision pandas pillow

In [None]:
# Working in Google Colab: the image folder and the generated CSVs are addressed
# through paths under the Drive mount point below.
from google.colab import drive

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

# Define paths for input images and feature storage.
# Both names are module-level and are read by the feature-extraction cells that follow.
input_path = '/content/drive/MyDrive/wangdataset/Images/'  # Update this to your images folder
output_path = '/content/drive/MyDrive/DeepLearning/'  # Update this to the folder for CSVs


In [None]:
import cv2
import numpy as np
import os
import pandas as pd

# Function to compute color histogram in HSV space
def compute_color_histogram(image, bins=(8, 2, 2)):
    """Build a flattened, normalized 3-D colour histogram of an image in HSV space.

    Args:
        image: image array converted with ``cv2.COLOR_BGR2HSV`` (BGR channel order expected).
        bins: number of histogram bins for the three HSV channels, in channel order.

    Returns:
        1-D array holding the histogram after ``cv2.normalize`` (called with its
        default settings) and flattening.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Value range per channel, as (low, high) pairs laid out flat for calcHist.
    channel_ranges = [0, 180, 0, 256, 0, 256]
    histogram = cv2.calcHist([hsv], [0, 1, 2], None, bins, channel_ranges)

    # The histogram is passed as both source and destination of the normalization.
    normalized = cv2.normalize(histogram, histogram)
    return normalized.flatten()

# Directory to save Color Histogram features
# This is the first cell that writes to output_path, so make sure the folder exists.
os.makedirs(output_path, exist_ok=True)
color_histogram_csv = os.path.join(output_path, 'color_histogram_features.csv')

# Process all images in the input directory.
# Extensions are matched case-insensitively (the same rule the deep-feature cell uses)
# and the listing is sorted so every feature CSV gets a reproducible row order.
image_files = sorted(
    f for f in os.listdir(input_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)
color_hist_features = []

for image_name in image_files:
    image_path = os.path.join(input_path, image_name)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable files; without this check the failure
        # surfaces as an opaque cv2.resize assertion. Fail loudly rather than skip,
        # because the handcrafted CSVs are later combined row by row.
        raise ValueError(f'Could not read image: {image_path}')
    image = cv2.resize(image, (224, 224))

    # Compute color histogram
    hist_features = compute_color_histogram(image)

    # Append to the list with image name
    color_hist_features.append([image_name] + hist_features.tolist())

# Save features to CSV (no header, no index: column 0 is the image name)
color_hist_df = pd.DataFrame(color_hist_features)
color_hist_df.to_csv(color_histogram_csv, index=False, header=False)
print(f'Color Histogram features saved to {color_histogram_csv}')


In [None]:
# Function to compute color moments (mean, std deviation)
def compute_color_moments(image):
    """Return the per-channel mean and standard deviation of an image in HSV space.

    Args:
        image: image array converted with ``cv2.COLOR_BGR2HSV`` (BGR channel order expected).

    Returns:
        list: the channel means followed by the channel standard deviations, both in
        the channel order produced by ``cv2.split``.
    """
    planes = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))
    channel_means = [np.mean(plane) for plane in planes]
    channel_stds = [np.std(plane) for plane in planes]
    return channel_means + channel_stds

# Directory to save Color Moments features
color_moments_csv = os.path.join(output_path, 'color_moments_features.csv')

# Process all images for color moments
color_moments_features = []

for image_name in image_files:
    image_path = os.path.join(input_path, image_name)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable files; report the offending path
        # instead of letting cv2.resize fail with an opaque assertion.
        raise ValueError(f'Could not read image: {image_path}')
    image = cv2.resize(image, (224, 224))

    # Compute color moments
    moments_features = compute_color_moments(image)

    # Append to the list with image name
    color_moments_features.append([image_name] + moments_features)

# Save features to CSV (no header, no index: column 0 is the image name)
color_moments_df = pd.DataFrame(color_moments_features)
color_moments_df.to_csv(color_moments_csv, index=False, header=False)
print(f'Color Moments features saved to {color_moments_csv}')


In [None]:
import pywt

# Function to compute Wavelet Transform
def compute_wavelet_features(image, wavelet='db1', level=3):
    """Summarise a multi-level 2-D wavelet decomposition of the grayscale image.

    Args:
        image: image array converted with ``cv2.COLOR_BGR2GRAY`` (BGR channel order expected).
        wavelet: wavelet name forwarded to ``pywt.wavedec2``.
        level: decomposition level forwarded to ``pywt.wavedec2``.

    Returns:
        list: a (mean, std) pair for every coefficient array, in the order the
        decomposition yields them. Arrays inside a detail tuple are visited in
        tuple order.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    decomposition = pywt.wavedec2(grayscale, wavelet, level=level)

    # Flatten the decomposition into one ordered list of coefficient arrays:
    # tuples hold the detail sub-bands, any other entry is the approximation.
    subbands = []
    for entry in decomposition:
        if isinstance(entry, tuple):
            subbands.extend(entry)
        else:
            subbands.append(entry)

    stats = []
    for band in subbands:
        stats.append(np.mean(band))
        stats.append(np.std(band))
    return stats

# Directory to save Wavelet Transform features
wavelet_csv = os.path.join(output_path, 'wavelet_features.csv')

# Process all images for wavelet features
wavelet_features = []

for image_name in image_files:
    image_path = os.path.join(input_path, image_name)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable files; report the offending path
        # instead of letting cv2.resize fail with an opaque assertion.
        raise ValueError(f'Could not read image: {image_path}')
    image = cv2.resize(image, (224, 224))

    # Compute wavelet features
    wavelet_features_list = compute_wavelet_features(image)

    # Append to the list with image name
    wavelet_features.append([image_name] + wavelet_features_list)

# Save features to CSV (no header, no index: column 0 is the image name)
wavelet_df = pd.DataFrame(wavelet_features)
wavelet_df.to_csv(wavelet_csv, index=False, header=False)
print(f'Wavelet features saved to {wavelet_csv}')


In [None]:
from skimage.feature import local_binary_pattern

# Function to compute LBP
def compute_lbp_features(image, radius=2, n_points=16):
    """Return the normalized histogram of "uniform" local binary pattern codes.

    Args:
        image: image array converted with ``cv2.COLOR_BGR2GRAY`` (BGR channel order expected).
        radius: radius forwarded to ``local_binary_pattern``.
        n_points: number of sampling points forwarded to ``local_binary_pattern``.

    Returns:
        Float array with ``n_points + 2`` bins whose entries are the bin counts
        divided by their sum (plus 1e-6 to avoid division by zero).
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    pattern_codes = local_binary_pattern(grayscale, n_points, radius, method="uniform")

    # Integer bin edges 0, 1, ..., n_points + 2 give one bin per pattern code.
    bin_edges = np.arange(0, n_points + 3)
    counts, _ = np.histogram(pattern_codes.ravel(), bins=bin_edges, range=(0, n_points + 2))

    # Turn counts into relative frequencies.
    counts = counts.astype("float")
    return counts / (counts.sum() + 1e-6)

# Directory to save LBP features
lbp_csv = os.path.join(output_path, 'lbp_features.csv')

# Process all images for LBP features
lbp_features = []

for image_name in image_files:
    image_path = os.path.join(input_path, image_name)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable files; report the offending path
        # instead of letting cv2.resize fail with an opaque assertion.
        raise ValueError(f'Could not read image: {image_path}')
    image = cv2.resize(image, (224, 224))

    # Compute LBP features
    lbp_features_list = compute_lbp_features(image)

    # Append to the list with image name
    lbp_features.append([image_name] + lbp_features_list.tolist())

# Save features to CSV (no header, no index: column 0 is the image name)
lbp_df = pd.DataFrame(lbp_features)
lbp_df.to_csv(lbp_csv, index=False, header=False)
print(f'LBP features saved to {lbp_csv}')


In [None]:
def _join_on_image_name(left_df, right_df):
    """Join two header-less feature tables column-wise on the image name in column 0.

    Rows are matched by image name rather than by position, so tables written from
    differently ordered (or differently filtered) file listings cannot be silently
    misaligned. Images that are not present in both tables are dropped with a warning.
    The result keeps the image name as its first column, followed by the feature
    columns of ``left_df`` and then those of ``right_df``.
    """
    left = left_df.set_index(0)
    right = right_df.set_index(0)
    joined = pd.concat([left, right], axis=1, join='inner')

    missing = max(len(left), len(right)) - len(joined)
    if missing:
        print(f'Warning: {missing} image(s) were dropped because they are not present in both feature tables')

    # Restore the original layout: image name back in a column labelled 0.
    joined.index.name = 0
    return joined.reset_index()


# Combine Color Features (Color Histogram + Color Moments)
combined_color_csv = os.path.join(output_path, 'combined_color_features.csv')

# Load the color CSV files
color_hist_df = pd.read_csv(color_histogram_csv, header=None)
color_moments_df = pd.read_csv(color_moments_csv, header=None)

# Merge color features (image name kept once, as the first column)
combined_color_df = _join_on_image_name(color_hist_df, color_moments_df)
combined_color_df.to_csv(combined_color_csv, index=False, header=False)
print(f'Combined Color features saved to {combined_color_csv}')

# Combine Texture Features (Wavelet + LBP)
combined_texture_csv = os.path.join(output_path, 'combined_texture_features.csv')

# Load the texture CSV files
wavelet_df = pd.read_csv(wavelet_csv, header=None)
lbp_df = pd.read_csv(lbp_csv, header=None)

# Merge texture features (image name kept once, as the first column)
combined_texture_df = _join_on_image_name(wavelet_df, lbp_df)
combined_texture_df.to_csv(combined_texture_csv, index=False, header=False)
print(f'Combined Texture features saved to {combined_texture_csv}')

# Final Handcrafted Feature Combination (Color + Texture)
final_handcrafted_csv = os.path.join(output_path, 'final_handcrafted_features.csv')

# Merge Color and Texture features (image name kept once, as the first column)
final_handcrafted_df = _join_on_image_name(combined_color_df, combined_texture_df)
final_handcrafted_df.to_csv(final_handcrafted_csv, index=False, header=False)
print(f'Final Handcrafted features saved to {final_handcrafted_csv}')


In [None]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import pandas as pd
import os

# Paths for dataset and feature output
input_path = '/content/drive/MyDrive/wangdataset/Images'  # Update this to the Corel 1K image folder path
output_path = '/content/drive/MyDrive/DeepLearning'  # Update this to the output folder path
os.makedirs(output_path, exist_ok=True)

# List all image files in the input directory (sorted for a reproducible row order)
image_files = sorted(
    f for f in os.listdir(input_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Load a pre-trained EfficientNet-B7 model.
# The `weights` argument replaces the deprecated `pretrained=True` flag in
# torchvision >= 0.13; the fallback keeps older torchvision releases working.
try:
    model = models.efficientnet_b7(weights=models.EfficientNet_B7_Weights.IMAGENET1K_V1)
except AttributeError:
    model = models.efficientnet_b7(pretrained=True)
# Remove the final classification layer to get features from the penultimate layer
model = nn.Sequential(*list(model.children())[:-1])
model.eval()  # inference mode: the model is only used to extract features

# Transformation to resize images to 600x600 and normalize them (EfficientNet-B7 recommended input size)
transform = transforms.Compose([
    transforms.Resize((600, 600)),  # EfficientNet B7 recommended input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Function to extract deep features using EfficientNet-B7
def extract_deep_features5(image_path, model):
    """Extract a flat deep-feature vector for one image.

    The image is loaded as RGB, passed through the module-level ``transform`` and
    fed to ``model`` as a batch of one with gradients disabled.

    Args:
        image_path: path of the image file to open.
        model: torch module used as the feature extractor.

    Returns:
        1-D numpy array holding the flattened model output for the image.
    """
    # The context manager closes the underlying file as soon as the pixels are converted.
    with Image.open(image_path) as img:
        tensor = transform(img.convert('RGB'))

    batch = tensor.unsqueeze(0)  # Add a batch dimension

    # Run on whatever device the model lives on, so the function works unchanged
    # if the model has been moved to a GPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        batch = batch.to(first_param.device)

    with torch.no_grad():
        features = model(batch)

    # Flatten everything but the batch axis, then drop the batch axis only, so the
    # result is always 1-D even for a single-valued feature.
    return torch.flatten(features, start_dim=1).squeeze(0).cpu().numpy()

# Target CSV for the EfficientNet-B7 features
deep_features_csv5 = os.path.join(output_path, 'efficientB7.csv')

# One row per image: the file name followed by its deep-feature values
deep_features5 = []

for image_name in image_files:
    image_path = os.path.join(input_path, image_name)
    deep_feature_vector5 = extract_deep_features5(image_path, model)
    deep_features5.append([image_name, *deep_feature_vector5.tolist()])

# Write the table without header or index (column 0 is the image name)
deep_features_df5 = pd.DataFrame(deep_features5)
deep_features_df5.to_csv(deep_features_csv5, header=False, index=False)
print(f'Deep features saved to {deep_features_csv5}')


In [None]:
import pandas as pd
import os

# Paths for dataset and output (update these paths as needed)
output_path = '/content/drive/MyDrive/DeepLearning'  # Update this to the output folder path
final_handcrafted_csv = os.path.join(output_path, 'final_handcrafted_features.csv')  # Path to handcrafted feature CSV
deep_features_csv5 = os.path.join(output_path, 'efficientB7.csv')  # Path for EfficientNet-B7 deep features CSV

# Final combined feature CSV (Handcrafted + EfficientNet-B7)
final_combined_csv = os.path.join(output_path, 'final_combined_efficientB7.csv')

# Load the handcrafted and EfficientNet-B7 feature CSV files (both header-less,
# image name in column 0)
handcrafted_df = pd.read_csv(final_handcrafted_csv, header=None)
deep_features_df5 = pd.read_csv(deep_features_csv5, header=None)

# Ensure the same order by merging on image names (first column)
combined_df = pd.merge(handcrafted_df, deep_features_df5, on=0, how='inner')  # Merge using image names (first column)

# An inner merge drops images that exist in only one table; make that visible
# instead of silently shrinking the dataset.
expected_rows = max(len(handcrafted_df), len(deep_features_df5))
if len(combined_df) != expected_rows:
    print(f'Warning: merged table has {len(combined_df)} rows, expected {expected_rows}; '
          'some image names do not match between the two feature files')

# Save the final combined features to CSV
combined_df.to_csv(final_combined_csv, index=False, header=False)
print(f'Final combined features (Handcrafted + EfficientNet-B7) saved to {final_combined_csv}')


In [None]:
import pandas as pd

# File paths
# The header must go on the COMBINED feature file: the evaluation cell reads
# final_combined_efficientB7.csv and requires a 'filename' column. efficientB7.csv
# has to stay header-less because the merge cell reads it with header=None.
csv_file_path = '/content/drive/MyDrive/DeepLearning/final_combined_efficientB7.csv'  # Path to your combined CSV

# Load the CSV file without headers
df = pd.read_csv(csv_file_path, header=None)

# Re-running this cell must not turn an existing header into a data row:
# if the header is already there, re-read the file using it.
if str(df.iloc[0, 0]) == 'filename':
    df = pd.read_csv(csv_file_path)

# Determine the number of feature columns (excluding the filename)
num_features = df.shape[1] - 1

# Create the header with 'filename' and 'feature1', 'feature2', ..., 'featureN'
header = ['filename'] + [f'feature{i+1}' for i in range(num_features)]

# Use the header as the column names
df.columns = header

# Save the updated DataFrame back to CSV with the new header
df.to_csv(csv_file_path, index=False)

# Print the first row to verify the headers
print("Updated CSV Headers:")
print(df.head(1))


In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import (
    euclidean, cosine, canberra, cityblock, chebyshev, minkowski, mahalanobis, hamming, jaccard
)
from scipy.spatial import distance
from sklearn.preprocessing import MinMaxScaler

# Step 1: Load features
filename_column = "filename"  # Replace with the actual name of the filename column
features_file = "/content/drive/MyDrive/DeepLearning/final_combined_efficientB7.csv"  # Update with your file path
data = pd.read_csv(features_file)

# The file must carry a header row that names the filename column
if not (filename_column in data.columns):
    raise ValueError(f"Column '{filename_column}' not found in the dataset.")

# Keep the file names as a plain list; the class labels are derived from them
filenames = data[filename_column].tolist()

# Swap exact zeros for machine epsilon, then keep only the numeric columns
# (this drops the filename column)
data = data.replace(to_replace=0, value=np.finfo(float).eps)
numeric_data = data.select_dtypes(include=[np.number])

# Generate labels based on filenames
def generate_label_from_filename(filename, images_per_class=100):
    """Derive the 1-based class label from a numerically named image file.

    Args:
        filename: file name whose part before the first '.' is an integer
            (for example '0.jpg', '1.jpg', ...).
        images_per_class: number of consecutive image numbers that share a class.
            The default of 100 gives class 1 for 0-99, class 2 for 100-199, etc.

    Returns:
        int: the class label, starting at 1.

    Raises:
        ValueError: if the part before the first '.' is not an integer.
    """
    image_number = int(filename.split('.', 1)[0])
    return (image_number // images_per_class) + 1

# Class label of every image, in the same order as the feature rows
labels = np.array(list(map(generate_label_from_filename, filenames)))

# Rescale the feature columns with a Min-Max scaler fitted on the full feature matrix
scaler = MinMaxScaler()
normalized_features = scaler.fit_transform(numeric_data.to_numpy())

# Regularize the covariance matrix if it's singular
def compute_inverse_covariance(features):
    """Return the inverse of the regularized feature covariance matrix.

    Args:
        features: 2-D array-like with one row per sample and one column per feature.

    Returns:
        The inverse of ``cov + 1e-5 * I`` as a 2-D array, or None when even the
        regularized matrix cannot be inverted.
    """
    # Accept any array-like (a plain list has no .T attribute).
    features = np.asarray(features, dtype=float)
    try:
        # np.cov collapses the single-feature case to a 0-d array; force it back to
        # a matrix so the regularization and inversion work for any feature count.
        cov_matrix = np.atleast_2d(np.cov(features.T))
        # Add a small regularization to the diagonal (identity matrix)
        regularization = 1e-5 * np.eye(cov_matrix.shape[0])
        regularized_cov = cov_matrix + regularization
        # Attempt to invert the regularized covariance matrix
        return np.linalg.inv(regularized_cov)
    except np.linalg.LinAlgError:
        print("Error: Even the regularized covariance matrix is singular.")
        return None

# Pre-compute the inverse covariance matrix for Mahalanobis distance.
# It is computed once from the normalized feature matrix and read as a module-level
# global by calculate_similarity; it is None when the inversion failed.
VI = compute_inverse_covariance(normalized_features)

# Define similarity metrics including normalized Euclidean
def calculate_similarity(metric, query_feature, all_features):
    """Compute the distance from one query vector to every row of a feature matrix.

    All distances for a query are computed with a single vectorized
    ``scipy.spatial.distance.cdist`` call instead of one Python-level call per row.

    Args:
        metric: one of "normalized_euclidean", "cosine", "canberra", "manhattan",
            "chebyshev", "minkowski" (p=3), "mahalanobis", "hamming", "jaccard".
        query_feature: 1-D feature vector of the query image.
        all_features: 2-D array with one feature vector per row.

    Returns:
        1-D array with one distance per row of ``all_features`` (smaller means more
        similar). On any failure, including an unknown metric or a Mahalanobis
        request while the global ``VI`` is None, the error is printed and an array
        filled with ``inf`` is returned.
    """
    # Notebook metric name -> (cdist metric name, extra keyword arguments).
    cdist_specs = {
        "normalized_euclidean": ("euclidean", {}),  # Using normalized features
        "cosine": ("cosine", {}),
        "canberra": ("canberra", {}),
        "manhattan": ("cityblock", {}),
        "chebyshev": ("chebyshev", {}),
        "minkowski": ("minkowski", {"p": 3}),  # p=3 for Minkowski
        "hamming": ("hamming", {}),
        "jaccard": ("jaccard", {}),
    }
    try:
        if metric == "mahalanobis":
            # VI is the module-level inverse covariance matrix (None if unavailable).
            if VI is None:
                raise ValueError("Unknown metric or Mahalanobis distance not available")
            cdist_metric, extra = "mahalanobis", {"VI": VI}
        elif metric in cdist_specs:
            cdist_metric, extra = cdist_specs[metric]
        else:
            raise ValueError("Unknown metric or Mahalanobis distance not available")

        if len(all_features) == 0:
            return np.array([])

        query = np.atleast_2d(np.asarray(query_feature))
        candidates = np.asarray(all_features)
        if metric == "jaccard":
            # Convert to binary for Jaccard
            query, candidates = query > 0, candidates > 0

        # cdist returns a (1, n) matrix for a single query row; keep that row.
        return distance.cdist(query, candidates, metric=cdist_metric, **extra)[0]
    except Exception as e:
        print(f"Error in calculating {metric} distance: {e}")
        return np.full(len(all_features), np.inf)  # Return infinite distances on failure

# List of similarity metrics to evaluate, including normalized Euclidean
similarity_metrics = [
    "normalized_euclidean", "cosine", "canberra", "manhattan", "chebyshev",
    "minkowski", "mahalanobis", "hamming", "jaccard"
]

# Retrieval cut-offs and the scores recorded at each of them
top_k_values = [10, 20, 30, 40, 50]
score_names = ["precision", "recall", "accuracy", "f1"]

# Initialize storage for results: results[metric]["<score>_<k>"] is a list with one
# entry per successfully evaluated query image
results = {}
for metric in similarity_metrics:
    results[metric] = {}
    for top_k in top_k_values:
        for score in score_names:
            results[metric][f"{score}_{top_k}"] = []
total_images = len(normalized_features)

# Number of images per class, taken from the labels themselves so recall stays
# correct when the classes do not all contain exactly 100 images
class_labels, class_counts = np.unique(labels, return_counts=True)
class_sizes = dict(zip(class_labels.tolist(), class_counts.tolist()))

# Step 2: Evaluate for each image for top 10, 20, 30, 40, and 50 images
# NOTE(review): the query image is part of the ranked set, so it is retrieved as its
# own match and counted as relevant -- confirm this is the intended protocol.
for idx in range(total_images):
    query_feature = normalized_features[idx]
    query_label = labels[idx]
    relevant_total = class_sizes[int(query_label)]

    for metric in similarity_metrics:
        similarities = calculate_similarity(metric, query_feature, normalized_features)

        # Check for invalid similarity values
        if np.any(np.isnan(similarities)) or np.any(np.isinf(similarities)):
            print(f"Warning: Invalid values detected in {metric} similarities for image index {idx}")
            continue

        sorted_indices = np.argsort(similarities)
        predictions = labels[sorted_indices]  # Sorted labels based on similarity

        for top_k in top_k_values:
            # Total number of relevant images retrieved in the top_k
            relevant_retrieved = np.sum(predictions[:top_k] == query_label)

            # Calculate Precision, Recall, Accuracy, and F1 Score
            precision = relevant_retrieved / top_k          # Total images retrieved is top_k
            recall = relevant_retrieved / relevant_total    # Total images in the query's class
            accuracy = relevant_retrieved / top_k           # Accuracy is same as precision in this context
            f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

            # Store results
            results[metric][f"precision_{top_k}"].append(precision)
            results[metric][f"recall_{top_k}"].append(recall)
            results[metric][f"accuracy_{top_k}"].append(accuracy)
            results[metric][f"f1_{top_k}"].append(f1_score)

# Step 3: Calculate average metrics for each similarity measure
average_results = {}

for metric in similarity_metrics:
    average_results[metric] = {}
    for key, values in results[metric].items():
        # A metric skipped for every query has no samples: report NaN explicitly
        # instead of triggering numpy's "mean of empty slice" warning.
        average_results[metric][key] = np.mean(values) if values else np.nan

# Step 4: Print average results for top 10, 20, 30, 40, and 50
for metric, metrics_results in average_results.items():
    print(f"Metric: {metric}")
    for top_k in top_k_values:
        print(f"  Average Precision (Top {top_k}): {metrics_results[f'precision_{top_k}']:.4f}")
        print(f"  Average Recall (Top {top_k}): {metrics_results[f'recall_{top_k}']:.4f}")
        print(f"  Average Accuracy (Top {top_k}): {metrics_results[f'accuracy_{top_k}']:.4f}")
        print(f"  Average F1 Score (Top {top_k}): {metrics_results[f'f1_{top_k}']:.4f}")
    print()
