In [None]:
import os

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from torchvision import transforms
from transformers import BlipProcessor, BlipForConditionalGeneration
from sklearn.neighbors import NearestNeighbors

In [None]:
# BLIP captioning model: pick the GPU when torch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor and the model are loaded from the same pretrained checkpoint.
blip_checkpoint = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(blip_checkpoint)
blip_model = BlipForConditionalGeneration.from_pretrained(blip_checkpoint)
blip_model = blip_model.to(device)

# Universal Sentence Encoder (USE), used later to turn text into vectors.
use_model = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

# Collect every image found under <base_path>/<category>/ and record, in a
# parallel list, the category folder it came from.
base_path = "Interior_images"
# NOTE(review): these names are used verbatim as folder names; "Dinning" is
# presumably the spelling used on disk — confirm before renaming.
categories = ["Bathroom", "Bedroom","Dinning","Kitchen","Livingroom"]
image_extensions = ('.png', '.jpg', '.jpeg')
image_paths = []
labels = []

for category in categories:
    folder_path = os.path.join(base_path, category)
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # position of each image (and therefore every index built later) stable
    # from one run to the next.
    for filename in sorted(os.listdir(folder_path)):
        # Compare in lower case so files such as "photo.JPG" are not skipped.
        if filename.lower().endswith(image_extensions):
            image_paths.append(os.path.join(folder_path, filename))
            labels.append(category)

In [None]:
# Generating captions using BLIP (default decoding unless num_beams is given)
def generate_caption(image_path, num_beams=None):
    """Return a lower-cased BLIP caption for the image at ``image_path``.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (here: a file path).
        num_beams: Optional beam width forwarded to ``blip_model.generate``.
            When ``None`` (the default) no extra argument is passed, so the
            model's own generation defaults apply, exactly as before.

    Returns:
        The decoded caption with special tokens removed, in lower case.
    """
    # convert() returns a new in-memory image, so the file can be closed
    # as soon as the conversion is done.
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(device)
    generate_kwargs = {} if num_beams is None else {"num_beams": num_beams}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = blip_model.generate(**inputs, **generate_kwargs)
    return processor.decode(output[0], skip_special_tokens=True).lower()


# One caption per collected image, in the same order as image_paths.
descriptions = [generate_caption(img) for img in image_paths]

In [None]:
# Embed every caption with USE and keep the result as a NumPy array
text_vectors = np.array(use_model(descriptions))

# Persist the embeddings so they can be reloaded without re-encoding
vectors_file = "text_vectors.npy"
np.save(vectors_file, text_vectors)
print("Text vectors saved successfully!")

In [None]:
# Save each image path next to its generated caption.
# pandas (pd) is imported with the other dependencies in the first cell.
df = pd.DataFrame({"Image Path": image_paths, "Description": descriptions})
df.to_csv("image_descriptions.csv", index=False)

In [None]:
# CNN model for image feature extraction
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Keras layers are trainable by default, so only setting the last 5 layers to
# trainable=True changes nothing. To actually restrict fine-tuning to the last
# 5 layers, every earlier layer has to be frozen explicitly.
for layer in base_model.layers[:-5]:
    layer.trainable = False
for layer in base_model.layers[-5:]:
    layer.trainable = True  # Keep last 5 layers available for fine-tuning

# ResNet50 backbone -> pooled feature vector -> small dense head ending in a
# 128-dimensional output with no activation.
# NOTE(review): no compile()/fit() call is visible in this notebook, so the
# BatchNormalization and Dense layers below appear to keep their initial
# (random) weights when features are extracted — confirm whether training
# happens elsewhere, or set a seed so the features are reproducible.
cnn_model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(128)
])

In [None]:
def extract_image_features(image_path):
    """Return the flattened ``cnn_model`` output for the image at ``image_path``.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis and run through ResNet50's ``preprocess_input`` before being
    passed to ``cnn_model.predict``.
    """
    pixels = img_to_array(load_img(image_path, target_size=(224, 224)))
    # Add the batch dimension expected by the model: (H, W, C) -> (1, H, W, C)
    batch = pixels[np.newaxis, ...]
    batch = tf.keras.applications.resnet50.preprocess_input(batch)
    return cnn_model.predict(batch).flatten()

In [None]:
# Build the feature matrix: one row of CNN features per collected image
feature_rows = []
for source_path in image_paths:
    feature_rows.append(extract_image_features(source_path))
image_features = np.array(feature_rows)

In [None]:
# Nearest-neighbour index over the caption embeddings (Minkowski distance, p=3).
# fit() returns the estimator itself, so construction and fitting are chained.
knn_text = NearestNeighbors(n_neighbors=10, metric="minkowski", p=3).fit(text_vectors)

# Nearest-neighbour index over the CNN image features (cosine distance).
knn_image = NearestNeighbors(n_neighbors=10, metric="cosine").fit(image_features)

In [None]:
# Function to retrieve images based on text query
def retrieve_images_text(query_text, category=None, max_results=5, knn_depth=10):
    """Return paths of the images whose captions are closest to ``query_text``.

    Args:
        query_text: Free-text query; embedded with ``use_model``.
        category: Optional case-insensitive filter. An image is kept only if
            this text occurs in its category label (the folder it was loaded
            from), not merely somewhere in its file path.
        max_results: Maximum number of paths to return. Values <= 0 yield an
            empty list.
        knn_depth: How many nearest neighbours to inspect. Capped at the
            number of indexed images, because ``kneighbors`` raises when asked
            for more neighbours than there are samples.

    Returns:
        A list of image paths ordered from nearest to farthest. The category
        filter is applied after the neighbour search, so fewer than
        ``max_results`` paths may be returned.
    """
    if max_results <= 0:
        return []

    query_vector = np.array(use_model([query_text]))
    n_neighbors = min(knn_depth, len(image_paths))
    indices = knn_text.kneighbors(
        query_vector, n_neighbors=n_neighbors, return_distance=False
    )

    wanted = category.lower() if category else None
    result_images = []
    for i in indices[0]:
        # labels is built in parallel with image_paths, so labels[i] is the
        # category folder of image_paths[i].
        if wanted and wanted not in labels[i].lower():
            continue  # Apply category filter

        result_images.append(image_paths[i])
        if len(result_images) >= max_results:
            break

    return result_images

In [None]:
def extract_image_features(image_path):
    """Return the flattened ``cnn_model`` output for one image.

    This definition replaces the earlier ``extract_image_features`` in the
    notebook, so it must preprocess images the same way; otherwise query
    vectors would not be comparable with the vectors the image index was
    built from. It therefore uses ResNet50's ``preprocess_input`` (not a
    plain ``/ 255`` scaling) and nearest-neighbour resizing, which is
    ``load_img``'s default interpolation.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts, i.e. a file path or
            an open binary file object.

    Returns:
        A 1-D NumPy array of features.
    """
    with Image.open(image_path) as img:
        image = img.convert("RGB").resize((224, 224), Image.NEAREST)

    # np.array(...) makes a fresh float32 copy that preprocess_input may modify.
    batch = np.expand_dims(np.array(image, dtype="float32"), axis=0)
    batch = tf.keras.applications.resnet50.preprocess_input(batch)
    features = cnn_model.predict(batch)
    return features.flatten()


# Function to retrieve similar images based on an uploaded image
def retrieve_images_from_image(uploaded_image, category=None, max_results=5, knn_depth=10):
    """Return paths of the indexed images most similar to ``uploaded_image``.

    Args:
        uploaded_image: Query image, passed unchanged to
            ``extract_image_features``.
        category: Optional case-insensitive filter. An image is kept only if
            this text occurs in its category label (the folder it was loaded
            from), not merely somewhere in its file path.
        max_results: Maximum number of paths to return. Values <= 0 yield an
            empty list.
        knn_depth: How many nearest neighbours to inspect. Capped at the
            number of indexed images, because ``kneighbors`` raises when asked
            for more neighbours than there are samples.

    Returns:
        A list of image paths ordered from nearest to farthest. The category
        filter is applied after the neighbour search, so fewer than
        ``max_results`` paths may be returned.
    """
    if max_results <= 0:
        return []

    # extract_image_features already returns a flat vector; kneighbors wants
    # a 2-D array with one row per query.
    image_vector = extract_image_features(uploaded_image)
    n_neighbors = min(knn_depth, len(image_paths))
    indices = knn_image.kneighbors(
        image_vector.reshape(1, -1), n_neighbors=n_neighbors, return_distance=False
    )

    wanted = category.lower() if category else None
    result_images = []
    for i in indices[0]:
        # Ensure image belongs to the specified category; labels[i] is the
        # category folder of image_paths[i].
        if wanted and wanted not in labels[i].lower():
            continue

        result_images.append(image_paths[i])
        if len(result_images) >= max_results:
            break

    return result_images

In [None]:
def show_images(image_list, title):
    """Display the images in ``image_list`` side by side under ``title``.

    Args:
        image_list: Sequence of image paths to show, one subplot each.
        title: Figure title.

    An empty ``image_list`` prints a short message instead of plotting,
    because a figure with zero columns cannot be created.
    """
    if not image_list:
        print(f"{title}: no images to display")
        return

    # squeeze=False always returns a 2-D array of axes. Without it, a single
    # image yields one bare Axes object, which cannot be iterated over.
    fig, axes = plt.subplots(1, len(image_list), figsize=(20, 10), squeeze=False)
    fig.suptitle(title, fontsize=16)

    for ax, img_path in zip(axes[0], image_list):
        # Close the file once the pixel data has been handed to matplotlib.
        with Image.open(img_path) as img:
            ax.imshow(img.convert("RGB"))
        ax.axis("off")

    plt.show()
