<a href="https://colab.research.google.com/github/Megha2k/CSE508_Winter2024_A2_MT23125/blob/main/CSE508_Winter2024_A2_MT23125.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**DATA READ**

In [None]:
import cv2
import pandas as pd
import ast
import os
import numpy as np
import pickle
import numpy as np
from urllib.request import urlopen
from google.colab import drive
import re

# Function to perform basic image preprocessing
def preprocess_image(image, output_size=(256, 256)):
    """Resize a decoded image and reduce it to a single grayscale channel.

    Args:
        image: Decoded image array (converted with ``cv2.COLOR_BGR2GRAY``), or
            None when decoding failed upstream.
        output_size: Size tuple handed to ``cv2.resize``.

    Returns:
        The resized grayscale image, or None (after printing an error) when
        ``image`` is None.
    """
    if image is not None:
        # Resize first, then drop the colour channels.
        return cv2.cvtColor(cv2.resize(image, output_size), cv2.COLOR_BGR2GRAY)

    print("Error: Failed to load image")
    return None

# Function to preprocess the review text
def preprocess_text(text):
    """Collapse whitespace in a review; missing values become an empty string.

    Every run of whitespace (line breaks included) is replaced by one space
    and the result is trimmed at both ends.
    """
    return "" if pd.isnull(text) else re.sub(r'\s+', ' ', text).strip()

# Mount Google Drive; every input and output path below lives under /content/drive
drive.mount('/content/drive')

# Directory to save the preprocessed images on Google Drive
output_directory = "/content/drive/My Drive/preprocessed_images/"

# Create the directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

# Path to the CSV file containing the dataset on Google Drive
csv_file_path = "/content/drive/My Drive/A2_Data.csv"

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Collapse whitespace in every review (missing reviews become "") and
# overwrite the 'Review Text' column with the cleaned text
df['Review Text'] = df['Review Text'].apply(preprocess_text)

# Store the preprocessed DataFrame in a pickle file.
# NOTE(review): the same DataFrame is pickled a second time, to
# dataframe.pickle, at the end of this cell; the later cells visible in this
# notebook load dataframe.pickle, not this file.
df_path = "/content/drive/My Drive/preprocessed_dataframe.pickle"
with open(df_path, 'wb') as f:
    pickle.dump(df, f)

# Upper bound (seconds) on each image download so one stalled server cannot
# hang the whole run.
DOWNLOAD_TIMEOUT_SECONDS = 30

# Download, preprocess and store every image referenced by the dataset.
# Images are written to <output_directory>/<Product_ID>/image_<idx>.jpg, where
# idx is the position of the URL inside the row's 'Image' list.
for index, row in df.iterrows():
    # The 'Image' column holds the string form of a Python list of URLs
    image_urls = ast.literal_eval(row['Image'])

    # Create a subdirectory for each product
    product_directory = os.path.join(output_directory, str(row['Product_ID']))
    os.makedirs(product_directory, exist_ok=True)

    # Iterate over each image URL
    for idx, url in enumerate(image_urls):
        try:
            # Use the response as a context manager so the connection is
            # closed even when reading or decoding fails.
            with urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECONDS) as resp:
                raw_bytes = resp.read()
            image = np.asarray(bytearray(raw_bytes), dtype="uint8")
            image = cv2.imdecode(image, cv2.IMREAD_COLOR)

            # Resize + grayscale; returns None (and prints) if decoding failed
            preprocessed_image = preprocess_image(image)

            if preprocessed_image is not None:
                # Save the preprocessed image
                image_path = os.path.join(product_directory, f"image_{idx}.jpg")
                # cv2.imwrite reports failure through its return value, not an exception
                if not cv2.imwrite(image_path, preprocessed_image):
                    print(f"Failed to write preprocessed image to {image_path}")
            else:
                print(f"Skipping image {url} due to preprocessing error")
        except Exception as e:
            # Best effort: a broken URL (e.g. HTTP 404) is reported and skipped
            # so the remaining images are still processed.
            print(f"Error loading image from URL {url}: {str(e)}")

# Display a message when preprocessing is complete
print("File succesfully read, Image and Text preprocessing complete.")

# Path to the pickle file to store the DataFrame
pickle_file_path = "/content/drive/My Drive/dataframe.pickle"

# Store the DataFrame in a pickle file (this is the file later cells load)
with open(pickle_file_path, 'wb') as f:
    pickle.dump(df, f)

print("DataFrame stored in pickle file successfully.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Error loading image from URL https://images-na.ssl-images-amazon.com/images/I/71F3npeHUDL._SY88.jpg: HTTP Error 404: Not Found
Error loading image from URL https://images-na.ssl-images-amazon.com/images/I/71wHUWncMGL._SY88.jpg: HTTP Error 404: Not Found
Error loading image from URL https://images-na.ssl-images-amazon.com/images/I/71B8OOE5N8L._SY88.jpg: HTTP Error 404: Not Found
Error loading image from URL https://images-na.ssl-images-amazon.com/images/I/81SX3oAWbNL._SY88.jpg: HTTP Error 404: Not Found
Error loading image from URL https://images-na.ssl-images-amazon.com/images/I/718niQ1GEwL._SY88.jpg: HTTP Error 404: Not Found
Error loading image from URL https://images-na.ssl-images-amazon.com/images/I/61OboZT-kcL._SY88.jpg: HTTP Error 404: Not Found
Error loading image from URL https://images-na.ssl-images-amazon.com/images/I/710a2Pyh5lL._SY88.jpg: HTTP Err

In [None]:
!pip install numpy scikit-learn tensorflow



**IMAGE PREPROCESSING & FEATURE EXTRACTION**

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
import pickle

# Function to load and preprocess images with error handling
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    """Read an image from disk, resize it and scale pixel values to [0, 1].

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.
        target_size: Size tuple handed to ``cv2.resize``.

    Returns:
        A float32 array with values divided by 255.0, or None when the file
        cannot be read, is empty, or any step raises (the error is printed).
    """
    try:
        raw = cv2.imread(image_path)
        unreadable = raw is None or raw.size == 0
        if unreadable:
            raise Exception("Failed to load image or image is empty")

        # Resize, switch to float32, then normalise pixel values to [0, 1]
        scaled = cv2.resize(raw, target_size).astype(np.float32) / 255.0
    except Exception as err:
        print(f"Error loading or preprocessing image {image_path}: {str(err)}")
        return None
    return scaled

# ast.literal_eval is used below, but this cell's import list does not include
# ast; import it here so the cell does not depend on an earlier cell's imports.
import ast

# Load the pre-trained VGG16 model
base_model = VGG16(weights='imagenet', include_top=True)
# We'll use the output of the second last layer (before the classification layer) as features
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc2').output)

# Directory containing preprocessed images
preprocessed_images_dir = "/content/drive/My Drive/preprocessed_images/"

# Load the preprocessed DataFrame from the pickle file
preprocessed_df_path = "/content/drive/My Drive/dataframe.pickle"
with open(preprocessed_df_path, 'rb') as f:
    df = pickle.load(f)

# One feature vector per successfully loaded image, labelled with the
# Product_ID of the row the image belongs to
features = []
labels = []

# Iterate over each row in the DataFrame
for index, row in df.iterrows():
    # The URL list is only needed for its length/order: image files were
    # saved as image_<position in list>.jpg
    image_urls = ast.literal_eval(row['Image'])

    # Iterate over each image URL
    for idx, url in enumerate(image_urls):
        # Construct the path to the preprocessed image
        image_path = os.path.join(preprocessed_images_dir, str(row['Product_ID']), f"image_{idx}.jpg")

        # Returns None (after printing) for images that are missing or unreadable
        image = load_and_preprocess_image(image_path)

        # Ensure image loading and preprocessing is successful
        if image is not None:
            # NOTE(review): pixels arrive here scaled to [0, 1]; Keras also ships a
            # VGG16-specific preprocess_input — confirm the [0, 1] scaling is intended.
            # verbose=0 suppresses the per-call progress bar that would otherwise
            # be printed once per image.
            feature = model.predict(np.expand_dims(image, axis=0), verbose=0).flatten()

            # Append features and label to the lists
            features.append(feature)
            labels.append(row['Product_ID'])

# Convert features and labels to numpy arrays
features = np.array(features)
labels = np.array(labels)

# Save the extracted features and labels using pickle
output_file = "/content/drive/My Drive/extracted_features.pickle"
with open(output_file, 'wb') as f:
    pickle.dump((features, labels), f)

print("Feature extraction complete and saved.")


Error loading or preprocessing image /content/drive/My Drive/preprocessed_images/2235/image_0.jpg: Failed to load image or image is empty
Error loading or preprocessing image /content/drive/My Drive/preprocessed_images/2235/image_1.jpg: Failed to load image or image is empty
Error loading or preprocessing image /content/drive/My Drive/preprocessed_images/3317/image_0.jpg: Failed to load image or image is empty
Error loading or preprocessing image /content/drive/My Drive/preprocessed_images/3317/image_1.jpg: Failed to load image or image is empty
Error loading or preprocessing image /content/drive/My Drive/preprocessed_images/2912/image_0.jpg: Failed to load image or image is empty
Error loading or preprocessing image /content/drive/My Drive/preprocessed_images/2265/image_0.jpg: Failed to load image or image is empty
Error loading or preprocessing image /content/drive/My Drive/preprocessed_images/2088/image_0.jpg: Failed to load image or image is empty
Error loading or preprocessing ima

In [13]:
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Load the (features, labels) tuple written by the feature-extraction cell
input_file = "/content/drive/My Drive/extracted_features.pickle"
with open(input_file, 'rb') as f:
    features, labels = pickle.load(f)

# Normalize the extracted features with scikit-learn's StandardScaler
# (fit and transform on the same array)
scaler = StandardScaler()
normalized_features = scaler.fit_transform(features)

# Save the normalized features and the unchanged labels to a separate pickle
# file; the un-normalized file is left as it is
output_file = "/content/drive/My Drive/normalized_extracted_features.pickle"
with open(output_file, 'wb') as f:
    pickle.dump((normalized_features, labels), f)

print("Feature normalization complete and saved.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Feature normalization complete and saved.


**TEXT PREPROCESSING**

In [14]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
import string
import re

# Download the NLTK resources used by this cell's preprocess_text:
# tokenizer data, the stop-word list and WordNet
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Load the preprocessed DataFrame from the pickle file.
# NOTE(review): this cell's import list does not include pickle; it relies on
# an earlier cell having imported it.
preprocessed_df_path = "/content/drive/My Drive/dataframe.pickle"
with open(preprocessed_df_path, 'rb') as f:
    df = pickle.load(f)

# Function to perform text preprocessing
# Lazily-built NLTK helpers shared by every preprocess_text call. They are
# created on first use (not at definition time) so the NLTK downloads above
# have already run.
_TEXT_TOOLS = {}


def _get_text_tools():
    """Build the stop-word set, stemmer and lemmatizer once and reuse them."""
    if not _TEXT_TOOLS:
        _TEXT_TOOLS['stop_words'] = set(stopwords.words('english'))
        _TEXT_TOOLS['stemmer'] = PorterStemmer()
        _TEXT_TOOLS['lemmatizer'] = WordNetLemmatizer()
    return _TEXT_TOOLS


def preprocess_text(text):
    """Normalise one review into a space-joined string of processed tokens.

    Steps: lowercase, tokenize with ``word_tokenize``, drop punctuation-only
    tokens, drop English stop words, Porter-stem, then WordNet-lemmatize.

    Args:
        text: Review text, or a missing value (None / NaN).

    Returns:
        The processed tokens joined by single spaces; "" for missing values.
    """
    if pd.isnull(text):  # Check for NaN values
        return ""

    tools = _get_text_tools()

    # Convert text to lowercase
    text = text.lower()

    # Tokenization
    tokens = word_tokenize(text)

    # Remove punctuation. string.punctuation is a str, so the previous
    # `token not in string.punctuation` was a substring test that let
    # multi-character punctuation tokens through; test every character instead.
    tokens = [
        token for token in tokens
        if not all(char in string.punctuation for char in token)
    ]

    # Remove stop words
    stop_words = tools['stop_words']
    tokens = [token for token in tokens if token not in stop_words]

    # Stemming
    stemmer = tools['stemmer']
    tokens = [stemmer.stem(token) for token in tokens]

    # Lemmatization (applied to the already-stemmed tokens)
    lemmatizer = tools['lemmatizer']
    tokens = [lemmatizer.lemmatize(token) for token in tokens]

    # Join tokens back into a string
    preprocessed_text = ' '.join(tokens)

    return preprocessed_text

# Apply preprocessing to each review text; the result is stored in a new
# 'Preprocessed_Review' column and 'Review Text' is left untouched
df['Preprocessed_Review'] = df['Review Text'].apply(preprocess_text)

# Store only the 'Preprocessed_Review' column (a pandas Series) in a pickle file
path = "/content/drive/My Drive/preprocessed_reviews.pickle"
with open(path, 'wb') as f:
    pickle.dump(df['Preprocessed_Review'], f)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


**TF-IDF scores**

In [15]:
import pandas as pd
import numpy as np
import math
import pickle
import re

# Load the preprocessed DataFrame from the pickle file
preprocessed_df_path = "/content/drive/My Drive/dataframe.pickle"
with open(preprocessed_df_path, 'rb') as f:
    df = pickle.load(f)

# Replace NaN values in 'Review Text' column with empty strings
df['Review Text'] = df['Review Text'].fillna('')

# Tokenize the reviews: lowercase, then take every run of word characters.
# Each entry of 'Preprocessed_Review' is therefore a list of tokens.
# NOTE(review): this tokenizes the raw 'Review Text' with a regex; it does not
# read preprocessed_reviews.pickle written by the text-preprocessing cell.
df['Preprocessed_Review'] = df['Review Text'].apply(lambda x: re.findall(r'\b\w+\b', x.lower()))

# Function to calculate term frequency (TF) for a term in a document
def calculate_tf(term, document):
    """Return the share of entries in ``document`` that equal ``term``.

    An empty document yields 0.
    """
    if len(document) > 0:
        return document.count(term) / len(document)
    return 0

# Function to calculate inverse document frequency (IDF) for a term
def calculate_idf(term, documents):
    """Return log10(N / (1 + df)) for ``term``.

    N is the number of documents and df the number of documents that contain
    ``term``; the +1 keeps the denominator non-zero for unseen terms.
    """
    containing = 0
    for doc in documents:
        if term in doc:
            containing += 1
    return math.log10(len(documents) / (1 + containing))

# Function to calculate TF-IDF scores for terms in documents
def calculate_tfidf(documents):
    """Compute TF-IDF scores for every distinct term of every document.

    TF is ``count(term in document) / len(document)`` and IDF is
    ``log10(N / (1 + document_frequency))``, the same formulas as
    ``calculate_tf`` and ``calculate_idf``. Document frequencies are counted
    once up front instead of rescanning every document for each
    (document, term) pair.

    Args:
        documents: Sequence of token lists (a list or a pandas Series of
            lists); it is iterated twice. Tokens must be hashable.

    Returns:
        dict mapping the document's position to a ``{term: tf * idf}`` dict.
        An empty document maps to an empty dict.
    """
    from collections import Counter  # local import: not imported elsewhere in this notebook

    total_documents = len(documents)

    # Pass 1: in how many documents does each term occur?
    document_frequency = Counter()
    for document in documents:
        document_frequency.update(set(document))

    # Pass 2: per-document term counts combined with the shared IDF values
    tfidf_scores = {}
    for i, document in enumerate(documents):
        total_terms = len(document)
        tfidf_scores[i] = {
            term: (count / total_terms)
            * math.log10(total_documents / (1 + document_frequency[term]))
            for term, count in Counter(document).items()
        }
    return tfidf_scores

# Calculate TF-IDF scores for the tokenized reviews; the result maps each
# review's position to a {term: score} dict
tfidf_scores = calculate_tfidf(df['Preprocessed_Review'])

# Save TF-IDF scores using pickle
output_file = "/content/drive/My Drive/tfidf_scores.pickle"
with open(output_file, 'wb') as f:
    pickle.dump(tfidf_scores, f)

print("TF-IDF scores calculated and saved.")


TF-IDF scores calculated and saved.


**IMAGE COSINE SIMILARITY**

In [16]:
import numpy as np
import pickle

# Load the (normalized_features, labels) tuple written by the normalization cell
with open("/content/drive/My Drive/normalized_extracted_features.pickle", 'rb') as f:
    normalized_features, labels = pickle.load(f)

# Function to calculate cosine similarity between two vectors
def cosine_similarity(vector1, vector2):
    """Return the cosine similarity of two 1-D vectors.

    Args:
        vector1: First vector.
        vector2: Second vector, same length as ``vector1``.

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)``, or 0.0 when either vector has zero
        norm (previously a 0/0 division that produced nan and a
        RuntimeWarning).
    """
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        # The angle to a zero vector is undefined; treat it as "not similar"
        return 0.0
    return np.dot(vector1, vector2) / norm_product

# Function to find most similar images for each image
def find_similar_images(feature_vectors, labels, top_k=3):
    """For each image, collect the ``top_k`` most similar other images.

    Args:
        feature_vectors: Sequence of feature vectors, one per image.
        labels: Product ID of each image, aligned with ``feature_vectors``.
        top_k: Maximum number of neighbours kept per image.

    Returns:
        dict mapping a product ID to a list of ``(product_id, similarity)``
        tuples in descending similarity, with at most one entry per
        neighbouring product ID. The dict is keyed by product ID, so when
        several images share a product ID the entry of the last such image
        is the one that remains.
    """
    results = {}
    for query_idx, query_vector in enumerate(feature_vectors):
        # Score the query against every other image (itself excluded by index)
        scored = [
            (cosine_similarity(query_vector, candidate), labels[cand_idx])
            for cand_idx, candidate in enumerate(feature_vectors)
            if cand_idx != query_idx
        ]
        ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)

        # Walk the ranking, keeping only the best hit per product ID
        picked = []
        seen_products = set()
        for score, candidate_id in ranked:
            if len(picked) >= top_k:
                break
            if candidate_id in seen_products:
                continue
            picked.append((candidate_id, score))
            seen_products.add(candidate_id)

        results[labels[query_idx]] = picked
    return results

# Find the most similar images for each image (results are keyed by product ID)
similar_images = find_similar_images(normalized_features, labels)

# Save the results using pickle
output_file = "/content/drive/My Drive/similar_images_results.pickle"
with open(output_file, 'wb') as f:
    pickle.dump(similar_images, f)

print("Similar images results saved.")


Similar images results saved.


**TEXT COSINE SIMILARITY**

In [17]:
import numpy as np
import pandas as pd
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the preprocessed reviews written by the text-preprocessing cell
with open("/content/drive/My Drive/preprocessed_reviews.pickle", 'rb') as f:
    preprocessed_reviews = pickle.load(f)

# Load the preprocessed DataFrame from the pickle file
preprocessed_df_path = "/content/drive/My Drive/dataframe.pickle"
with open(preprocessed_df_path, 'rb') as f:
    df = pickle.load(f)

# Create a TF-IDF vectorizer (scikit-learn defaults)
vectorizer = TfidfVectorizer()

# Fit the vectorizer on the preprocessed reviews and build the
# review-by-term TF-IDF matrix
tfidf_matrix = vectorizer.fit_transform(preprocessed_reviews)

# Function to find most similar reviews for each review
def find_similar_reviews(tfidf_matrix, product_ids, top_k=3):
    """For each review, collect the ``top_k`` most similar other reviews.

    Args:
        tfidf_matrix: 2-D TF-IDF matrix with one row per review.
        product_ids: Product ID of each review, aligned with the matrix rows.
        top_k: Number of neighbours kept per review.

    Returns:
        dict mapping a product ID to a list of ``(product_id, similarity)``
        tuples in descending similarity. The dict is keyed by product ID, so
        if a product ID occurs more than once the last occurrence wins.
    """
    similar_reviews = {}
    num_reviews = tfidf_matrix.shape[0]

    # Iterate over each review
    for i, product_id in enumerate(product_ids):
        # One row-vs-matrix call yields the similarity of review i to every
        # review, replacing num_reviews separate single-pair calls.
        row_similarities = cosine_similarity(tfidf_matrix[i], tfidf_matrix)[0]

        # Pair every other review's product ID with its similarity to review i
        similarities = [
            (product_ids[j], row_similarities[j])
            for j in range(num_reviews)
            if j != i
        ]

        # Sort similarities and store the top-k similar reviews
        similarities.sort(key=lambda x: x[1], reverse=True)
        similar_reviews[product_id] = similarities[:top_k]

    return similar_reviews

# Find the most similar reviews for each review; product_ids is taken from the
# DataFrame rows and is expected to line up with the rows of tfidf_matrix
product_ids = df['Product_ID'].tolist()
similar_reviews = find_similar_reviews(tfidf_matrix, product_ids)

# Save the results using pickle
output_file = "/content/drive/My Drive/similar_reviews_results.pickle"
with open(output_file, 'wb') as f:
    pickle.dump(similar_reviews, f)

print("Similar reviews results saved.")


Similar reviews results saved.


**AVERAGE COMPOSITE SIMILARITY SCORE**

In [18]:
import pickle
import numpy as np
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Load the per-product neighbour lists written by the image and text
# similarity cells
with open("/content/drive/My Drive/similar_images_results.pickle", 'rb') as f:
    similar_images = pickle.load(f)

with open("/content/drive/My Drive/similar_reviews_results.pickle", 'rb') as f:
    similar_reviews = pickle.load(f)

# Function to calculate average similarity score
def calculate_average_similarity(similarity_results):
    """Average the similarity scores stored for each key.

    Args:
        similarity_results: dict mapping a key to a list of
            ``(identifier, score)`` pairs.

    Returns:
        dict mapping each key to the mean of its scores. A key with no pairs
        maps to 0.0; ``np.mean`` of an empty list would be nan (with a
        RuntimeWarning), and nan values make a later sort by score unreliable.
    """
    composite_similarity_scores = {}
    for key, value in similarity_results.items():
        scores = [score for _, score in value]
        composite_similarity_scores[key] = np.mean(scores) if scores else 0.0
    return composite_similarity_scores

# Calculate average similarity scores for image similarity results
average_image_similarity = calculate_average_similarity(similar_images)

# Calculate average similarity scores for review similarity results
average_review_similarity = calculate_average_similarity(similar_reviews)

# Combine the two averages into one composite score per product ID (their
# plain mean). Only keys present in the image results are used; a key that is
# missing from the review results would raise KeyError here.
composite_similarity_scores = {}
for key in average_image_similarity.keys():
    composite_similarity_scores[key] = (average_image_similarity[key] + average_review_similarity[key]) / 2

# Save the composite similarity scores using pickle
output_file = "/content/drive/My Drive/composite_similarity_scores.pickle"
with open(output_file, 'wb') as f:
    pickle.dump(composite_similarity_scores, f)

print("Composite similarity scores calculated and saved.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Composite similarity scores calculated and saved.


**RANKING**

In [19]:
# Rank the (product ID, composite score) pairs by score, highest first.
# NOTE(review): composite_similarity_scores and pickle are not defined or
# imported in this cell; it relies on the previous cell having run in the
# same kernel.
ranked_pairs = sorted(composite_similarity_scores.items(), key=lambda x: x[1], reverse=True)

# Save the ranked pairs using pickle
output_file_ranked = "/content/drive/My Drive/ranked_pairs_based_on_composite_similarity.pickle"
with open(output_file_ranked, 'wb') as f:
    pickle.dump(ranked_pairs, f)

print("Ranked pairs based on composite similarity score saved.")


Ranked pairs based on composite similarity score saved.


**RETRIEVAL**

In [20]:
import pandas as pd
import pickle
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the preprocessed reviews written by the text-preprocessing cell
with open("/content/drive/My Drive/preprocessed_reviews.pickle", 'rb') as f:
    preprocessed_reviews = pickle.load(f)

# Load the preprocessed DataFrame from the pickle file
preprocessed_df_path = "/content/drive/My Drive/dataframe.pickle"
with open(preprocessed_df_path, 'rb') as f:
    df = pickle.load(f)

# Load the similar images and reviews results from the pickle files
with open("/content/drive/My Drive/similar_images_results.pickle", 'rb') as f:
    similar_images = pickle.load(f)

with open("/content/drive/My Drive/similar_reviews_results.pickle", 'rb') as f:
    similar_reviews = pickle.load(f)

# Create a TF-IDF vectorizer (scikit-learn defaults)
vectorizer = TfidfVectorizer()

# Fit the vectorizer on the preprocessed reviews; the fitted vectorizer is
# used by calculate_similarity below to vectorize individual reviews
tfidf_matrix = vectorizer.fit_transform(preprocessed_reviews)

# Function to find the product ID for a given text review
def find_product_id_by_review(text_review):
    """Return the Product_ID of the first row whose review equals ``text_review``.

    The comparison is an exact string match against the 'Review Text' column
    of the global ``df``. Returns None when no row matches.
    """
    matches = (
        row['Product_ID']
        for _, row in df.iterrows()
        if row['Review Text'] == text_review
    )
    return next(matches, None)

# Function to find the product ID for a given image URL
def find_product_id(image_url):
    """Return the Product_ID of the first row whose image list contains ``image_url``.

    Each 'Image' cell of the global ``df`` holds the string form of a Python
    list of URLs. It is parsed with ``ast.literal_eval`` (as in the data-read
    cell) rather than ``eval``, so text coming from the CSV is never executed
    as code.

    Returns:
        The matching row's Product_ID, or None when no row lists the URL.
    """
    import ast  # local import: this cell's import list does not include ast

    for index, row in df.iterrows():
        image_urls = ast.literal_eval(row['Image'])
        if image_url in image_urls:
            return row['Product_ID']
    return None

# Function to find the stored most-similar images for a given image URL
def find_top_similar_images(image_url):
    """Look up the precomputed image neighbours of the product owning ``image_url``.

    Each neighbour's image list and review are fetched by its own product ID,
    so every tuple describes a single product. (Previously images and reviews
    were collected in DataFrame order and zipped with IDs and scores in
    similarity order, which could pair a product ID with another product's
    data.)

    Args:
        image_url: URL expected to appear in some row's 'Image' list.

    Returns:
        List of ``(product_id, image, similarity, review_text)`` tuples in
        the stored similarity order, where ``image`` is the raw 'Image'
        column value. None when the URL is unknown or has no stored
        neighbours. Neighbours whose product ID is absent from ``df`` are
        skipped.
    """
    product_id = find_product_id(image_url)
    if product_id is None:
        return None

    top_similar_images_info = similar_images.get(product_id)
    if not top_similar_images_info:
        return None

    # Index the rows by product ID once (first row wins on duplicates)
    rows_by_product = df.drop_duplicates(subset='Product_ID').set_index('Product_ID')

    results = []
    for similar_product_id, similarity in top_similar_images_info:
        if similar_product_id not in rows_by_product.index:
            continue
        row = rows_by_product.loc[similar_product_id]
        results.append((similar_product_id, row['Image'], similarity, row['Review Text']))
    return results

# Function to find the stored most-similar text reviews for a given text review
def find_top_similar_reviews(text_review):
    """Look up the precomputed review neighbours of the product owning ``text_review``.

    The input is whitespace-normalised the same way as the stored reviews and
    then matched exactly. Each neighbour's review and image list are fetched
    by its own product ID, so every tuple describes a single product.
    (Previously they were collected in DataFrame order and zipped with IDs
    and scores in similarity order.)

    Args:
        text_review: Review text expected to equal some row's 'Review Text'.

    Returns:
        List of ``(product_id, review_text, similarity, image)`` tuples in
        the stored similarity order, where ``image`` is the raw 'Image'
        column value. None when the review is unknown or has no stored
        neighbours. Neighbours whose product ID is absent from ``df`` are
        skipped.
    """
    text_review = re.sub(r'\s+', ' ', text_review).strip()
    product_id = find_product_id_by_review(text_review)
    if product_id is None:
        return None

    top_similar_reviews_info = similar_reviews.get(product_id)
    if not top_similar_reviews_info:
        return None

    # Index the rows by product ID once (first row wins on duplicates)
    rows_by_product = df.drop_duplicates(subset='Product_ID').set_index('Product_ID')

    results = []
    for similar_product_id, similarity in top_similar_reviews_info:
        if similar_product_id not in rows_by_product.index:
            continue
        row = rows_by_product.loc[similar_product_id]
        results.append((similar_product_id, row['Review Text'], similarity, row['Image']))
    return results

# Function to calculate cosine similarity between two text reviews
def calculate_similarity(review1, review2):
    """Return the cosine similarity of two review strings.

    Both strings are vectorized with the already-fitted global ``vectorizer``
    and compared with ``cosine_similarity``; the single resulting value is
    returned.
    """
    first_vector, second_vector = (
        vectorizer.transform([review]) for review in (review1, review2)
    )
    return cosine_similarity(first_vector, second_vector)[0][0]

# Example usage
image_url_input = input("Enter the image URL: ")
text_review_input = input("Enter the text review: ")

# Look up the stored most-similar images for the input image URL
top_similar_images = find_top_similar_images(image_url_input)

# Look up the stored most-similar text reviews for the input text review
top_similar_reviews = find_top_similar_reviews(text_review_input)

# Print the similar images
if top_similar_images:
    print("\nUSING IMAGE RETRIEVAL\n")
    for i, similar_image_info in enumerate(top_similar_images, 1):
        product_id, image_url, similarity_score, similar_text_review = similar_image_info
        print(f"{i}) Product ID: {product_id}")
        print(f"Image URL: {image_url}")
        print(f"Review: {similar_text_review}")
        print(f"Cosine similarity of images - {similarity_score:.4f}")
        # Calculate cosine similarity of text
        text_similarity_score = calculate_similarity(text_review_input, similar_text_review)
        print(f"Cosine similarity of text - {text_similarity_score:.3f}")
        # Calculate composite similarity score
        composite_similarity_score = (similarity_score + text_similarity_score) / 2
        print(f"Composite similarity score: {composite_similarity_score:.4f}")
        print()
else:
    print("No similar images found for the input image URL.")

# Print the similar text reviews
if top_similar_reviews:
    print("\nUSING TEXT RETRIEVAL\n")

    # Review of the dataset row whose 'Image' value contains the query URL.
    # It does not depend on the loop variable, so look it up once; it stays
    # None when no row matches (previously .iloc[0] raised IndexError then).
    query_image_rows = df[df['Image'].apply(lambda x: image_url_input in x)]
    if query_image_rows.empty:
        query_image_review = None
    else:
        query_image_review = query_image_rows['Review Text'].iloc[0]

    for i, similar_review_info in enumerate(top_similar_reviews, 1):
        similar_product_id, similar_review_text, similarity_score_text, similar_image_url = similar_review_info
        # NOTE(review): the value reported as "image" similarity is the TEXT
        # similarity between the query image's review and this review; no
        # image features are compared here — confirm this is intended.
        if query_image_review is not None:
            similarity_score_image = calculate_similarity(query_image_review, similar_review_text)
        else:
            # Unknown query image: no evidence available, score it as 0.0
            similarity_score_image = 0.0
        composite_similarity_score = (similarity_score_text + similarity_score_image) / 2
        print(f"{i}) Product ID: {similar_product_id}")
        print(f"Image URL: {similar_image_url}")
        print(f"Review: {similar_review_text}")
        print(f"Cosine similarity of images - {similarity_score_image:.4f}")
        print(f"Cosine similarity of text - {similarity_score_text:.3f}")
        print(f"Composite similarity score: {composite_similarity_score:.4f}")
        print()
else:
    print("No similar text reviews found for the input text review.")

import pickle

# Load the ranked (product ID, composite score) pairs written by the ranking cell
with open("/content/drive/My Drive/ranked_pairs_based_on_composite_similarity.pickle", 'rb') as f:
    ranked_pairs = pickle.load(f)

# Define a function to preprocess the text
def preprocess_text(text):
    """Collapse whitespace runs to single spaces and trim; missing values give "".

    Note that this definition rebinds the name ``preprocess_text`` for every
    cell that runs after it.
    """
    is_missing = pd.isnull(text)  # True for None / NaN
    if not is_missing:
        collapsed = re.sub(r'\s+', ' ', text)
        return collapsed.strip()
    return ""

def find_top_similar_product(image_url, review_text):
    """Return details for the first three entries of the global ``ranked_pairs``.

    Neither ``image_url`` nor ``review_text`` is read by the body: the result
    depends only on ``ranked_pairs`` and ``df``.

    Returns:
        List of up to three dicts with keys 'Product_ID', 'Image_URL',
        'Review_Text' and 'Composite_Similarity_Score', in ranking order.
        Product details come from the first ``df`` row with that Product_ID.
    """
    top_entries = []
    for entry in ranked_pairs[:3]:
        ranked_product_id = entry[0]
        ranked_score = entry[1]  # composite similarity score of this entry

        matching_rows = df[df['Product_ID'] == ranked_product_id]
        first_row = matching_rows.iloc[0]

        details = {
            'Product_ID': ranked_product_id,
            'Image_URL': first_row['Image'],
            'Review_Text': first_row['Review Text'],
            'Composite_Similarity_Score': ranked_score
        }
        top_entries.append(details)

    return top_entries

# Fetch the top 3 ranked products. The query inputs are passed along, but
# find_top_similar_product does not read them (see its body).
similar_products = find_top_similar_product(image_url_input, text_review_input)

# Print one block per product: ID, raw 'Image' value, review and composite score
print("\nCOMPOSITE RETRIEVAL\n")
for product in similar_products:
    print("Product ID:", product['Product_ID'])
    print("Image URL:", product['Image_URL'])
    print("Review Text:", product['Review_Text'])
    print("Composite Similarity Score:", product['Composite_Similarity_Score'])
    print()


Enter the image URL: https://images-na.ssl-images-amazon.com/images/I/81q5+IxFVUL._SY88.jpg
Enter the text review: Loving these vintage springs on my vintage strat. They have a good tension and great stability. If you are floating your bridge and want the most out of your springs than these are the way to go.

USING IMAGE RETRIEVAL

1) Product ID: 3637
Image URL: ['https://images-na.ssl-images-amazon.com/images/I/71nsBodxLXL._SY88.jpg', 'https://images-na.ssl-images-amazon.com/images/I/71WSo7HvxkL._SY88.jpg']
Review: Way better than I was expecting. I was recently upgrading all the parts on my first strat, and a buddy told me the trem system upgrade would be the biggest. I'm glad I listened to him and got this, because it was night and day. I'm not sure if it's just me, because I'm using my old springs, but my guitar stays in tune better now when using the trem bar. I uploaded a photo to show how small and light my old trem block was compared to this one. Side note...everything fit per