# **Q1 Image Feature Extraction**

**Image Download & Pre-processing**

Pre-processed images included

1.   Resize image 256*256 pixel
2.   Rotate Image by 90 degree clockwise
3.   Image Brightened
4.   Image Exposed
5.   Image Random Flip
6.   Image Blur

1 Given image ----> 6 Different Images into Lerning Dataset

In [1]:
import pandas as pd
import numpy as np
import os
import requests
import random
import cv2
import glob
import pickle

def resize_images(input_folder, output_folder, new_width, new_height):
	os.makedirs(output_folder, exist_ok=True)

	for filename in glob.glob(os.path.join(input_folder, '*.jpg')):
		img = cv2.imread(filename)
		resized_image = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)

		output_filename = output_folder + '/' + (filename.split('/')[-1])[:-4] + '_resized.jpg'
		cv2.imwrite(output_filename, resized_image)
	print("Images Resized Succesfully")

def adjust_brightness(input_folder, output_folder, brightness_factor=100.0):
    os.makedirs(output_folder, exist_ok=True)

    for filename in glob.glob(os.path.join(input_folder, '*.jpg')):
        img = cv2.imread(filename)
        # Convert to HSV (hue, saturation, value) color space
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)

        # Adjust brightness
        v = v.astype('float64')  # Convert to float to prevent data loss
        v += brightness_factor  # Add the brightness factor
        v = np.clip(v, 0, 255)  # Ensure the values are within [0, 255]
        v = v.astype('uint8')  # Convert back to uint8

        final_hsv = cv2.merge((h, s, v))
        brightened_image = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)

        # Save the brightened image
        output_filename = os.path.join(output_folder, os.path.basename(filename)[:-4] + '_brightened.jpg')
        cv2.imwrite(output_filename, brightened_image)

    print("Brightness adjusted for all images successfully.")

def adjust_exposure(input_folder, output_folder, exposure_factor=100.0):
    os.makedirs(output_folder, exist_ok=True)

    for filename in glob.glob(os.path.join(input_folder, '*.jpg')):
        img = cv2.imread(filename)
        # Convert to HSV (hue, saturation, value) color space
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        h, s, v = cv2.split(hsv)

        # Adjust exposure
        v = v.astype('float64')  # Convert to float to prevent data loss
        v *= exposure_factor  # Multiply by the exposure factor
        v = np.clip(v, 0, 255)  # Ensure the values are within [0, 255]
        v = v.astype('uint8')  # Convert back to uint8

        final_hsv = cv2.merge((h, s, v))
        exposed_image = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)

        # Save the exposed image
        output_filename = os.path.join(output_folder, os.path.basename(filename)[:-4] + '_exposed.jpg')
        cv2.imwrite(output_filename, exposed_image)

    print("Exposure adjusted for all images successfully.")

def random_flip_images(input_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    for filename in glob.glob(os.path.join(input_folder, '*.jpg')):
        img = cv2.imread(filename)

        # Randomly decide the flip code: 0 for vertical, 1 for horizontal, -1 for both axes
        flip_code = random.choice([0, 1, -1])
        flipped_image = cv2.flip(img, flip_code)

        # Save the flipped image
        output_filename = os.path.join(output_folder, os.path.basename(filename)[:-4] + '_flipped.jpg')
        cv2.imwrite(output_filename, flipped_image)

    print("Random flips applied to all images successfully.")

def blur_images(input_folder, output_folder, blur_strength=(5, 5)):
    os.makedirs(output_folder, exist_ok=True)

    for filename in glob.glob(os.path.join(input_folder, '*.jpg')):
        img = cv2.imread(filename)
        # Apply Gaussian blur
        blurred_image = cv2.GaussianBlur(img, blur_strength, 0)

        # Save the blurred image
        output_filename = os.path.join(output_folder, os.path.basename(filename)[:-4] + '_blurred.jpg')
        cv2.imwrite(output_filename, blurred_image)

    print("Gaussian blur applied to all images successfully.")

def rotate_images(input_folder, output_folder, rotation_angle):

    os.makedirs(output_folder, exist_ok=True)
    for filename in glob.glob(os.path.join(input_folder, '*.jpg')):
        img = cv2.imread(filename)

        if rotation_angle == 90:
            rotated_image = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        elif rotation_angle == 180:
            rotated_image = cv2.rotate(img, cv2.ROTATE_180)
        elif rotation_angle == 270:
            rotated_image = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
        else:
            print(f"Invalid rotation angle: {rotation_angle}. Skipping {filename}.")
            continue

        output_filename = output_folder + '/' + (filename.split('/')[-1])[:-4] + '_{}_rotated.jpg'.format(rotation_angle)
        cv2.imwrite(output_filename, rotated_image)
    print("Images Rotated Succesfully")

def Download_img_CSV(file_path,folder_path):
  os.makedirs(folder_path, exist_ok=True)
  df = pd.read_csv(file_path)
  for index, row in df.iterrows():
    uid = row["Unnamed: 0"]
    images = row["Image"]
    text = row["Review Text"]
    count = 0
    for image in images[1:-1].split(','):
      url = image.strip()[1:-1]
      response = requests.get(url)
    if response.status_code == 200:
      with open(folder_path+'/{}_{}.jpg'.format(uid,count), 'wb') as f:
        f.write(response.content)
        count+= 1
  print("Images Downloaded Succesfully")

In [2]:
csv_path = '/content/drive/MyDrive/Colab Notebooks/IR A2/A2_Data.csv'
download_path = '/content/drive/MyDrive/Colab Notebooks/IR A2' + "/Download_Images"
input_path = '/content/drive/MyDrive/Colab Notebooks/IR A2' + "/Input_Images"
preprocessed_path = '/content/drive/MyDrive/Colab Notebooks/IR A2' + "/Preprocessed_Images"

Download_img_CSV(csv_path,download_path)
resize_images(download_path,input_path,256 ,256)
resize_images(download_path,preprocessed_path,256 ,256)

rotate_images(input_path, preprocessed_path, 90)
# rotate_images(input_path, preprocessed_path, 180)
# rotate_images(input_path, preprocessed_path, 270)

adjust_brightness(input_path, preprocessed_path)
adjust_exposure(input_path, preprocessed_path)
random_flip_images(input_path, preprocessed_path)
blur_images(input_path, preprocessed_path)

Images Downloaded Succesfully
Images Resized Succesfully
Images Resized Succesfully
Images Rotated Succesfully
Brightness adjusted for all images successfully.
Exposure adjusted for all images successfully.
Random flips applied to all images successfully.
Gaussian blur applied to all images successfully.


**Using RESNET18 pretrained model to extract features of a image**

In [3]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# Load the pre-trained ResNet-18 model
resnet_model = models.resnet18(pretrained=True)
resnet_model.eval()

# Define preprocessing transformations
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def extract_image_features(image_location):
    image = Image.open(image_location)
    image_tensor = preprocess(image)
    image_tensor = image_tensor.unsqueeze(0)  # Add batch dimension
    with torch.no_grad():
        features = resnet_model(image_tensor)
    return features

def features_image_folder(folder_path):
    image_db_feature = {}
    image_paths = os.listdir(folder_path)
    for image_name in image_paths:
        locate = os.path.join(folder_path, image_name)
        image_db_feature[image_name] = extract_image_features(locate)
    return image_db_feature

# Extract features from the images
images_features = features_image_folder(preprocessed_path)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 66.6MB/s]


**Image Feature Normalization & Saved into Pickle File**

In [4]:
import torch.nn.functional as F

# Normalize the features along a specific dimension (e.g., dimension 1)
for key in images_features.keys():
  images_features[key] = F.normalize(images_features[key], dim=1)

with open('/content/drive/MyDrive/Colab Notebooks/IR A2/'+'features.pkl', 'wb') as file:
    pickle.dump(images_features, file)

print("The array has been stored in pickle format as 'features.pkl'.")

The array has been stored in pickle format as 'features.pkl'.


# **Q2 Text TF-IDF**

In [5]:
import os
import nltk
import pickle
import math
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from string import punctuation
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

def token_punctuation(token):
    for ch in token:
        if ch in punctuation:
            return 1
    return 0

def stem_tokens(tokens):
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(token) for token in tokens]
    return stemmed_tokens

def lemmatize_tokens(tokens):
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return lemmatized_tokens

def preprocess_text(text):
    text = text.lower()
    text = BeautifulSoup(text, "html.parser").get_text() #Extra
    tokens = word_tokenize(text)
    tokens = [token for token in tokens if token.strip()] #Extra
    tokens = [token for token in tokens if not token_punctuation(token)]
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    tokens = lemmatize_tokens(tokens)
    tokens = stem_tokens(tokens)
    return tokens


def Download_txt_CSV(file_path):
    dt = {}
    df = pd.read_csv(file_path)
    for index, row in df.iterrows():
        uid = row["Unnamed: 0"]
        text = row["Review Text"]
        dt[uid] = preprocess_text(str(text))
    print("Input Text Extracted & Pre-Processed Succesfully")
    return dt

def calculate_tf(dt):
    tf = {}
    for doc_id in dt.keys():
        for word in dt[doc_id]:
            tf[tuple([doc_id,word])] = tf.get(tuple([doc_id,word]),0) + 1
        doc_len = len(dt[doc_id])
        for word in dt[doc_id]:
            tf[tuple([doc_id,word])] /= doc_len
    return tf

def calculate_idf(dt):
    df = {}
    unique_words = set()
    for doc in dt.values():
        for word in doc:
            unique_words.add(word)

    for word in unique_words:
        for doc in dt.values():
            if word in doc:
                df[word] = df.get(word,0) + 1
    # df --> doc freq of a term

    idf = {}
    N = len(dt.keys()) #Total #(Docs) in Corpus
    for word in unique_words:
      idf[word] = math.log(N / (df[word]+1) )

    return idf


def calculate_tf_idf(dt,tf,idf):
    tf_idf = {}
    Documents = dt.keys()
    for doc_id in Documents:
        doc_len = len(dt[doc_id])
        for word in dt[doc_id]:
            tf_idf[tuple([doc_id,word])] = tf[tuple([doc_id,word])] * idf[word]
    return tf_idf


file_path = '/content/drive/MyDrive/Colab Notebooks/IR A2/' + '/A2_Data.csv'
processed_text = Download_txt_CSV(file_path)
tf = calculate_tf(processed_text)
idf = calculate_idf(processed_text)
tf_idf = calculate_tf_idf(processed_text,tf,idf)

with open('/content/drive/MyDrive/Colab Notebooks/IR A2/'+'tf_idf.pkl', 'wb') as file:
    pickle.dump((idf,tf_idf), file)
print("The dict has been stored in pickle format as 'tf_idf.pkl'.")

# for key in tf_idf:
#   print(key,"--->",tf_idf)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
  text = BeautifulSoup(text, "html.parser").get_text() #Extra


Input Text Extracted & Pre-Processed Succesfully
The dict has been stored in pickle format as 'tf_idf.pkl'.


In [6]:
with open('/content/drive/MyDrive/Colab Notebooks/IR A2/'+'tf_idf.pkl', 'rb') as file:
    text_features = pickle.load(file)

print(text_features[0]) #    idf
print(text_features[1]) # tf_idf

{'sometim': 5.115995809754082, 'guiar': 6.214608098422191, 'xavier': 6.214608098422191, 'turner': 6.214608098422191, 'v': 4.961845129926823, 'understand': 4.961845129926823, 'me51': 6.214608098422191, 'cloth': 5.809142990314028, 'tlc': 6.214608098422191, 'mechan': 4.961845129926823, 'produc': 4.199705077879927, 'gbg': 6.214608098422191, 'ts9dx': 6.214608098422191, 'tripod': 5.298317366548036, 'shure': 4.422848629194137, 'best': 2.8473122684357177, 'motherboard': 6.214608098422191, 'pleasant': 5.521460917862246, 'bare': 4.605170185988092, 'podcast': 4.961845129926823, 'necessari': 5.298317366548036, 'hid': 6.214608098422191, 'jazz': 4.199705077879927, 'motor': 6.214608098422191, 'extent': 6.214608098422191, 'e': 4.710530701645918, 'bell': 5.809142990314028, 'exterior': 6.214608098422191, 'prior': 5.809142990314028, 'tub': 6.214608098422191, 'pattern': 4.710530701645918, 'omg': 6.214608098422191, 'chisel': 6.214608098422191, 'jumbo': 6.214608098422191, 'iphon': 5.809142990314028, 'giant'

# **Q3 Image Retrieval and Text Retrieval**

In [26]:
import pandas as pd
import numpy as np
import os
import requests
import random
import cv2
import glob
import pickle
import requests

# Your cosine_similarity function
def cosine(v1, v2):
    dot_product = np.dot(v1, v2)
    magn1 = np.linalg.norm(v1)
    magn2 = np.linalg.norm(v2)
    return dot_product / (magn1 * magn2)

def Download_CSV(file_path):
    data = {}                          # data[uid] = row{images:xxx, review:xxx}
    df = pd.read_csv(file_path)
    for index, row in df.iterrows():
        images = row["Image"]
        uid = row["Unnamed: 0"]
        text = row["Review Text"]
        data[uid] = {"Image":images,"Review":text}
    print("Input Text Extracted & Pre-Processed Succesfully")
    return data

#cosine([1,2,3],[1,2,3])
file_path = '/content/drive/MyDrive/Colab Notebooks/IR A2/' + '/A2_Data.csv'
data = Download_CSV(file_path)

Input Text Extracted & Pre-Processed Succesfully


In [27]:
# Input Query
link = ""
while True:
  print('Image:')
  link = input()
  response = requests.get(link)
  if response.status_code == 200:
    break
  else:
    print("Incorrect Link, Try Again")

query_img = '/content/drive/MyDrive/Colab Notebooks/IR A2/' + '/Query_Image.jpg'
response = requests.get(link)
with open(query_img, 'wb') as f:
  f.write(response.content)

print('Review:')
query_review = input()

# Sample Case
# https://images-na.ssl-images-amazon.com/images/I/81q5+IxFVUL._SY88.jpg
# Loving these vintage springs on my vintage strat. They have a good tension and great stability. If you are floating your bridge and want the most out of your springs than these are the way to go.

Image:
https://images-na.ssl-images-amazon.com/images/I/81q5+IxFVUL._SY88.jpg
Review:
# Loving these vintage springs on my vintage strat. They have a good tension and great stability. If you are floating your bridge and want the most out of your springs than these are the way to go.


**a) Image**

In [34]:
with open('/content/drive/MyDrive/Colab Notebooks/IR A2/'+'features.pkl', 'rb') as file:
    images_features = pickle.load(file)

query_img_features = extract_image_features(query_img)

cosine_image = {}
for key in images_features.keys():
    cosine_image[key] = cosine(query_img_features[0],images_features[key][0])

img_result = {k: v for k, v in sorted(cosine_image.items(), key=lambda item: item[1], reverse=True)}
uniq_img_result = {}
for img_name in img_result.keys():
    uid = int(img_name.split('_')[0])
    if uid not in uniq_img_result.keys():
        uniq_img_result[uid] = img_result[img_name]
    if len(uniq_img_result.keys())>=3:
        break



USING IMAGE RETRIEVAL
image url ---> ['https://images-na.ssl-images-amazon.com/images/I/81q5+IxFVUL._SY88.jpg']
review ---> Loving these vintage springs on my vintage strat. They have a good tension and great stability. If you are floating your bridge and want the most out of your springs than these are the way to go.
Text Cosine Similarity ---> 0.9031789039381403
IMAGE Cosine Similarity ---> 0.98645395

image url ---> ['https://images-na.ssl-images-amazon.com/images/I/71nSUnv7znL._SY88.jpg']
review ---> I bought the classical guitar case. It fits my Ruben Flores 1200 perfectly. I had my doubts for the low price. It's very solid constructed. It comes with two keys for a lockable latch. It has a nice surface texture that feels like leather. The whole thing feels really well-built.
Text Cosine Similarity ---> 0.0
IMAGE Cosine Similarity ---> 0.8306502

image url ---> ['https://images-na.ssl-images-amazon.com/images/I/71L6oKAiOEL._SY88.jpg', 'https://images-na.ssl-images-amazon.com/images

**b) Text**

In [29]:
def vectonize(query,doc,idf,tf_idf,doc_id):
    unique_words = set()
    unique_words.update(query)
    unique_words.update(doc)

    #Calculate TF of query and divide by total len(Query)
    tf_query = {}
    for word in query:
        tf_query[word] = tf_query.get(word,0) + 1
    N = len(query)
    for key in tf_query.keys():
        tf_query[key] = tf_query.get(word,0) / N

    #Vectonise
    query_score = []
    doc_score = []
    for word in unique_words:
        doc_score.append( tf_idf.get( (doc_id,word) , 0) )
        query_score.append( tf_query.get(word,0) * idf.get(word,0) )

    return query_score,doc_score

In [30]:
with open('/content/drive/MyDrive/Colab Notebooks/IR A2/'+'tf_idf.pkl', 'rb') as file:
    text_features = pickle.load(file)

idf = text_features[0]
tf_idf = text_features[1]

file_path = '/content/drive/MyDrive/Colab Notebooks/IR A2/' + '/A2_Data.csv'
documents = Download_txt_CSV(file_path)

query_review_tokens = preprocess_text(query_review)

cosine_review = {}
for doc_id in documents.keys():
    v1,v2 = vectonize(query_review_tokens,documents[doc_id],idf,tf_idf,doc_id)
    cosine_review[doc_id] = cosine(v1,v2)



  text = BeautifulSoup(text, "html.parser").get_text() #Extra


Input Text Extracted & Pre-Processed Succesfully
USING TEXT RETRIEVAL
image url ---> ['https://images-na.ssl-images-amazon.com/images/I/81q5+IxFVUL._SY88.jpg']
review ---> Loving these vintage springs on my vintage strat. They have a good tension and great stability. If you are floating your bridge and want the most out of your springs than these are the way to go.
TEXT Cosine Similarity ---> 0.9031789039381403

image url ---> ['https://images-na.ssl-images-amazon.com/images/I/81U3GJsTjNL._SY88.jpg', 'https://images-na.ssl-images-amazon.com/images/I/71TDWb-prbL._SY88.jpg']
review ---> Great Quality, adjustable tension. Well made.
TEXT Cosine Similarity ---> 0.286769418468529

image url ---> ['https://images-na.ssl-images-amazon.com/images/I/71bztfqdg+L._SY88.jpg']
review ---> I have been using Fender locking tuners for about five years on various strats and teles. Definitely helps with tuning stability and way faster to restring if there is a break.
TEXT Cosine Similarity ---> 0.201722

**OUTPUT**

In [38]:
print("****"*10,"USING IMAGE RETRIEVAL","****"*10)
for key in uniq_img_result.keys():
    print("image url --->",data[key]["Image"])
    print("review --->",data[key]["Review"])
    print("IMAGE Cosine Similarity --->", uniq_img_result[key])
    print("TEXT Cosine Similarity --->", cosine_review[key],end="\n\n")

print("****"*10,"USING TEXT RETRIEVAL","****"*10)
review_result = {k: v for k, v in sorted(cosine_review.items(), key=lambda item: item[1], reverse=True)}
i = 0
for key in review_result.keys():
    print("image url --->",data[key]["Image"])
    print("review --->",data[key]["Review"])
    print("TEXT Cosine Similarity --->", review_result[key],end="\n\n")
    i += 1
    if i>2:
        break

**************************************** USING IMAGE RETRIEVAL ****************************************
image url ---> ['https://images-na.ssl-images-amazon.com/images/I/81q5+IxFVUL._SY88.jpg']
review ---> Loving these vintage springs on my vintage strat. They have a good tension and great stability. If you are floating your bridge and want the most out of your springs than these are the way to go.
IMAGE Cosine Similarity ---> 0.98645395
TEXT Cosine Similarity ---> 0.9031789039381403

image url ---> ['https://images-na.ssl-images-amazon.com/images/I/71nSUnv7znL._SY88.jpg']
review ---> I bought the classical guitar case. It fits my Ruben Flores 1200 perfectly. I had my doubts for the low price. It's very solid constructed. It comes with two keys for a lockable latch. It has a nice surface texture that feels like leather. The whole thing feels really well-built.
IMAGE Cosine Similarity ---> 0.8306502
TEXT Cosine Similarity ---> 0.0

image url ---> ['https://images-na.ssl-images-amazon.co


# **Q4 Combined Retrieval (Text and Image)**

In [31]:
print(cosine_image)
print(cosine_review)
cosine_combined = {}
for image_name in cosine_image.keys():
    uid = int(image_name.split('_')[0])
    new_score = ( cosine_image[image_name] + cosine_review.get(uid,0) )/2
    old_score = cosine_combined.get(uid,0)
    cosine_combined[uid] = new_score if new_score>old_score else old_score
print(cosine_combined)

{'3452_0_resized.jpg': 0.98645395, '1205_0_resized.jpg': 0.66397536, '1708_0_resized.jpg': 0.6106159, '2078_0_resized.jpg': 0.71799076, '801_0_resized.jpg': 0.57465214, '126_0_resized.jpg': 0.49687678, '1329_0_resized.jpg': 0.57202107, '325_0_resized.jpg': 0.57822824, '245_0_resized.jpg': 0.5833889, '1714_0_resized.jpg': 0.5752549, '1743_0_resized.jpg': 0.5758683, '3710_0_resized.jpg': 0.5832452, '1664_0_resized.jpg': 0.624434, '394_0_resized.jpg': 0.538207, '1819_0_resized.jpg': 0.49214295, '672_0_resized.jpg': 0.3821527, '2740_0_resized.jpg': 0.6149075, '2836_0_resized.jpg': 0.55871683, '2453_0_resized.jpg': 0.70241153, '364_0_resized.jpg': 0.47570726, '818_0_resized.jpg': 0.56511194, '3705_0_resized.jpg': 0.3966152, '1890_0_resized.jpg': 0.45609918, '1572_0_resized.jpg': 0.6064987, '649_0_resized.jpg': 0.5855351, '3023_0_resized.jpg': 0.5886022, '527_0_resized.jpg': 0.45482585, '1039_0_resized.jpg': 0.6041173, '2543_0_resized.jpg': 0.44959614, '1191_0_resized.jpg': 0.40397665, '590_

In [32]:
combined_result = {k: v for k, v in sorted(cosine_combined.items(), key=lambda item: item[1], reverse=True)}
print("USING COMPOSITE RETRIEVAL")
i = 0
for key in combined_result.keys():
    print("image url --->",data[key]["Image"])
    print("review --->",data[key]["Review"])
    print("COMPOSITE Cosine Similarity --->",combined_result[key],end="\n\n")
    i += 1
    if i>2:
        break

USING COMPOSITE RETRIEVAL
image url ---> ['https://images-na.ssl-images-amazon.com/images/I/81q5+IxFVUL._SY88.jpg']
review ---> Loving these vintage springs on my vintage strat. They have a good tension and great stability. If you are floating your bridge and want the most out of your springs than these are the way to go.
COMPOSITE Cosine Similarity ---> 0.9448164271716306

image url ---> ['https://images-na.ssl-images-amazon.com/images/I/81U3GJsTjNL._SY88.jpg', 'https://images-na.ssl-images-amazon.com/images/I/71TDWb-prbL._SY88.jpg']
review ---> Great Quality, adjustable tension. Well made.
COMPOSITE Cosine Similarity ---> 0.44936327564242184

image url ---> ['https://images-na.ssl-images-amazon.com/images/I/71bztfqdg+L._SY88.jpg']
review ---> I have been using Fender locking tuners for about five years on various strats and teles. Definitely helps with tuning stability and way faster to restring if there is a break.
COMPOSITE Cosine Similarity ---> 0.44169626355361974



# **Q5  Results and Analysis**

*a.* **Present the top-ranked (image, review) pairs along with the cosine similarity scores.**

--->
    
    RANK 1: IMAGE RETRIEVAL
    RANK 2: COMBINED (IMAGE + TEXT) RETRIEVAL
    RANK 3: TEXT RETRIEVAL




*b.* **Observe which out of the two retrieval techniques gives a better similarity score and argue the reason.**

--->

Most efficient result provided by Image Technique as it maps 1-1 of 2 image's feature extracted from AI model and find cosine similarity whereas the TF-IDF score are made over corpus and cosine similarity of text's TF-IDF score have been influnced by Query_text,Current_Doc_text and also by other documents of corpus (IDF Score)

Thus, even if we have an exact match, we can’t get cosine similarity ~1 in Text retrieval.

Eg: for doc_id = 3452

    IMAGE Cosine Similarity ---> 0.98645395
    COMPOSITE Cosine Similarity ---> 0.9448164271716306
    TEXT Cosine Similarity ---> 0.9031789039381403




*c.* **Discuss the challenges faced and potential improvements in the retrieval process.**

-->

*Challenges faced during retrieval:*

1. Semantic Gap: The gap between low-level features (e.g., image pixels) and high-level semantics (e.g., user intent) affects retrieval accuracy.
2. Data Variability: Variations in lighting, angles, and image quality impact feature extraction.
3. Feature Extraction: Efficient and robust feature extraction from images and text is crucial.


*Potential improvements:*
1. Fine-tuning Models: Continuously train models on diverse data to improve feature extraction.
2. Hybrid Approaches: Combine image and text features more effectively.
Semantic Embeddings: Use embeddings that bridge the semantic gap.
3. User Feedback: Incorporate user feedback to refine retrieval results.
