In [None]:
import csv
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
import random
import matplotlib.patches as mpatches
from heapq import heappush, heappop

# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so prefer the new spelling and fall back on old releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
csv_file = "amazon.csv"

# Read the CSV data and create a dictionary of products keyed by product_id.
# newline='' lets the csv module handle line breaks inside quoted fields itself.
products_dict = {}
with open(csv_file, mode='r', encoding='utf-8', newline='') as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        # rating_count may contain commas, which are stripped before parsing;
        # blank or unparsable values fall back to 0
        try:
            rating_count = int(row['rating_count'].replace(',', '')) if row['rating_count'] else 0
        except ValueError:
            rating_count = 0

        # Unparsable or missing ratings fall back to 0.0
        # (float(None) raises TypeError when a row is shorter than the header)
        try:
            rating = float(row['rating'])
        except (TypeError, ValueError):
            rating = 0.0

        # Create a dictionary for each product
        product_details = {
            'product_id': row['product_id'],
            'product_name': row['product_name'],
            'category': row['category'],
            'rating': rating,
            'rating_count': rating_count,
            'about_product': row['about_product']
        }
        # Use product_id as the key; a repeated product_id overwrites the earlier row
        products_dict[row['product_id']] = product_details

# Print the first 10 product IDs for checking
print("Available product IDs:", list(products_dict.keys())[:10])

# Construct the smallest square grid that has at least one cell per product
num_products = len(products_dict)
grid_size = int(np.ceil(np.sqrt(num_products)))
grid = np.empty((grid_size, grid_size), dtype=object)

# Row features: one-hot category columns followed by rating and rating_count,
# each column scaled to [0, 1]
categories = np.array([p['category'] for p in products_dict.values()]).reshape(-1, 1)
category_encoded = encoder.fit_transform(categories)

features = np.array([[p['rating'], p['rating_count']] for p in products_dict.values()])
features = np.concatenate((category_encoded, features), axis=1)

scaler = MinMaxScaler()
row_indices = scaler.fit_transform(features)

# Column features: the mean Word2Vec vector of each description's words
# (a zero vector when the description has no words), scaled to [0, 1]
corpus = [p['about_product'].split() for p in products_dict.values()]
word2vec_model = Word2Vec(sentences=corpus, vector_size=50, window=5, min_count=1, workers=4)
product_vectors = np.array([
    np.mean([word2vec_model.wv[word] for word in words if word in word2vec_model.wv]
            or [np.zeros(50)], axis=0)
    for words in corpus
])

column_indices = scaler.fit_transform(product_vectors)

# Map the scaled values onto grid coordinates.
# NOTE(review): only column 0 of each matrix is used. For the rows that is the
# first one-hot category column, so the initial row is always 0 or
# grid_size - 1; for the columns it is the first embedding dimension only.
# Confirm this is the intended layout.
row_indices = (row_indices[:, 0] * (grid_size - 1)).astype(int)
column_indices = (column_indices[:, 0] * (grid_size - 1)).astype(int)

# Visit the products in random order. The order of *positions* is shuffled
# rather than the ID list itself, so that idx still addresses the same product
# in product_ids, row_indices and column_indices (all in products_dict order).
product_ids = list(products_dict.keys())
placement_order = list(range(len(product_ids)))
random.shuffle(placement_order)

# Place products in the grid based on normalized indices
for idx in placement_order:
    product_id = product_ids[idx]
    row = row_indices[idx]
    col = column_indices[idx]
    # On a collision, probe to the right and wrap to the next row; the grid has
    # at least as many cells as products, so a free cell always exists
    while grid[row, col] is not None:
        col = (col + 1) % grid_size
        if col == 0:
            row = (row + 1) % grid_size
    grid[row, col] = product_id
    print(f"Product ID: {product_id}, Row: {row}, Column: {col}")  # Print statement for checking

# A* search functions
def calculate_similarity(product1, product2):
    """Return a combined similarity score for two product dictionaries.

    The score is the sum of three terms:

    * the TF-IDF cosine similarity of the two ``about_product`` texts,
    * the ratio of the smaller to the larger ``rating``,
    * the ratio of the smaller to the larger ``rating_count``.

    For non-negative ratings and counts each term lies in [0, 1].

    Args:
        product1: Dictionary with ``about_product``, ``rating`` and
            ``rating_count`` entries.
        product2: Second dictionary with the same entries.

    Returns:
        The summed score as a float.
    """
    def ratio(a, b):
        # Equal values (including 0 and 0) count as a perfect match; this also
        # avoids dividing by zero when both values are 0.
        if a == b:
            return 1.0
        largest = max(a, b)
        return min(a, b) / largest if largest else 0.0

    try:
        text_vectors = TfidfVectorizer().fit_transform(
            [product1['about_product'], product2['about_product']]
        )
    except ValueError:
        # TfidfVectorizer raises ValueError when no terms can be extracted
        # (e.g. both descriptions are empty); treat the texts as unrelated.
        cosine_sim = 0.0
    else:
        cosine_sim = cosine_similarity(text_vectors[0:1], text_vectors[1:2])[0][0]

    rating_similarity = ratio(product1['rating'], product2['rating'])
    rating_count_similarity = ratio(product1['rating_count'], product2['rating_count'])
    return cosine_sim + rating_similarity + rating_count_similarity

def a_star_search_recommendations(start, grid, products_dict, max_recommendations=10):
    """Collect product IDs by a priority-driven walk over ``grid`` from ``start``.

    The cell with the lowest priority is popped from a heap, appended to the
    path, and its product is added to the recommendations if not already
    present. Each occupied 4-neighbour whose accumulated score improves is
    pushed with priority ``accumulated score + similarity of the step``.

    NOTE(review): the step cost is a similarity, and the heap pops the smallest
    value first, so less similar neighbours are expanded first. Confirm that
    this is the intended ordering for a recommender.

    Args:
        start: Product ID to start from; it must be stored in ``grid``.
        grid: 2-D NumPy object array holding product IDs or ``None``.
        products_dict: Mapping from product ID to the product dictionary that
            ``calculate_similarity`` accepts.
        max_recommendations: Stop once this many distinct products are collected.

    Returns:
        ``(recommendations, path)``: the collected product IDs (the start
        product is the first entry) and the ``(row, col)`` cells in the order
        they were popped. A cell can appear in ``path`` more than once.

    Raises:
        ValueError: If ``start`` is not stored in any cell of ``grid``.
    """
    # Take the bounds from the grid itself instead of a module-level variable
    n_rows, n_cols = grid.shape

    def get_neighbors(row, col):
        neighbors = []
        if row > 0: neighbors.append((row - 1, col))
        if row < n_rows - 1: neighbors.append((row + 1, col))
        if col > 0: neighbors.append((row, col - 1))
        if col < n_cols - 1: neighbors.append((row, col + 1))
        return neighbors

    start_position = next(
        ((i, j) for i in range(n_rows) for j in range(n_cols) if grid[i, j] == start),
        None,
    )
    if start_position is None:
        raise ValueError("Start product ID not found in the grid.")

    infinity = float('inf')
    open_set = []
    heappush(open_set, (0, start_position))
    # Cells that are absent from g_score have not been reached yet (score = inf)
    g_score = {start_position: 0}

    recommendations = []
    path = []
    while open_set and len(recommendations) < max_recommendations:
        _, current = heappop(open_set)
        path.append(current)
        current_product = grid[current[0], current[1]]
        if current_product not in recommendations:
            recommendations.append(current_product)
        for neighbor in get_neighbors(current[0], current[1]):
            neighbor_product = grid[neighbor[0], neighbor[1]]
            if not neighbor_product:
                continue  # empty cell
            # The step cost and the heuristic term are the same similarity
            # value, so it is computed once and used for both.
            step = calculate_similarity(products_dict[current_product], products_dict[neighbor_product])
            tentative_g_score = g_score[current] + step
            if tentative_g_score < g_score.get(neighbor, infinity):
                g_score[neighbor] = tentative_g_score
                heappush(open_set, (tentative_g_score + step, neighbor))

    return recommendations, path

# Start product for the search (hard-coded; it must be one of the product IDs
# that were placed on the grid)
start_product_id = "B00P93X0VO"
# The search returns (recommendations, path); unpack it so that
# `recommendations` holds only the list of product IDs
recommendations, path = a_star_search_recommendations(start_product_id, grid, products_dict, max_recommendations=10)
print("Recommended products:", recommendations)

# Visualization with improved clarity
plt.figure(figsize=(50, 50))
plt.imshow(np.full((grid_size, grid_size), np.nan), cmap='viridis', interpolation='none')  # Empty grid
plt.title('Product Grid Based on Hybrid Filtering and Embeddings')

# Define colors for categories: one sample of the tab20 colormap per distinct category
categories_unique = list(set(p['category'] for p in products_dict.values()))
category_colors = {category: plt.cm.tab20(i / len(categories_unique)) for i, category in enumerate(categories_unique)}

# Draw the cell boundaries
for i in range(grid_size):
    plt.axhline(i - 0.5, color='gray', linewidth=0.5)
    plt.axvline(i - 0.5, color='gray', linewidth=0.5)

# Label every occupied cell with its product ID on a category-colored box
for (row, col), product_id in np.ndenumerate(grid):
    if product_id is not None:
        product = products_dict[product_id]
        color = category_colors[product['category']]
        plt.text(col, row, product_id, ha='center', va='center', fontsize=8, bbox=dict(facecolor=color, edgecolor='black', boxstyle='round,pad=0.3'))

# Create a legend with category colors
handles = [mpatches.Patch(color=color, label=category) for category, color in category_colors.items()]
plt.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

plt.show()


In [None]:
import csv
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
import random
import matplotlib.patches as mpatches
from heapq import heappush, heappop

# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so prefer the new spelling and fall back on old releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
csv_file = "amazon.csv"

# Read the CSV data and create a dictionary of products keyed by product_id.
# newline='' lets the csv module handle line breaks inside quoted fields itself.
products_dict = {}
with open(csv_file, mode='r', encoding='utf-8', newline='') as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        # rating_count may contain commas, which are stripped before parsing;
        # blank or unparsable values fall back to 0
        try:
            rating_count = int(row['rating_count'].replace(',', '')) if row['rating_count'] else 0
        except ValueError:
            rating_count = 0

        # Unparsable or missing ratings fall back to 0.0
        # (float(None) raises TypeError when a row is shorter than the header)
        try:
            rating = float(row['rating'])
        except (TypeError, ValueError):
            rating = 0.0

        # Create a dictionary for each product
        product_details = {
            'product_id': row['product_id'],
            'product_name': row['product_name'],
            'category': row['category'],
            'rating': rating,
            'rating_count': rating_count,
            'about_product': row['about_product']
        }
        # Use product_id as the key; a repeated product_id overwrites the earlier row
        products_dict[row['product_id']] = product_details

# Print the first 10 product IDs for checking
print("Available product IDs:", list(products_dict.keys())[:10])

# Construct the smallest square grid that has at least one cell per product
num_products = len(products_dict)
grid_size = int(np.ceil(np.sqrt(num_products)))
grid = np.empty((grid_size, grid_size), dtype=object)

# Row features: one-hot category columns followed by rating and rating_count,
# each column scaled to [0, 1]
categories = np.array([p['category'] for p in products_dict.values()]).reshape(-1, 1)
category_encoded = encoder.fit_transform(categories)

features = np.array([[p['rating'], p['rating_count']] for p in products_dict.values()])
features = np.concatenate((category_encoded, features), axis=1)

scaler = MinMaxScaler()
row_indices = scaler.fit_transform(features)

# Column features: the mean Word2Vec vector of each description's words
# (a zero vector when the description has no words), scaled to [0, 1]
corpus = [p['about_product'].split() for p in products_dict.values()]
word2vec_model = Word2Vec(sentences=corpus, vector_size=50, window=5, min_count=1, workers=4)
product_vectors = np.array([
    np.mean([word2vec_model.wv[word] for word in words if word in word2vec_model.wv]
            or [np.zeros(50)], axis=0)
    for words in corpus
])

column_indices = scaler.fit_transform(product_vectors)

# Map the scaled values onto grid coordinates.
# NOTE(review): only column 0 of each matrix is used. For the rows that is the
# first one-hot category column, so the initial row is always 0 or
# grid_size - 1; for the columns it is the first embedding dimension only.
# Confirm this is the intended layout.
normalized_row_indices = row_indices[:, 0]
normalized_col_indices = column_indices[:, 0]
row_indices = (normalized_row_indices * (grid_size - 1)).astype(int)
column_indices = (normalized_col_indices * (grid_size - 1)).astype(int)

# Visit the products in random order. The order of *positions* is shuffled
# rather than the ID list itself, so that idx still addresses the same product
# in product_ids, row_indices and column_indices (all in products_dict order).
product_ids = list(products_dict.keys())
placement_order = list(range(len(product_ids)))
random.shuffle(placement_order)

# Place products in the grid based on normalized indices
for idx in placement_order:
    product_id = product_ids[idx]
    row = row_indices[idx]
    col = column_indices[idx]
    # On a collision, probe to the right and wrap to the next row; the grid has
    # at least as many cells as products, so a free cell always exists
    while grid[row, col] is not None:
        col = (col + 1) % grid_size
        if col == 0:
            row = (row + 1) % grid_size
    grid[row, col] = product_id

# A* search functions
def calculate_similarity(product1, product2):
    """Return a combined similarity score for two product dictionaries.

    The score is the sum of three terms:

    * the TF-IDF cosine similarity of the two ``about_product`` texts,
    * the ratio of the smaller to the larger ``rating``,
    * the ratio of the smaller to the larger ``rating_count``.

    For non-negative ratings and counts each term lies in [0, 1].

    Args:
        product1: Dictionary with ``about_product``, ``rating`` and
            ``rating_count`` entries.
        product2: Second dictionary with the same entries.

    Returns:
        The summed score as a float.
    """
    def ratio(a, b):
        # Equal values (including 0 and 0) count as a perfect match; this also
        # avoids dividing by zero when both values are 0.
        if a == b:
            return 1.0
        largest = max(a, b)
        return min(a, b) / largest if largest else 0.0

    try:
        text_vectors = TfidfVectorizer().fit_transform(
            [product1['about_product'], product2['about_product']]
        )
    except ValueError:
        # TfidfVectorizer raises ValueError when no terms can be extracted
        # (e.g. both descriptions are empty); treat the texts as unrelated.
        cosine_sim = 0.0
    else:
        cosine_sim = cosine_similarity(text_vectors[0:1], text_vectors[1:2])[0][0]

    rating_similarity = ratio(product1['rating'], product2['rating'])
    rating_count_similarity = ratio(product1['rating_count'], product2['rating_count'])
    return cosine_sim + rating_similarity + rating_count_similarity

def a_star_search_recommendations(start, grid, products_dict, max_recommendations=10):
    """Collect product IDs by a priority-driven walk over ``grid`` from ``start``.

    The cell with the lowest priority is popped from a heap, appended to the
    path, and its product is added to the recommendations if not already
    present. Each occupied 4-neighbour whose accumulated score improves is
    pushed with priority ``accumulated score + similarity of the step``.

    NOTE(review): the step cost is a similarity, and the heap pops the smallest
    value first, so less similar neighbours are expanded first. Confirm that
    this is the intended ordering for a recommender.

    Args:
        start: Product ID to start from; it must be stored in ``grid``.
        grid: 2-D NumPy object array holding product IDs or ``None``.
        products_dict: Mapping from product ID to the product dictionary that
            ``calculate_similarity`` accepts.
        max_recommendations: Stop once this many distinct products are collected.

    Returns:
        ``(recommendations, path)``: the collected product IDs (the start
        product is the first entry) and the ``(row, col)`` cells in the order
        they were popped. A cell can appear in ``path`` more than once.

    Raises:
        ValueError: If ``start`` is not stored in any cell of ``grid``.
    """
    # Take the bounds from the grid itself instead of a module-level variable
    n_rows, n_cols = grid.shape

    def get_neighbors(row, col):
        neighbors = []
        if row > 0: neighbors.append((row - 1, col))
        if row < n_rows - 1: neighbors.append((row + 1, col))
        if col > 0: neighbors.append((row, col - 1))
        if col < n_cols - 1: neighbors.append((row, col + 1))
        return neighbors

    start_position = next(
        ((i, j) for i in range(n_rows) for j in range(n_cols) if grid[i, j] == start),
        None,
    )
    if start_position is None:
        raise ValueError("Start product ID not found in the grid.")

    infinity = float('inf')
    open_set = []
    heappush(open_set, (0, start_position))
    # Cells that are absent from g_score have not been reached yet (score = inf)
    g_score = {start_position: 0}

    recommendations = []
    path = []
    while open_set and len(recommendations) < max_recommendations:
        _, current = heappop(open_set)
        path.append(current)
        current_product = grid[current[0], current[1]]
        if current_product not in recommendations:
            recommendations.append(current_product)
        for neighbor in get_neighbors(current[0], current[1]):
            neighbor_product = grid[neighbor[0], neighbor[1]]
            if not neighbor_product:
                continue  # empty cell
            # The step cost and the heuristic term are the same similarity
            # value, so it is computed once and used for both.
            step = calculate_similarity(products_dict[current_product], products_dict[neighbor_product])
            tentative_g_score = g_score[current] + step
            if tentative_g_score < g_score.get(neighbor, infinity):
                g_score[neighbor] = tentative_g_score
                heappush(open_set, (tentative_g_score + step, neighbor))

    return recommendations, path

# Starting product for the search (hard-coded; it has to be present in the grid)
start_product_id = "B00P93X0VO"
recommendations, path = a_star_search_recommendations(
    start_product_id,
    grid,
    products_dict,
    max_recommendations=10,
)
print("Recommended products:", recommendations)

# Blank canvas with one image cell per grid position
plt.figure(figsize=(40, 40))
blank_canvas = np.full((grid_size, grid_size), np.nan)
plt.imshow(blank_canvas, cmap='viridis', interpolation='none')
plt.title('Product Grid Based on Hybrid Filtering and Embeddings')

# One sample of the tab20 colormap per distinct category
categories_unique = list(set(p['category'] for p in products_dict.values()))
category_colors = {}
for i, category in enumerate(categories_unique):
    category_colors[category] = plt.cm.tab20(i / len(categories_unique))

# Thin gray lines on the cell boundaries
for i in range(grid_size):
    boundary = i - 0.5
    plt.axhline(boundary, color='gray', linewidth=0.5)
    plt.axvline(boundary, color='gray', linewidth=0.5)

# Write each product ID into its cell on a box colored by category
for (row, col), product_id in np.ndenumerate(grid):
    if product_id is None:
        continue
    product = products_dict[product_id]
    color = category_colors[product['category']]
    label_box = dict(facecolor=color, edgecolor='black', boxstyle='round,pad=0.3')
    plt.text(col, row, product_id, ha='center', va='center', fontsize=8, bbox=label_box)

# Red dot on every cell the search popped, in visiting order
for row, col in path:
    plt.plot(col, row, 'ro')

# Legend mapping colors back to categories
handles = []
for category, color in category_colors.items():
    handles.append(mpatches.Patch(color=color, label=category))
plt.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

plt.xlabel('Embedding-based Column Index')
plt.ylabel('Hybrid Filtering-based Row Index')
plt.grid(False)
plt.show()


In [None]:
# Show ID, name and category of every recommended product, one blank line
# after each entry
print("Recommended products:")
for product_id in recommendations:
    product = products_dict[product_id]
    print(
        f"Product ID: {product['product_id']}",
        f"Product Name: {product['product_name']}  ",
        f"Category: {product['category']}\n",
        sep="\n",
    )

In [None]:
# *************   post dataset change **************************
import csv
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
import random
import matplotlib.patches as mpatches
from heapq import heappush, heappop

# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so prefer the new spelling and fall back on old releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
csv_file = "amazon.csv"

# Read the CSV data and create a dictionary of products keyed by product_id.
# newline='' lets the csv module handle line breaks inside quoted fields itself.
products_dict = {}
with open(csv_file, mode='r', encoding='utf-8', newline='') as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        # rating_count may contain commas, which are stripped before parsing;
        # blank or unparsable values fall back to 0
        try:
            rating_count = int(row['rating_count'].replace(',', '')) if row['rating_count'] else 0
        except ValueError:
            rating_count = 0

        # Unparsable or missing ratings fall back to 0.0
        # (float(None) raises TypeError when a row is shorter than the header)
        try:
            rating = float(row['rating'])
        except (TypeError, ValueError):
            rating = 0.0

        # Create a dictionary for each product
        product_details = {
            'product_id': row['product_id'],
            'product_name': row['product_name'],
            'category': row['category'],
            'rating': rating,
            'rating_count': rating_count,
            'about_product': row['about_product']
        }
        # Use product_id as the key; a repeated product_id overwrites the earlier row
        products_dict[row['product_id']] = product_details

# Print the first 10 product IDs for checking
print("Available product IDs:", list(products_dict.keys())[:10])

# Construct the smallest square grid that has at least one cell per product
num_products = len(products_dict)
grid_size = int(np.ceil(np.sqrt(num_products)))
grid = np.empty((grid_size, grid_size), dtype=object)

# Row features: one-hot category columns followed by rating and rating_count,
# each column scaled to [0, 1]
categories = np.array([p['category'] for p in products_dict.values()]).reshape(-1, 1)
category_encoded = encoder.fit_transform(categories)

features = np.array([[p['rating'], p['rating_count']] for p in products_dict.values()])
features = np.concatenate((category_encoded, features), axis=1)

scaler = MinMaxScaler()
row_indices = scaler.fit_transform(features)

# Column features: the mean Word2Vec vector of each description's words
# (a zero vector when the description has no words), scaled to [0, 1]
corpus = [p['about_product'].split() for p in products_dict.values()]
word2vec_model = Word2Vec(sentences=corpus, vector_size=50, window=5, min_count=1, workers=4)
product_vectors = np.array([
    np.mean([word2vec_model.wv[word] for word in words if word in word2vec_model.wv]
            or [np.zeros(50)], axis=0)
    for words in corpus
])

column_indices = scaler.fit_transform(product_vectors)

# Map the scaled values onto grid coordinates.
# NOTE(review): only column 0 of each matrix is used. For the rows that is the
# first one-hot category column, so the initial row is always 0 or
# grid_size - 1; for the columns it is the first embedding dimension only.
# Confirm this is the intended layout.
normalized_row_indices = row_indices[:, 0]
normalized_col_indices = column_indices[:, 0]
row_indices = (normalized_row_indices * (grid_size - 1)).astype(int)
column_indices = (normalized_col_indices * (grid_size - 1)).astype(int)

# Visit the products in random order. The order of *positions* is shuffled
# rather than the ID list itself, so that idx still addresses the same product
# in product_ids, row_indices and column_indices (all in products_dict order).
product_ids = list(products_dict.keys())
placement_order = list(range(len(product_ids)))
random.shuffle(placement_order)

# Place products in the grid based on normalized indices
for idx in placement_order:
    product_id = product_ids[idx]
    row = row_indices[idx]
    col = column_indices[idx]
    # On a collision, probe to the right and wrap to the next row; the grid has
    # at least as many cells as products, so a free cell always exists
    while grid[row, col] is not None:
        col = (col + 1) % grid_size
        if col == 0:
            row = (row + 1) % grid_size
    grid[row, col] = product_id
    print(f"Product ID: {product_id}, Normalized Row: {normalized_row_indices[idx]:.2f}, Normalized Column: {normalized_col_indices[idx]:.2f}")

# A* search functions
def calculate_similarity(product1, product2):
    """Return a combined similarity score for two product dictionaries.

    The score is the sum of three terms:

    * the TF-IDF cosine similarity of the two ``about_product`` texts,
    * the ratio of the smaller to the larger ``rating``,
    * the ratio of the smaller to the larger ``rating_count``.

    For non-negative ratings and counts each term lies in [0, 1].

    Args:
        product1: Dictionary with ``about_product``, ``rating`` and
            ``rating_count`` entries.
        product2: Second dictionary with the same entries.

    Returns:
        The summed score as a float.
    """
    def ratio(a, b):
        # Equal values (including 0 and 0) count as a perfect match; this also
        # avoids dividing by zero when both values are 0.
        if a == b:
            return 1.0
        largest = max(a, b)
        return min(a, b) / largest if largest else 0.0

    try:
        text_vectors = TfidfVectorizer().fit_transform(
            [product1['about_product'], product2['about_product']]
        )
    except ValueError:
        # TfidfVectorizer raises ValueError when no terms can be extracted
        # (e.g. both descriptions are empty); treat the texts as unrelated.
        cosine_sim = 0.0
    else:
        cosine_sim = cosine_similarity(text_vectors[0:1], text_vectors[1:2])[0][0]

    rating_similarity = ratio(product1['rating'], product2['rating'])
    rating_count_similarity = ratio(product1['rating_count'], product2['rating_count'])
    return cosine_sim + rating_similarity + rating_count_similarity

def a_star_search_recommendations(start, grid, products_dict, max_recommendations=10):
    """Collect product IDs by a priority-driven walk over ``grid`` from ``start``.

    The cell with the lowest priority is popped from a heap, appended to the
    path, and its product is added to the recommendations if not already
    present. Each occupied 4-neighbour whose accumulated score improves is
    pushed with priority ``accumulated score + similarity of the step``.

    NOTE(review): the step cost is a similarity, and the heap pops the smallest
    value first, so less similar neighbours are expanded first. Confirm that
    this is the intended ordering for a recommender.

    Args:
        start: Product ID to start from; it must be stored in ``grid``.
        grid: 2-D NumPy object array holding product IDs or ``None``.
        products_dict: Mapping from product ID to the product dictionary that
            ``calculate_similarity`` accepts.
        max_recommendations: Stop once this many distinct products are collected.

    Returns:
        ``(recommendations, path)``: the collected product IDs (the start
        product is the first entry) and the ``(row, col)`` cells in the order
        they were popped. A cell can appear in ``path`` more than once.

    Raises:
        ValueError: If ``start`` is not stored in any cell of ``grid``.
    """
    # Take the bounds from the grid itself instead of a module-level variable
    n_rows, n_cols = grid.shape

    def get_neighbors(row, col):
        neighbors = []
        if row > 0: neighbors.append((row - 1, col))
        if row < n_rows - 1: neighbors.append((row + 1, col))
        if col > 0: neighbors.append((row, col - 1))
        if col < n_cols - 1: neighbors.append((row, col + 1))
        return neighbors

    start_position = next(
        ((i, j) for i in range(n_rows) for j in range(n_cols) if grid[i, j] == start),
        None,
    )
    if start_position is None:
        raise ValueError("Start product ID not found in the grid.")

    infinity = float('inf')
    open_set = []
    heappush(open_set, (0, start_position))
    # Cells that are absent from g_score have not been reached yet (score = inf)
    g_score = {start_position: 0}

    recommendations = []
    path = []
    while open_set and len(recommendations) < max_recommendations:
        _, current = heappop(open_set)
        path.append(current)
        current_product = grid[current[0], current[1]]
        if current_product not in recommendations:
            recommendations.append(current_product)
        for neighbor in get_neighbors(current[0], current[1]):
            neighbor_product = grid[neighbor[0], neighbor[1]]
            if not neighbor_product:
                continue  # empty cell
            # The step cost and the heuristic term are the same similarity
            # value, so it is computed once and used for both.
            step = calculate_similarity(products_dict[current_product], products_dict[neighbor_product])
            tentative_g_score = g_score[current] + step
            if tentative_g_score < g_score.get(neighbor, infinity):
                g_score[neighbor] = tentative_g_score
                heappush(open_set, (tentative_g_score + step, neighbor))

    return recommendations, path

# Start product for the search (hard-coded; it must be one of the product IDs
# that were placed on the grid)
start_product_id = "B07VLDQMV3"
recommendations, path = a_star_search_recommendations(start_product_id, grid, products_dict, max_recommendations=10)
print("Recommended products:", recommendations)

# Visualization with A* path
plt.figure(figsize=(50, 50))
plt.imshow(np.full((grid_size, grid_size), np.nan), cmap='viridis', interpolation='none')  # Empty grid
plt.title('Product Grid Based on Hybrid Filtering and Embeddings')

# Define colors for categories: one sample of the tab20 colormap per distinct category
categories_unique = list(set(p['category'] for p in products_dict.values()))
category_colors = {category: plt.cm.tab20(i / len(categories_unique)) for i, category in enumerate(categories_unique)}

# Draw the cell boundaries
for i in range(grid_size):
    plt.axhline(i - 0.5, color='gray', linewidth=0.5)
    plt.axvline(i - 0.5, color='gray', linewidth=0.5)

# Label every occupied cell with its product ID on a category-colored box
for (row, col), product_id in np.ndenumerate(grid):
    if product_id is not None:
        product = products_dict[product_id]
        color = category_colors[product['category']]
        plt.text(col, row, product_id, ha='center', va='center', fontsize=8, bbox=dict(facecolor=color, edgecolor='black', boxstyle='round,pad=0.3'))

# Highlight the A* path
for (row, col) in path:
    plt.plot(col, row, 'ro')  # Mark the path with red dots

# Highlight the recommended products: redraw their labels on a yellow box.
# A single pass over the grid with a set lookup replaces one full grid scan
# per recommendation.
recommended_ids = set(recommendations)
for (row, col), product_id in np.ndenumerate(grid):
    if product_id in recommended_ids:
        plt.text(col, row, product_id, ha='center', va='center', fontsize=8, bbox=dict(facecolor='yellow', edgecolor='black', boxstyle='round,pad=0.3'))

# Create a legend with category colors
handles = [mpatches.Patch(color=color, label=category) for category, color in category_colors.items()]
plt.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

plt.xlabel('Embedding-based Column Index')
plt.ylabel('Hybrid Filtering-based Row Index')
# The original cell ended in a bare `plt` expression; render the figure
# explicitly, as the other cells do
plt.show()


In [None]:
# Show ID, name and category of every recommended product, one blank line
# after each entry
print("Recommended products:")
for product_id in recommendations:
    product = products_dict[product_id]
    print(
        f"Product ID: {product['product_id']}",
        f"Product Name: {product['product_name']}  ",
        f"Category: {product['category']}\n",
        sep="\n",
    )

In [None]:
# ********************** changing the color completely **************************
import csv
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
import random
import matplotlib.patches as mpatches
from heapq import heappush, heappop

# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so prefer the new spelling and fall back on old releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
csv_file = "amazon.csv"

# Read the CSV data and create a dictionary of products keyed by product_id.
# newline='' lets the csv module handle line breaks inside quoted fields itself.
products_dict = {}
with open(csv_file, mode='r', encoding='utf-8', newline='') as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        # rating_count may contain commas, which are stripped before parsing;
        # blank or unparsable values fall back to 0
        try:
            rating_count = int(row['rating_count'].replace(',', '')) if row['rating_count'] else 0
        except ValueError:
            rating_count = 0

        # Unparsable or missing ratings fall back to 0.0
        # (float(None) raises TypeError when a row is shorter than the header)
        try:
            rating = float(row['rating'])
        except (TypeError, ValueError):
            rating = 0.0

        # Create a dictionary for each product
        product_details = {
            'product_id': row['product_id'],
            'product_name': row['product_name'],
            'category': row['category'],
            'rating': rating,
            'rating_count': rating_count,
            'about_product': row['about_product']
        }
        # Use product_id as the key; a repeated product_id overwrites the earlier row
        products_dict[row['product_id']] = product_details

# Print the first 10 product IDs for checking
print("Available product IDs:", list(products_dict.keys())[:10])

# Construct the smallest square grid that has at least one cell per product
num_products = len(products_dict)
grid_size = int(np.ceil(np.sqrt(num_products)))
grid = np.empty((grid_size, grid_size), dtype=object)

# Row features: one-hot category columns followed by rating and rating_count,
# each column scaled to [0, 1]
categories = np.array([p['category'] for p in products_dict.values()]).reshape(-1, 1)
category_encoded = encoder.fit_transform(categories)

features = np.array([[p['rating'], p['rating_count']] for p in products_dict.values()])
features = np.concatenate((category_encoded, features), axis=1)

scaler = MinMaxScaler()
row_indices = scaler.fit_transform(features)

# Column features: the mean Word2Vec vector of each description's words
# (a zero vector when the description has no words), scaled to [0, 1]
corpus = [p['about_product'].split() for p in products_dict.values()]
word2vec_model = Word2Vec(sentences=corpus, vector_size=50, window=5, min_count=1, workers=4)
product_vectors = np.array([
    np.mean([word2vec_model.wv[word] for word in words if word in word2vec_model.wv]
            or [np.zeros(50)], axis=0)
    for words in corpus
])

column_indices = scaler.fit_transform(product_vectors)

# Map the scaled values onto grid coordinates.
# NOTE(review): only column 0 of each matrix is used. For the rows that is the
# first one-hot category column, so the initial row is always 0 or
# grid_size - 1; for the columns it is the first embedding dimension only.
# Confirm this is the intended layout.
normalized_row_indices = row_indices[:, 0]
normalized_col_indices = column_indices[:, 0]
row_indices = (normalized_row_indices * (grid_size - 1)).astype(int)
column_indices = (normalized_col_indices * (grid_size - 1)).astype(int)

# Visit the products in random order. The order of *positions* is shuffled
# rather than the ID list itself, so that idx still addresses the same product
# in product_ids, row_indices and column_indices (all in products_dict order).
product_ids = list(products_dict.keys())
placement_order = list(range(len(product_ids)))
random.shuffle(placement_order)

# Place products in the grid based on normalized indices
for idx in placement_order:
    product_id = product_ids[idx]
    row = row_indices[idx]
    col = column_indices[idx]
    # On a collision, probe to the right and wrap to the next row; the grid has
    # at least as many cells as products, so a free cell always exists
    while grid[row, col] is not None:
        col = (col + 1) % grid_size
        if col == 0:
            row = (row + 1) % grid_size
    grid[row, col] = product_id

# A* search functions
def calculate_similarity(product1, product2):
    """Return a combined similarity score for two product dictionaries.

    The score is the sum of three terms:

    * the TF-IDF cosine similarity of the two ``about_product`` texts,
    * the ratio of the smaller to the larger ``rating``,
    * the ratio of the smaller to the larger ``rating_count``.

    For non-negative ratings and counts each term lies in [0, 1].

    Args:
        product1: Dictionary with ``about_product``, ``rating`` and
            ``rating_count`` entries.
        product2: Second dictionary with the same entries.

    Returns:
        The summed score as a float.
    """
    def ratio(a, b):
        # Equal values (including 0 and 0) count as a perfect match; this also
        # avoids dividing by zero when both values are 0.
        if a == b:
            return 1.0
        largest = max(a, b)
        return min(a, b) / largest if largest else 0.0

    try:
        text_vectors = TfidfVectorizer().fit_transform(
            [product1['about_product'], product2['about_product']]
        )
    except ValueError:
        # TfidfVectorizer raises ValueError when no terms can be extracted
        # (e.g. both descriptions are empty); treat the texts as unrelated.
        cosine_sim = 0.0
    else:
        cosine_sim = cosine_similarity(text_vectors[0:1], text_vectors[1:2])[0][0]

    rating_similarity = ratio(product1['rating'], product2['rating'])
    rating_count_similarity = ratio(product1['rating_count'], product2['rating_count'])
    return cosine_sim + rating_similarity + rating_count_similarity

def a_star_search_recommendations(start, grid, products_dict, max_recommendations=10):
    """Collect product IDs by a priority-driven walk over ``grid`` from ``start``.

    The cell with the lowest priority is popped from a heap, appended to the
    path, and its product is added to the recommendations if not already
    present. Each occupied 4-neighbour whose accumulated score improves is
    pushed with priority ``accumulated score + heuristic(neighbour)``, where
    the heuristic is the neighbour's average similarity to every other product.

    NOTE(review): both the step cost and the heuristic are similarities, and
    the heap pops the smallest value first, so less similar products are
    expanded first. Confirm that this is the intended ordering.

    Args:
        start: Product ID to start from; it must be stored in ``grid``.
        grid: 2-D NumPy object array holding product IDs or ``None``.
        products_dict: Mapping from product ID to the product dictionary that
            ``calculate_similarity`` accepts.
        max_recommendations: Stop once this many distinct products are collected.

    Returns:
        ``(recommendations, path)``: the collected product IDs (the start
        product is the first entry) and the ``(row, col)`` cells in the order
        they were popped. A cell can appear in ``path`` more than once.

    Raises:
        ValueError: If ``start`` is not stored in any cell of ``grid``.
    """
    # Take the bounds from the grid itself instead of a module-level variable
    n_rows, n_cols = grid.shape

    # The heuristic compares one product with every other product, so each
    # result is kept and reused when the same product is relaxed again.
    heuristic_cache = {}

    def heuristic(product_id):
        if product_id not in heuristic_cache:
            current_product = products_dict[product_id]
            scores = [calculate_similarity(current_product, other)
                      for other_id, other in products_dict.items() if other_id != product_id]
            # With no other product there is nothing to average; use 0.0
            # instead of the NaN that np.mean returns for an empty list
            heuristic_cache[product_id] = float(np.mean(scores)) if scores else 0.0
        return heuristic_cache[product_id]

    def get_neighbors(row, col):
        # Only occupied cells are returned
        neighbors = []
        if row > 0 and grid[row - 1, col] is not None: neighbors.append((row - 1, col))
        if row < n_rows - 1 and grid[row + 1, col] is not None: neighbors.append((row + 1, col))
        if col > 0 and grid[row, col - 1] is not None: neighbors.append((row, col - 1))
        if col < n_cols - 1 and grid[row, col + 1] is not None: neighbors.append((row, col + 1))
        return neighbors

    start_position = next(
        ((i, j) for i in range(n_rows) for j in range(n_cols) if grid[i, j] == start),
        None,
    )
    if start_position is None:
        raise ValueError("Start product ID not found in the grid.")

    infinity = float('inf')
    open_set = []
    heappush(open_set, (0, start_position))
    # Cells that are absent from g_score have not been reached yet (score = inf)
    g_score = {start_position: 0}

    recommendations = []
    path = []
    while open_set and len(recommendations) < max_recommendations:
        _, current = heappop(open_set)
        path.append(current)
        current_product = grid[current[0], current[1]]
        if current_product not in recommendations:
            recommendations.append(current_product)
        for neighbor in get_neighbors(current[0], current[1]):
            neighbor_product = grid[neighbor[0], neighbor[1]]
            if not neighbor_product:
                continue
            tentative_g_score = g_score[current] + calculate_similarity(products_dict[current_product], products_dict[neighbor_product])
            if tentative_g_score < g_score.get(neighbor, infinity):
                g_score[neighbor] = tentative_g_score
                heappush(open_set, (tentative_g_score + heuristic(neighbor_product), neighbor))

    return recommendations, path

# Starting product for the search (hard-coded; it has to be present in the grid)
start_product_id = "B00IN9AGAE"
recommendations, path = a_star_search_recommendations(
    start_product_id,
    grid,
    products_dict,
    max_recommendations=10,
)
print("Recommended products:", recommendations)

# Blank canvas with one image cell per grid position
plt.figure(figsize=(20, 20))
blank_canvas = np.full((grid_size, grid_size), np.nan)
plt.imshow(blank_canvas, cmap='viridis', interpolation='none')
plt.title('Product Grid Based on Hybrid Filtering and Embeddings')

# One sample of the tab20 colormap per distinct category
categories_unique = list(set(p['category'] for p in products_dict.values()))
category_colors = {}
for i, category in enumerate(categories_unique):
    category_colors[category] = plt.cm.tab20(i / len(categories_unique))

# Thin gray lines on the cell boundaries
for i in range(grid_size):
    boundary = i - 0.5
    plt.axhline(boundary, color='gray', linewidth=0.5)
    plt.axvline(boundary, color='gray', linewidth=0.5)

# Fill each occupied cell with its category color and write the product ID on top
for (row, col), product_id in np.ndenumerate(grid):
    if product_id is None:
        continue
    product = products_dict[product_id]
    color = category_colors[product['category']]
    cell_patch = plt.Rectangle((col - 0.5, row - 0.5), 1, 1, edgecolor='black', facecolor=color, lw=1)
    plt.gca().add_patch(cell_patch)
    plt.text(col, row, product_id, ha='center', va='center', fontsize=8, color='black')

# Paint the visited cells whose product was recommended in a distinct color
recommendation_color = 'red'
for row, col in path:
    if grid[row, col] not in recommendations:
        continue
    highlight = plt.Rectangle((col - 0.5, row - 0.5), 1, 1, edgecolor='red', facecolor=recommendation_color, lw=2)
    plt.gca().add_patch(highlight)

# Legend: one entry per category plus one for the recommended products
handles = []
for category, color in category_colors.items():
    handles.append(mpatches.Patch(color=color, label=category))
handles.append(mpatches.Patch(color=recommendation_color, label='Recommended Products'))
plt.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left')

plt.show()


In [None]:
import csv
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
import random
import matplotlib.patches as mpatches
from heapq import heappush, heappop

# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword, so try the new spelling first and fall back on older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
csv_file = "amazon_reviews.csv"

# Read the CSV data and create a dictionary of products
products_dict = {}
# newline='' lets the csv module handle line breaks inside quoted fields itself
with open(csv_file, mode='r', encoding='utf-8', newline='') as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        # Extract and clean the rating_number (thousands separators removed; blank -> 0)
        try:
            rating_number = int(row['rating_number'].replace(',', '')) if row['rating_number'] else 0
        except ValueError:
            rating_number = 0

        # Extract and clean the average_rating; missing or malformed values become 0.0
        try:
            average_rating = float(row['average_rating'])
        except (TypeError, ValueError):
            average_rating = 0.0

        # Create a dictionary for each product
        product_details = {
            'product_id': row['parent_asin'],  # Assuming parent_asin is the unique product identifier
            'product_name': row['title'],
            'category': row['main_category'],
            'rating': average_rating,
            'rating_count': rating_number,
            # A short row yields None here; store '' so later text processing does not crash
            'about_product': row['description'] or '',
            'bought_together': row['bought_together']
        }
        # Use parent_asin as the key for the products dictionary (a repeated ID overwrites the earlier row)
        products_dict[row['parent_asin']] = product_details

# Print available product IDs
print("Available product IDs:", list(products_dict.keys())[:10])  # Print first 10 product IDs for checking

# Construct the grid: the smallest square that can hold every product
num_products = len(products_dict)
grid_size = int(np.ceil(np.sqrt(num_products)))
grid = np.empty((grid_size, grid_size), dtype=object)

# Calculate row indices using hybrid filtering (one-hot category + rating + rating count)
categories = np.array([p['category'] for p in products_dict.values()]).reshape(-1, 1)
category_encoded = encoder.fit_transform(categories)

features = np.array([[p['rating'], p['rating_count']] for p in products_dict.values()])
features = np.concatenate((category_encoded, features), axis=1)

scaler = MinMaxScaler()
row_indices = scaler.fit_transform(features)

# Calculate column indices using Word2Vec embeddings for better semantic similarity
corpus = [p['about_product'].split() for p in products_dict.values()]
word2vec_model = Word2Vec(sentences=corpus, vector_size=50, window=5, min_count=1, workers=4)
# A product's vector is the mean of its word vectors; an empty description gets a zero vector
product_vectors = np.array([np.mean([word2vec_model.wv[word] for word in words if word in word2vec_model.wv]
                                    or [np.zeros(50)], axis=0) for words in corpus])

column_indices = scaler.fit_transform(product_vectors)

# Normalize indices to fit within grid size
# NOTE(review): only column 0 of each scaled matrix is used - for rows that is the first
# one-hot category column, for columns the first embedding dimension. Confirm this is intended.
normalized_row_indices = row_indices[:, 0]
normalized_col_indices = column_indices[:, 0]
row_indices = (normalized_row_indices * (grid_size - 1)).astype(int)
column_indices = (normalized_col_indices * (grid_size - 1)).astype(int)

# Shuffle the placement order, but remember where each product sits in the
# feature arrays: those were computed in products_dict order, so indexing them
# by shuffled position would hand every product some other product's cell.
product_ids = list(products_dict.keys())
feature_index = {product_id: idx for idx, product_id in enumerate(product_ids)}
random.shuffle(product_ids)

# Place products in the grid based on normalized indices
for product_id in product_ids:
    idx = feature_index[product_id]
    row = row_indices[idx]
    col = column_indices[idx]
    # Cell taken: probe to the right, wrapping to the next row. The grid has at
    # least num_products cells, so a free one always exists.
    while grid[row, col] is not None:
        col = (col + 1) % grid_size
        if col == 0:
            row = (row + 1) % grid_size
    grid[row, col] = product_id
    print(f"Product ID: {product_id}, Normalized Row: {normalized_row_indices[idx]:.2f}, Normalized Column: {normalized_col_indices[idx]:.2f}")

# A* search functions
def calculate_similarity(product1, product2):
    """Score how alike two products are; higher means more similar.

    The score is the sum of three parts:
      * TF-IDF cosine similarity of the two 'about_product' texts,
      * min/max ratio of the two 'rating' values,
      * min/max ratio of the two 'rating_count' values.
    A ratio is 0 when either value is 0. For non-negative ratings and counts
    every part is at most 1, so the score is at most 3.

    Args:
        product1, product2: dicts with 'about_product', 'rating' and
            'rating_count' keys.

    Returns:
        The summed similarity score.
    """
    def ratio(a, b):
        # min/max ratio, defined as 0 when either side is 0 (avoids dividing by zero)
        if a == 0 or b == 0:
            return 0
        return min(a, b) / max(a, b)

    text1 = product1['about_product'] or ''
    text2 = product2['about_product'] or ''
    if not text1.strip() or not text2.strip():
        # A blank description shares no terms with anything; fitting TF-IDF on
        # two blank texts would raise instead of returning 0.
        cosine_sim = 0.0
    else:
        try:
            text_vectors = TfidfVectorizer().fit_transform([text1, text2])
        except ValueError:
            # Empty vocabulary, e.g. both texts are punctuation-only such as "[]"
            cosine_sim = 0.0
        else:
            cosine_sim = cosine_similarity(text_vectors[0:1], text_vectors[1:2])[0][0]

    rating_similarity = ratio(product1['rating'], product2['rating'])
    rating_count_similarity = ratio(product1['rating_count'], product2['rating_count'])

    similarity_score = cosine_sim + rating_similarity + rating_count_similarity
    return similarity_score

def a_star_search_recommendations(start, grid, products_dict, max_recommendations=10):
    """Collect recommendations by a best-first walk over the grid from `start`.

    Cells are taken from a min-heap in order of increasing priority, and the
    product in each expanded cell is added to the result (the start product is
    always first). Stepping to a 4-connected neighbour costs
    ``3 - similarity``, so steps to similar products are cheaper and are
    expanded before steps to dissimilar ones.

    Args:
        start: product ID to start from; must be present in `grid`.
        grid: 2-D object array of product IDs with None in empty cells.
        products_dict: maps product ID to the product dict passed to
            calculate_similarity.
        max_recommendations: stop once this many distinct products are collected.

    Returns:
        (recommendations, path): the product IDs in the order they were first
        expanded, and the (row, col) of every expansion. A cell can appear in
        `path` more than once if it was re-queued with a lower cost.

    Raises:
        ValueError: if `start` is not in `grid`.
    """
    # calculate_similarity sums three parts that are each at most 1
    max_similarity = 3.0
    # Take the bounds from the grid that was passed in, not from a module global
    n_rows, n_cols = grid.shape

    def heuristic(product1, product2):
        # Dissimilarity of two products: 0 for a perfect match, larger when less alike
        return max_similarity - calculate_similarity(products_dict[product1], products_dict[product2])

    def get_neighbors(row, col):
        # Up, down, left, right - restricted to cells inside the grid
        candidates = ((row - 1, col), (row + 1, col), (row, col - 1), (row, col + 1))
        return [(r, c) for r, c in candidates if 0 <= r < n_rows and 0 <= c < n_cols]

    start_position = None
    if start is not None:  # None would otherwise match an empty cell
        start_position = next(
            ((r, c) for r in range(n_rows) for c in range(n_cols) if grid[r, c] == start),
            None,
        )
    if start_position is None:
        raise ValueError(f"start product {start!r} is not in the grid")

    open_set = []
    heappush(open_set, (0, start_position))
    # Best known path cost per cell; cells not in the dict count as infinitely far
    g_score = {start_position: 0}

    recommendations = []
    path = []
    while open_set and len(recommendations) < max_recommendations:
        _, current = heappop(open_set)
        path.append(current)
        current_product = grid[current[0], current[1]]
        if current_product not in recommendations:
            recommendations.append(current_product)
        for neighbor in get_neighbors(current[0], current[1]):
            neighbor_product = grid[neighbor[0], neighbor[1]]
            if not neighbor_product:
                continue  # empty cell
            # Computed once per edge and used for both the path cost and the priority
            step_cost = heuristic(current_product, neighbor_product)
            tentative_g_score = g_score[current] + step_cost
            if tentative_g_score < g_score.get(neighbor, float('inf')):
                g_score[neighbor] = tentative_g_score
                heappush(open_set, (tentative_g_score + step_cost, neighbor))

    return recommendations, path

# Run the search from a product ID known to be in the grid (see the IDs printed above)
start_product_id = "B00KAS9ZMG"
recommendations, path = a_star_search_recommendations(
    start_product_id, grid, products_dict, max_recommendations=10
)
print("Recommended products:", recommendations)

# Visualization with A* path
plt.figure(figsize=(50, 50))
ax = plt.gca()
empty_grid = np.full((grid_size, grid_size), np.nan)  # all-NaN image: a blank grid to draw on
ax.imshow(empty_grid, cmap='viridis', interpolation='none')
ax.set_title('Product Grid Based on Hybrid Filtering and Embeddings')

# One tab20 colour per distinct category
categories_unique = list({p['category'] for p in products_dict.values()})
category_colors = {}
for slot, category in enumerate(categories_unique):
    category_colors[category] = plt.cm.tab20(slot / len(categories_unique))

# Thin grey lines on the cell boundaries
for k in range(grid_size):
    boundary = k - 0.5
    ax.axhline(boundary, color='gray', linewidth=0.5)
    ax.axvline(boundary, color='gray', linewidth=0.5)

# Label every occupied cell with its product ID on a category-coloured box
for (r, c), pid in np.ndenumerate(grid):
    if pid is None:
        continue
    face = category_colors[products_dict[pid]['category']]
    ax.text(c, r, pid, ha='center', va='center', fontsize=8,
            bbox=dict(facecolor=face, edgecolor='black', boxstyle='round,pad=0.3'))

# Red dot on every cell the search expanded
for r, c in path:
    ax.plot(c, r, 'ro')

# Re-draw the labels of recommended products on a yellow box
for rec_id in recommendations:
    for (r, c), pid in np.ndenumerate(grid):
        if pid == rec_id:
            ax.text(c, r, rec_id, ha='center', va='center', fontsize=8,
                    bbox=dict(facecolor='yellow', edgecolor='black', boxstyle='round,pad=0.3'))

# Legend with one entry per category
handles = [mpatches.Patch(color=shade, label=name) for name, shade in category_colors.items()]
ax.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

ax.set_xlabel('Embedding-based Column Index')
ax.set_ylabel('Hybrid Filtering-based Row Index')
plt.show()


In [None]:
# Show the ID, name and category of every recommended product
print("Recommended products:")
for product in map(products_dict.__getitem__, recommendations):
    # One print call per product; sep puts each field on its own line
    print(
        f"Product ID: {product['product_id']}",
        f"Product Name: {product['product_name']}  ",
        f"Category: {product['category']}\n",
        sep="\n",
    )

In [None]:
import csv
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
import random
import matplotlib.patches as mpatches
from heapq import heappush, heappop

# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword, so try the new spelling first and fall back on older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
csv_file = "amazon_reviews.csv"

# Read the CSV data and create a dictionary of products
products_dict = {}
# newline='' lets the csv module handle line breaks inside quoted fields itself
with open(csv_file, mode='r', encoding='utf-8', newline='') as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        # Extract and clean the rating_number (thousands separators removed; blank -> 0)
        try:
            rating_number = int(row['rating_number'].replace(',', '')) if row['rating_number'] else 0
        except ValueError:
            rating_number = 0

        # Extract and clean the average_rating; missing or malformed values become 0.0
        try:
            average_rating = float(row['average_rating'])
        except (TypeError, ValueError):
            average_rating = 0.0

        # Create a dictionary for each product
        product_details = {
            'product_id': row['parent_asin'],  # Assuming parent_asin is the unique product identifier
            'product_name': row['title'],
            'category': row['main_category'],
            'rating': average_rating,
            'rating_count': rating_number,
            # A short row yields None here; store '' so later text processing does not crash
            'about_product': row['description'] or '',
            'bought_together': row['bought_together']
        }
        # Use parent_asin as the key for the products dictionary (a repeated ID overwrites the earlier row)
        products_dict[row['parent_asin']] = product_details

# Print available product IDs
print("Available product IDs:", list(products_dict.keys())[:10])  # Print first 10 product IDs for checking

# Construct the grid: the smallest square that can hold every product
num_products = len(products_dict)
grid_size = int(np.ceil(np.sqrt(num_products)))
grid = np.empty((grid_size, grid_size), dtype=object)

# Calculate row indices using hybrid filtering (one-hot category + rating + log rating count)
categories = np.array([p['category'] for p in products_dict.values()]).reshape(-1, 1)
category_encoded = encoder.fit_transform(categories)

# log1p compresses the heavy-tailed rating counts; it must be *called* on the
# product's count (subscripting the ufunc raises TypeError).
features = np.array([[p['rating'], np.log1p(p['rating_count'])] for p in products_dict.values()])
features = np.concatenate((category_encoded, features), axis=1)

scaler = MinMaxScaler()
row_indices = scaler.fit_transform(features)

# Calculate column indices using Word2Vec embeddings for better semantic similarity
corpus = [p['about_product'].split() for p in products_dict.values()]
word2vec_model = Word2Vec(sentences=corpus, vector_size=50, window=5, min_count=1, workers=4)
# A product's vector is the mean of its word vectors; an empty description gets a zero vector
product_vectors = np.array([np.mean([word2vec_model.wv[word] for word in words if word in word2vec_model.wv]
                                    or [np.zeros(50)], axis=0) for words in corpus])

column_indices = scaler.fit_transform(product_vectors)

# Normalize indices to fit within grid size
# NOTE(review): only column 0 of each scaled matrix is used - for rows that is the first
# one-hot category column, for columns the first embedding dimension. Confirm this is intended.
normalized_row_indices = row_indices[:, 0]
normalized_col_indices = column_indices[:, 0]
row_indices = (normalized_row_indices * (grid_size - 1)).astype(int)
column_indices = (normalized_col_indices * (grid_size - 1)).astype(int)

# The feature arrays are in products_dict order; remember each product's position
# there so the category-sorted placement below still reads that product's own indices.
feature_index = {product_id: idx for idx, product_id in enumerate(products_dict)}

# Sort products by category to place similar categories together
sorted_product_ids = sorted(products_dict.keys(), key=lambda x: products_dict[x]['category'])

# Place products in the grid based on normalized indices
for product_id in sorted_product_ids:
    idx = feature_index[product_id]
    row = row_indices[idx]
    col = column_indices[idx]
    # Cell taken: probe to the right, wrapping to the next row. The grid has at
    # least num_products cells, so a free one always exists.
    while grid[row, col] is not None:
        col = (col + 1) % grid_size
        if col == 0:
            row = (row + 1) % grid_size
    grid[row, col] = product_id
    print(f"Product ID: {product_id}, Normalized Row: {normalized_row_indices[idx]:.2f}, Normalized Column: {normalized_col_indices[idx]:.2f}")

# A* search functions
def calculate_similarity(product1, product2):
    """Score how alike two products are; higher means more similar.

    The score is the sum of three parts:
      * TF-IDF cosine similarity of the two 'about_product' texts,
      * min/max ratio of the two 'rating' values,
      * min/max ratio of the two 'rating_count' values.
    A ratio is 0 when either value is 0. For non-negative ratings and counts
    every part is at most 1, so the score is at most 3.

    Args:
        product1, product2: dicts with 'about_product', 'rating' and
            'rating_count' keys.

    Returns:
        The summed similarity score.
    """
    def ratio(a, b):
        # min/max ratio, defined as 0 when either side is 0 (avoids dividing by zero)
        if a == 0 or b == 0:
            return 0
        return min(a, b) / max(a, b)

    text1 = product1['about_product'] or ''
    text2 = product2['about_product'] or ''
    if not text1.strip() or not text2.strip():
        # A blank description shares no terms with anything; fitting TF-IDF on
        # two blank texts would raise instead of returning 0.
        cosine_sim = 0.0
    else:
        try:
            text_vectors = TfidfVectorizer().fit_transform([text1, text2])
        except ValueError:
            # Empty vocabulary, e.g. both texts are punctuation-only such as "[]"
            cosine_sim = 0.0
        else:
            cosine_sim = cosine_similarity(text_vectors[0:1], text_vectors[1:2])[0][0]

    rating_similarity = ratio(product1['rating'], product2['rating'])
    rating_count_similarity = ratio(product1['rating_count'], product2['rating_count'])

    similarity_score = cosine_sim + rating_similarity + rating_count_similarity
    return similarity_score

def a_star_search_recommendations(start, grid, products_dict, max_recommendations=10):
    """Collect recommendations by a best-first walk over the grid from `start`.

    Cells are taken from a min-heap in order of increasing priority, and the
    product in each expanded cell is added to the result (the start product is
    always first). Stepping to a 4-connected neighbour costs
    ``3 - similarity``; the priority of a queued cell is its path cost plus
    ``grid_distance - similarity`` for the step that reached it, so steps to
    similar products are expanded before steps to dissimilar ones.

    Args:
        start: product ID to start from; must be present in `grid`.
        grid: 2-D object array of product IDs with None in empty cells.
        products_dict: maps product ID to the product dict passed to
            calculate_similarity.
        max_recommendations: stop once this many distinct products are collected.

    Returns:
        (recommendations, path): the product IDs in the order they were first
        expanded, and the (row, col) of every expansion. A cell can appear in
        `path` more than once if it was re-queued with a lower cost.

    Raises:
        ValueError: if `start` is not in `grid`.
    """
    # calculate_similarity sums three parts that are each at most 1
    max_similarity = 3.0
    # Take the bounds from the grid that was passed in, not from a module global
    n_rows, n_cols = grid.shape

    def heuristic(position1, position2, product_similarity):
        # Manhattan distance between the two cells, discounted by how alike their products are
        grid_distance = abs(position1[0] - position2[0]) + abs(position1[1] - position2[1])
        return grid_distance - product_similarity

    def get_neighbors(row, col):
        # Up, down, left, right - restricted to cells inside the grid
        candidates = ((row - 1, col), (row + 1, col), (row, col - 1), (row, col + 1))
        return [(r, c) for r, c in candidates if 0 <= r < n_rows and 0 <= c < n_cols]

    start_position = None
    if start is not None:  # None would otherwise match an empty cell
        start_position = next(
            ((r, c) for r in range(n_rows) for c in range(n_cols) if grid[r, c] == start),
            None,
        )
    if start_position is None:
        raise ValueError(f"start product {start!r} is not in the grid")

    open_set = []
    heappush(open_set, (0, start_position))
    # Best known path cost per cell; cells not in the dict count as infinitely far
    g_score = {start_position: 0}

    recommendations = []
    path = []
    while open_set and len(recommendations) < max_recommendations:
        _, current = heappop(open_set)
        path.append(current)
        current_product = grid[current[0], current[1]]
        if current_product not in recommendations:
            recommendations.append(current_product)
        for neighbor in get_neighbors(current[0], current[1]):
            neighbor_product = grid[neighbor[0], neighbor[1]]
            if not neighbor_product:
                continue  # empty cell
            # Computed once per edge and used for both the path cost and the heuristic
            product_similarity = calculate_similarity(products_dict[current_product], products_dict[neighbor_product])
            # Cost must fall as similarity rises; adding the raw similarity would
            # cancel the heuristic's discount and favour dissimilar products.
            tentative_g_score = g_score[current] + (max_similarity - product_similarity)
            if tentative_g_score < g_score.get(neighbor, float('inf')):
                g_score[neighbor] = tentative_g_score
                priority = tentative_g_score + heuristic(current, neighbor, product_similarity)
                heappush(open_set, (priority, neighbor))

    return recommendations, path

# Run the search from a product ID known to be in the grid (see the IDs printed above)
# Alternative start ID tried earlier: "B017086GMK"
start_product_id = "B094C5ZN1M"
recommendations, path = a_star_search_recommendations(
    start_product_id, grid, products_dict, max_recommendations=10
)
print("Recommended products:", recommendations)

# Visualization with A* path
plt.figure(figsize=(50, 50))
ax = plt.gca()
empty_grid = np.full((grid_size, grid_size), np.nan)  # all-NaN image: a blank grid to draw on
ax.imshow(empty_grid, cmap='viridis', interpolation='none')
ax.set_title('Product Grid Based on Hybrid Filtering and Embeddings')

# One tab20 colour per distinct category
categories_unique = list({p['category'] for p in products_dict.values()})
category_colors = {}
for slot, category in enumerate(categories_unique):
    category_colors[category] = plt.cm.tab20(slot / len(categories_unique))

# Thin grey lines on the cell boundaries
for k in range(grid_size):
    boundary = k - 0.5
    ax.axhline(boundary, color='gray', linewidth=0.5)
    ax.axvline(boundary, color='gray', linewidth=0.5)

# Label every occupied cell with its product ID on a category-coloured box
for (r, c), pid in np.ndenumerate(grid):
    if pid is None:
        continue
    face = category_colors[products_dict[pid]['category']]
    ax.text(c, r, pid, ha='center', va='center', fontsize=8,
            bbox=dict(facecolor=face, edgecolor='black', boxstyle='round,pad=0.3'))

# Red dot on every cell the search expanded
for r, c in path:
    ax.plot(c, r, 'ro')

# Re-draw the labels of recommended products on a yellow box
for rec_id in recommendations:
    for (r, c), pid in np.ndenumerate(grid):
        if pid == rec_id:
            ax.text(c, r, rec_id, ha='center', va='center', fontsize=8,
                    bbox=dict(facecolor='yellow', edgecolor='black', boxstyle='round,pad=0.3'))

# Legend with one entry per category
handles = [mpatches.Patch(color=shade, label=name) for name, shade in category_colors.items()]
ax.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

ax.set_xlabel('Embedding-based Column Index')
ax.set_ylabel('Hybrid Filtering-based Row Index')
plt.show()


In [None]:
# List the ID, name and category of each recommended product
print("Recommended products:")
for product_id in recommendations:
    product = products_dict[product_id]
    # Build the three-line summary first, then emit it with a single print
    summary = "\n".join([
        f"Product ID: {product['product_id']}",
        f"Product Name: {product['product_name']}  ",
        f"Category: {product['category']}\n",
    ])
    print(summary)

In [None]:
# Visualization with A* path, drawn on a very large canvas
plt.figure(figsize=(100, 100))
ax = plt.gca()
empty_grid = np.full((grid_size, grid_size), np.nan)  # all-NaN image: a blank grid to draw on
ax.imshow(empty_grid, cmap='viridis', interpolation='none')
ax.set_title('Product Grid Based on Hybrid Filtering and Embeddings')

# One tab20 colour per distinct category
categories_unique = list({p['category'] for p in products_dict.values()})
category_colors = {}
for slot, category in enumerate(categories_unique):
    category_colors[category] = plt.cm.tab20(slot / len(categories_unique))

# Thin grey lines on the cell boundaries
for k in range(grid_size):
    boundary = k - 0.5
    ax.axhline(boundary, color='gray', linewidth=0.5)
    ax.axvline(boundary, color='gray', linewidth=0.5)

# Label every occupied cell with its product ID on a category-coloured box
for (r, c), pid in np.ndenumerate(grid):
    if pid is None:
        continue
    face = category_colors[products_dict[pid]['category']]
    ax.text(c, r, pid, ha='center', va='center', fontsize=8,
            bbox=dict(facecolor=face, edgecolor='black', boxstyle='round,pad=0.3'))

# Red dot on every cell the search expanded
for r, c in path:
    ax.plot(c, r, 'ro')

# Re-draw the labels of recommended products on a yellow box
for rec_id in recommendations:
    for (r, c), pid in np.ndenumerate(grid):
        if pid == rec_id:
            ax.text(c, r, rec_id, ha='center', va='center', fontsize=8,
                    bbox=dict(facecolor='yellow', edgecolor='black', boxstyle='round,pad=0.3'))

# Join consecutive expansions with a red line segment (in expansion order)
for (from_row, from_col), (to_row, to_col) in zip(path, path[1:]):
    ax.plot([from_col, to_col], [from_row, to_row], 'r-', linewidth=2)

# Legend with one entry per category
handles = [mpatches.Patch(color=shade, label=name) for name, shade in category_colors.items()]
ax.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

ax.set_xlabel('Embedding-based Column Index')
ax.set_ylabel('Hybrid Filtering-based Row Index')
plt.show()


In [None]:
# Compact scatter view of the grid with the A* path on top
plt.figure(figsize=(20, 20))  # Adjusted figure size
ax = plt.gca()

# One tab20 colour per distinct category
categories_unique = list({p['category'] for p in products_dict.values()})
category_colors = {}
for slot, category in enumerate(categories_unique):
    category_colors[category] = plt.cm.tab20(slot / len(categories_unique))

# One square marker per occupied cell, coloured by category
for (r, c), pid in np.ndenumerate(grid):
    if pid is None:
        continue
    ax.scatter(c, r, color=category_colors[products_dict[pid]['category']], marker='s')

# Expansion order as a red line with dots (x = column, y = row)
path_x = [pos[1] for pos in path]
path_y = [pos[0] for pos in path]
ax.plot(path_x, path_y, 'ro-')

# Large yellow circle on every recommended product
for rec_id in recommendations:
    for (r, c), pid in np.ndenumerate(grid):
        if pid == rec_id:
            ax.scatter(c, r, color='yellow', marker='o', s=100)

# Legend with one entry per category
handles = [mpatches.Patch(color=shade, label=name) for name, shade in category_colors.items()]
ax.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

ax.set_xlabel('Embedding-based Column Index')
ax.set_ylabel('Hybrid Filtering-based Row Index')
ax.axis('off')  # Turn off the axis
plt.show()


In [None]:
# Scatter view of the grid; recommended products keep their category colour
plt.figure(figsize=(20, 20))  # Adjusted figure size
ax = plt.gca()

# One tab20 colour per distinct category
categories_unique = list({p['category'] for p in products_dict.values()})
category_colors = {}
for slot, category in enumerate(categories_unique):
    category_colors[category] = plt.cm.tab20(slot / len(categories_unique))

# One square marker per occupied cell, coloured by category
for (r, c), pid in np.ndenumerate(grid):
    if pid is None:
        continue
    ax.scatter(c, r, color=category_colors[products_dict[pid]['category']], marker='s')

# Larger circle, in the product's own category colour, on every recommended product
for rec_id in recommendations:
    for (r, c), pid in np.ndenumerate(grid):
        if pid == rec_id:
            rec_color = category_colors[products_dict[rec_id]['category']]
            ax.scatter(c, r, color=rec_color, marker='o', s=100)

# Expansion order as a thick red line with dots (x = column, y = row)
path_x = [pos[1] for pos in path]
path_y = [pos[0] for pos in path]
ax.plot(path_x, path_y, 'ro-', linewidth=2)

# Legend with one entry per category
handles = [mpatches.Patch(color=shade, label=name) for name, shade in category_colors.items()]
ax.legend(handles=handles, bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

ax.set_xlabel('Embedding-based Column Index')
ax.set_ylabel('Hybrid Filtering-based Row Index')
ax.axis('off')  # Turn off the axis
plt.show()
