In [1]:
import pandas as pd
import numpy as np
import pickle
from scipy.sparse import csr_matrix
from sentence_transformers import SentenceTransformer, util
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

df = pd.read_csv("cleaned_ecommerce_data.csv")
df['review text'] = df['review text'].fillna('')

with open("als_model.pkl", "rb") as f:
    als_model = pickle.load(f)
with open("user_encoder.pkl", "rb") as f:
    user_encoder = pickle.load(f)
with open("product_encoder.pkl", "rb") as f:
    product_encoder = pickle.load(f)

sparse_data = np.load("user_item_sparse.npz")
interaction_matrix = csr_matrix((sparse_data['data'], sparse_data['indices'], sparse_data['indptr']),
                                shape=sparse_data['shape'])

with open("content_model.pkl", "rb") as f:
    content_data = pickle.load(f)

content_model = content_data['model']
tfidf = content_data['tfidf']
content_df = content_data['data']

bert_model = SentenceTransformer('all-MiniLM-L6-v2')
bert_embeddings = np.load("bert_embeddings.npy")

user_input_id = input("Enter User ID: ").strip()
product_input_name = input("Enter Product Name to find similar products: ").strip()

print("\n✅ ALS Collaborative Filtering Recommendations:")
if user_input_id in df['user id'].values:
    user_index = user_encoder.transform([user_input_id])[0]
    
    item_indices, scores = als_model.recommend(user_index, interaction_matrix[user_index], N=5)
    
    for item_idx, score in zip(item_indices, scores):
        product_id = product_encoder.inverse_transform([item_idx])[0]
        product_name = df[df['product id'] == product_id]['product name'].values[0]
        print(f"Product: {product_name} | Score: {score:.4f}")
else:
    print("❌ User ID not found in dataset.")


print("\n✅ Content-Based Filtering Recommendations:")
match = content_df[content_df['product name'].str.contains(product_input_name, case=False, na=False)]
if not match.empty:
    index = match.index[0]
    tfidf_matrix = tfidf.transform(content_df['review text'])
    distances, indices = content_model.kneighbors(tfidf_matrix[index], n_neighbors=6)
    for i in indices[0][1:]:  
        print("Product:", content_df.iloc[i]['product name'])
else:
    print("❌ Product Name not found in Content-Based data.")

print("\n✅ BERT Semantic Similarity Recommendations:")
match = df[df['product name'].str.contains(product_input_name, case=False, na=False)]
if not match.empty:
    index = match.index[0]
    query_embedding = torch.tensor(bert_embeddings[index])
    all_embeddings = torch.tensor(bert_embeddings)
    cos_scores = util.pytorch_cos_sim(query_embedding, all_embeddings)[0]
    top_results = torch.topk(cos_scores, k=6)
    for score, idx in zip(top_results[0][1:], top_results[1][1:]):  
        print(f"Product: {df.iloc[int(idx)]['product name']} | Score: {score:.4f}")
else:
    print("❌ Product Name not found in BERT data.")





Enter User ID:  U0019890
Enter Product Name to find similar products:  blazer



✅ ALS Collaborative Filtering Recommendations:
Product: Washing Machine | Score: 0.0179
Product: Body Wash | Score: 0.0110
Product: Bravia TV | Score: 0.0108
Product: Ultraboost Shoes | Score: 0.0103
Product: Formal Shirt | Score: 0.0097

✅ Content-Based Filtering Recommendations:
Product: XPS 13
Product: WH-1000XM5
Product: iPhone 15
Product: Bravia TV
Product: Revolution 6

✅ BERT Semantic Similarity Recommendations:
Product: Shampoo | Score: 1.0000
Product: Revolution 6 | Score: 1.0000
Product: Suit | Score: 1.0000
Product: Blazer | Score: 1.0000
Product: Hair Serum | Score: 1.0000
