In [1]:
from pathlib import Path
import pandas as pd

DATA_DIR = Path("../data")
products = pd.read_csv(DATA_DIR/"sample_products.csv")
def tags_to_text(tag_string):
    if pd.isna(tag_string) or str(tag_string).strip()=="":
        return ""
    return " ".join(str(tag_string).split("|"))

products["tags_text"] = products["tags"].apply(tags_to_text)

products["text_features"] = (
    products["category"].fillna("") + " " +
    products["brand"].fillna("")+ " " +
    products["tags_text"].fillna("")+ " " +
    products["description"].fillna("")
)

products[["product_id","category", "brand", "tags_text", "text_features"]].head()

Unnamed: 0,product_id,category,brand,tags_text,text_features
0,1,Electronics,PageTurner,battery wireless bluetooth 4K,Electronics PageTurner battery wireless blueto...
1,2,Beauty,PageTurner,anti-aging SPF vitamin-c sensitive skincare,Beauty PageTurner anti-aging SPF vitamin-c sen...
2,3,Fashion,PageTurner,summer formal casual leather,Fashion PageTurner summer formal casual leathe...
3,4,Home,Silk&Co,eco-friendly durable minimal,Home Silk&Co eco-friendly durable minimal Silk...
4,5,Beauty,PageTurner,SPF hydrating anti-aging skincare vitamin-c,Beauty PageTurner SPF hydrating anti-aging ski...


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(
    lowercase=True,
    stop_words="english",
    ngram_range=(1,2),
    min_df=1
)

X = vectorizer.fit_transform(products["text_features"])

print("TF-IDF matrix shape:", X.shape)

TF-IDF matrix shape: (60, 433)


In [3]:
from sklearn.metrics.pairwise import cosine_similarity

sim_matrix = cosine_similarity(X,X)

print("The Shape of sim_matrix:", sim_matrix.shape)
print("Example: sim(products 1,products 2)", sim_matrix[1,4])

The Shape of sim_matrix: (60, 60)
Example: sim(products 1,products 2) 0.4709899504609096


In [4]:
import numpy as np

def get_top_recommendations(product_idx, sim_matrix, top_n=5):
    similarity_scores = sim_matrix[product_idx]

    similar_indices = similarity_scores.argsort()[-top_n-1:-1][::-1]
    return similar_indices, similarity_scores[similar_indices]

product_idx = 0
top_n = 5
recommended_indices, recommended_scores = get_top_recommendations(product_idx, sim_matrix, top_n)

for idx,score in zip(recommended_indices, recommended_scores):
    print(f"Recommended product {idx+1} with similarity score : {score}")

Recommended product 33 with similarity score : 0.5203446531599809
Recommended product 31 with similarity score : 0.48706411646844594
Recommended product 24 with similarity score : 0.3815667684534567
Recommended product 42 with similarity score : 0.3171205650358204
Recommended product 15 with similarity score : 0.08458017411507429


In [5]:
product_names = products[['product_id', 'category', 'brand', 'tags_text', 'text_features']]

for idx, score in zip(recommended_indices, recommended_scores):
     print(f"Recommended product (ID {products.iloc[idx]['product_id']}): {products.iloc[idx]['text_features']} | Similarity score: {score}")

Recommended product (ID 33): Electronics PageTurner battery noise-cancelling wireless smart 4K PageTurner electronics item designed for battery and noise-cancelling. Ideal for everyday use. | Similarity score: 0.5203446531599809
Recommended product (ID 31): Electronics PageTurner portable 4K battery wireless noise-cancelling PageTurner electronics item designed for portable and 4K. Ideal for everyday use. | Similarity score: 0.48706411646844594
Recommended product (ID 24): Electronics GlowLab battery noise-cancelling wireless bluetooth GlowLab electronics item designed for battery and noise-cancelling. Ideal for everyday use. | Similarity score: 0.3815667684534567
Recommended product (ID 42): Electronics HomeNest portable bluetooth 4K smart wireless HomeNest electronics item designed for portable and bluetooth. Ideal for everyday use. | Similarity score: 0.3171205650358204
Recommended product (ID 15): Fashion PageTurner summer streetwear casual PageTurner fashion item designed for summ

In [6]:
user_profile = interactions.groupby('user_id')['product_id'].apply(list).to_dict()

user_1_interactions = user_profile[1]

NameError: name 'interactions' is not defined