<a href="https://colab.research.google.com/github/Mazuda-Zaki/applied-ai-projects/blob/main/02_Content_Based_Product_Recommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# For this project, we are going to build an AI that acts like a smart matching system for niche,
# "problem-solver" products. Imagine a user typing in a daily frustration—like needing clever home organization goods,
# unique school supplies, or ergonomic office accessories—and the AI instantly recommending the perfect product to fix it.
# The Aim: What are we trying to achieve?
# To build an AI that reads a user's written problem and matches it to the
# text description of a specific product to make a highly accurate recommendation.

In [1]:
import pandas as pd

# 1. Describe every problem-solver product as one (id, name, description) record,
#    so each product's name sits right next to the text that describes it.
catalog_columns = ("Product_ID", "Product_Name", "Description")
catalog_rows = [
    (
        101,
        "Ergonomic Under-Desk Footrest",
        "Fixes lower back pain and poor posture while sitting at an office desk for long hours.",
    ),
    (
        102,
        "Cable Management Box",
        "Hides messy cords and power strips under the desk, keeping your home organized and safe.",
    ),
    (
        103,
        "Continuous Sticky Note Roller",
        "A roll of sticky notes for school supplies, allowing students to cut the exact size they need.",
    ),
    (
        104,
        "Memory Foam Seat Cushion",
        "An ergonomic memory foam seat cushion that relieves tailbone and back pressure for office workers.",
    ),
    (
        105,
        "Spill-Proof Keyboard Cover",
        "Protects your laptop keyboard from coffee spills and dust while working from home or school.",
    ),
]

# Pivot the records into a {column name: list of values} mapping.
data = {
    column: list(values)
    for column, values in zip(catalog_columns, zip(*catalog_rows))
}

# 2. Build the product catalog DataFrame and show the two text columns.
df = pd.DataFrame(data)
print("--- PRODUCT CATALOG ---", df[["Product_Name", "Description"]], sep="\n")

--- PRODUCT CATALOG ---
                    Product_Name  \
0  Ergonomic Under-Desk Footrest   
1           Cable Management Box   
2  Continuous Sticky Note Roller   
3       Memory Foam Seat Cushion   
4     Spill-Proof Keyboard Cover   

                                         Description  
0  Fixes lower back pain and poor posture while s...  
1  Hides messy cords and power strips under the d...  
2  A roll of sticky notes for school supplies, al...  
3  An ergonomic memory foam seat cushion that rel...  
4  Protects your laptop keyboard from coffee spil...  


In [2]:
from numpy import vectorize
# Step 2: Text Vectorization (The TF-IDF AI)
# TF (Term Frequency): counts how many times a word appears in a description.
# IDF (Inverse Document Frequency): penalizes boring words (like "the", "and", "for")
# and gives high scores to unique, important words (like "ergonomic", "cords", "posture").

from sklearn.feature_extraction.text import TfidfVectorizer

# 1. Build the TF-IDF vectorizer.
# Passing stop_words='english' drops common filler words such as 'the' or 'and'.
vectorizer = TfidfVectorizer(stop_words="english")

# 2. Learn the vocabulary from the product descriptions and, in the same step,
#    encode each description as a row of numbers.
product_descriptions = df["Description"]
tfidf_matrix = vectorizer.fit_transform(product_descriptions)

# The shape reads (5, X): 5 products and X unique words discovered.
print("\n--- TF_IDF MATRIX SHAPE ---", tfidf_matrix.shape, sep="\n")



--- TF_IDF MATRIX SHAPE ---
(5, 46)


In [3]:
# Step 3: The Math (Cosine Similarity)

from sklearn.metrics.pairwise import cosine_similarity

# 1. The problem statement as the user would type it.
user_search = "My lower back hurts so much from sitting at my office desk all day."

# 2. Encode the search text with the SAME fitted vectorizer that encoded the
#    product descriptions, so both are expressed over the same set of words.
user_vector = vectorizer.transform([user_search])

# 3. Compare the search against every product at once. The result is a 2D
#    matrix; .flatten() turns it into a simple 1D list of scores.
similarity_matrix = cosine_similarity(X=user_vector, Y=tfidf_matrix)
similarity_scores = similarity_matrix.flatten()

# Store the scores next to the products (adds a 'Match_Score' column to df).
df["Match_Score"] = similarity_scores

print("\n---SIMILARITY SCORES ---", df[["Product_Name", "Match_Score"]], sep="\n")


---SIMILARITY SCORES ---
                    Product_Name  Match_Score
0  Ergonomic Under-Desk Footrest     0.595790
1           Cable Management Box     0.117453
2  Continuous Sticky Note Roller     0.000000
3       Memory Foam Seat Cushion     0.115309
4     Spill-Proof Keyboard Cover     0.000000


In [4]:
# Step 4: The Recommendation Function

def select_top_matches(scored_catalog, top_n=2, min_score=0.0):
    """Return the best-scoring products from a catalog that has a 'Match_Score' column.

    Parameters
    ----------
    scored_catalog : pandas.DataFrame
        Product table that already contains a numeric 'Match_Score' column.
    top_n : int, default 2
        Maximum number of products to return.
    min_score : float, default 0.0
        Only rows scoring strictly above this value are kept. With the default,
        products scoring exactly 0 are never returned, so an unrelated search
        yields an empty result instead of arbitrary products.

    Returns
    -------
    pandas.DataFrame
        The qualifying rows, highest score first. Empty when nothing qualifies.
    """
    ranked = scored_catalog.sort_values(by='Match_Score', ascending=False)
    return ranked[ranked['Match_Score'] > min_score].head(top_n)


# 1. Sort the DataFrame by the Match_Score from highest to lowest (ascending=False)
top_recommendations = df.sort_values(by='Match_Score', ascending=False)

# 2. Grab only the top 2 results that actually matched (score above zero)
top2 = select_top_matches(df, top_n=2)

print("\n🎯 --- TOP 2 RECOMMENDED PRODUCTS FOR YOUR PROBLEM --- 🎯")

if top2.empty:
    # Nothing scored above zero: say so rather than list 0.0 % "matches".
    print("No matching products found. Try describing your problem with different words.")

for _, row in top2.iterrows():
    # Convert the math score (e.g., 0.45) into a readable percentage (45%)
    match_percentage = round(row['Match_Score'] * 100, 1)
    print(f"✔️ {row['Product_Name']} ({match_percentage} % Match)")


üéØ --- TOP 2 RECOMMENDED PRODUCTS FOR YOUR PROBLEM --- üéØ
‚úîÔ∏è Ergonomic Under-Desk Footrest (59.6 % Match)
‚úîÔ∏è Cable Management Box (11.7 % Match)
