<a href="https://colab.research.google.com/github/Davansh09/Data-science-projects/blob/main/Reco_engine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Toy product catalog: one row per item, with the free-text fields
# ("category" and "desc") that the content-based recommender vectorizes.
items = pd.DataFrame(
    [
        (1, "Wireless Mouse", "Electronics", "silent clicks ergonomic optical mouse for laptop and pc"),
        (2, "Mechanical Keyboard", "Electronics", "tactile keys rgb backlight compact keyboard for gamers"),
        (3, "Noise Cancelling Headphones", "Audio", "over ear bluetooth headphones with deep bass and mic"),
        (4, "Yoga Mat", "Fitness", "non slip yoga mat for workout pilates stretching"),
        (5, "Dumbbell Set", "Fitness", "adjustable dumbbells strength training home gym"),
        (6, "Laptop Stand", "Accessories", "aluminium stand for laptop cooling ergonomic angle"),
        (7, "Gaming Mouse Pad", "Accessories", "large non slip mouse pad smooth surface for gaming"),
        (8, "Bluetooth Speaker", "Audio", "portable speaker with powerful sound and long battery"),
    ],
    columns=["item_id", "name", "category", "desc"],
)

# Toy explicit-feedback data, written per user as (item_id, rating) pairs and
# flattened into one (user_id, item_id, rating) row per rating.
ratings = pd.DataFrame(
    [
        (user, item, stars)
        for user, rated in {
            1: [(1, 5), (2, 4), (3, 4)],  # likes electronics + audio
            2: [(4, 5), (5, 4)],          # likes fitness stuff
            3: [(6, 5), (7, 4), (1, 4)],  # mixes accessories + electronics
            4: [(3, 5), (8, 4), (7, 3)],  # likes audio + accessories
            5: [(2, 3), (4, 4), (6, 4)],  # random
        }.items()
        for item, stars in rated
    ],
    columns=["user_id", "item_id", "rating"],
)

def build_user_item_matrix(ratings_df):
    """Reshape long-format ratings into a users x items table.

    Rows are ``user_id`` values, columns are ``item_id`` values, and each cell
    holds that user's rating for that item. User/item pairs with no rating
    are left as NaN. ``pivot_table``'s default aggregation (mean) applies if
    the same pair appears more than once.
    """
    return ratings_df.pivot_table(
        values="rating",
        index="user_id",
        columns="item_id",
    )

def user_similarity(user_item_matrix):
    """Return a user-by-user cosine-similarity table.

    Missing ratings are replaced with 0 before the similarity is computed, so
    an unrated item contributes nothing to the dot product. The result is a
    square DataFrame whose rows and columns are both labelled with the user
    index of ``user_item_matrix``.
    """
    dense = user_item_matrix.fillna(0)
    users = dense.index
    return pd.DataFrame(cosine_similarity(dense.values), index=users, columns=users)

def recommend_cf(target_user, user_item_matrix, user_sim, top_n=5):
    """Score the target user's unrated items from other users' ratings.

    For every item the target user has not rated, the prediction is the
    similarity-weighted average of the ratings given by the users who did
    rate it. Items whose raters all have zero total similarity to the target
    get no prediction and are left out.

    Args:
        target_user: Label of the user in ``user_item_matrix.index``.
        user_item_matrix: Users x items ratings table with NaN for "unrated".
        user_sim: Square user-by-user similarity table.
        top_n: Maximum number of predictions to return.

    Returns:
        A Series of predicted scores indexed by item, sorted descending and
        cut to ``top_n``. Empty if the user has rated every item.

    Raises:
        ValueError: If ``target_user`` is not a row of ``user_item_matrix``.
    """
    if target_user not in user_item_matrix.index:
        raise ValueError("User not found in ratings")

    target_ratings = user_item_matrix.loc[target_user]
    candidates = target_ratings.index[target_ratings.isna().to_numpy()].tolist()
    if len(candidates) == 0:
        return pd.Series(dtype=float)

    # Similarity of every *other* user to the target.
    neighbor_sims = user_sim.loc[target_user].drop(target_user)

    def _predict(item_id):
        # Only users who actually rated this item can vote on it.
        observed = user_item_matrix[item_id].dropna()
        sims = neighbor_sims.loc[observed.index]
        norm = sims.abs().sum()
        if norm == 0:
            return np.nan
        return np.dot(observed.values, sims.values) / norm

    scores = {item_id: _predict(item_id) for item_id in candidates}
    ranked = pd.Series(scores).dropna().sort_values(ascending=False)
    return ranked.head(top_n)

def build_item_tfidf(items_df):
    """Fit a TF-IDF model over each item's category and description text.

    The ``category`` and ``desc`` columns are joined with a space to form one
    document per item, and English stop words are removed.

    Returns:
        A ``(vectorizer, matrix)`` pair: the fitted ``TfidfVectorizer`` and
        the item-by-feature matrix, with rows in ``items_df`` row order.
    """
    corpus = (items_df["category"] + " " + items_df["desc"]).values
    tfidf = TfidfVectorizer(stop_words="english")
    item_features = tfidf.fit_transform(corpus)
    return tfidf, item_features

def user_profile_vector(target_user, ratings_df, items_df, vectorizer, X_items):
    """Build a rating-weighted content profile for one user.

    The profile is the mean of the feature rows of the items the user rated
    4 or higher, each row scaled by its rating. If the user has no rating of
    4 or more, all of their ratings are used instead.

    Args:
        target_user: Value to match against ``ratings_df.user_id``.
        ratings_df: Ratings with ``user_id``, ``item_id`` and ``rating`` columns.
        items_df: Catalog whose ``item_id`` column is in the same row order
            as ``X_items``.
        vectorizer: Unused here; kept so the call signature stays unchanged.
        X_items: Item-by-feature matrix supporting list row-indexing and
            ``.toarray()`` (e.g. a SciPy sparse matrix).

    Returns:
        A ``(1, n_features)`` NumPy array, or ``None`` when the user has no
        ratings or none of their rated items exist in ``items_df``.
    """
    user_rates = ratings_df[ratings_df.user_id == target_user]
    if user_rates.empty:
        return None

    liked = user_rates[user_rates.rating >= 4]
    use_df = user_rates if liked.empty else liked

    # Map catalog item ids to their row position in X_items.
    id_to_idx = {iid: idx for idx, iid in enumerate(items_df.item_id.values)}

    # Pair each rating row directly with its catalog row. Looking the weight
    # up through an item_id-indexed Series breaks when the same item was
    # rated more than once (the lookup then returns a Series, not a scalar).
    pairs = [
        (id_to_idx[iid], rating)
        for iid, rating in zip(use_df.item_id.values, use_df.rating.values)
        if iid in id_to_idx
    ]
    if not pairs:
        return None

    idxs, row_weights = zip(*pairs)
    mat = X_items[list(idxs)].toarray()
    w = np.asarray(row_weights, dtype=float).reshape(-1, 1)
    return (mat * w).mean(axis=0, keepdims=True)

def recommend_content(target_user, ratings_df, items_df, vectorizer, X_items, top_n=5):
    """Rank items the user has not rated by similarity to their content profile.

    The user's profile vector is compared to every row of ``X_items`` with
    cosine similarity; items the user already rated are then removed.

    Returns:
        A Series of similarity scores indexed by ``item_id``, sorted
        descending and cut to ``top_n``. Empty when no profile can be built
        for the user.
    """
    profile = user_profile_vector(target_user, ratings_df, items_df, vectorizer, X_items)
    if profile is None:
        return pd.Series(dtype=float)

    similarity = pd.Series(
        cosine_similarity(profile, X_items).flatten(),
        index=items_df.item_id.values,
    )

    # Don't recommend what the user has already rated.
    already_rated = ratings_df.loc[ratings_df.user_id == target_user, "item_id"].unique()
    unseen = similarity.drop(labels=already_rated, errors="ignore")

    return unseen.sort_values(ascending=False).head(top_n)


def blended_recommendations(target_user, ratings_df, items_df, alpha=0.6, top_n=5):
    """Blend collaborative-filtering and content-based scores into one ranking.

    Both recommenders score the target user's unrated items. Each score
    column is min-max scaled to [0, 1] and the two are combined as
    ``alpha * cf + (1 - alpha) * cb``. An item missing from one recommender
    contributes 0 for that part of the blend.

    Args:
        target_user: User to recommend for.
        ratings_df: Ratings with ``user_id``, ``item_id`` and ``rating`` columns.
        items_df: Catalog with ``item_id``, ``name``, ``category`` and ``desc``.
        alpha: Weight of the collaborative-filtering score (0..1); the
            content-based score gets ``1 - alpha``.
        top_n: Number of rows to return.

    Returns:
        A DataFrame indexed by item id with columns ``item_name``, ``cf``,
        ``cb`` and ``score``, sorted by ``score`` descending. ``cf``/``cb``
        may be NaN for an item that recommender did not score.

    Raises:
        ValueError: Propagated from ``recommend_cf`` when ``target_user`` has
            no ratings.
    """
    ui = build_user_item_matrix(ratings_df)
    sim = user_similarity(ui)

    # Ask each recommender for every candidate it can produce so the blend,
    # not the individual rankers, decides the final top_n. Sized from the
    # data rather than a fixed cap so large catalogs are not truncated.
    n_candidates = max(len(items_df), ui.shape[1])

    cf_scores = recommend_cf(target_user, ui, sim, top_n=n_candidates)
    vec, X = build_item_tfidf(items_df)
    cb_scores = recommend_content(target_user, ratings_df, items_df, vec, X, top_n=n_candidates)

    all_items = set(cf_scores.index).union(cb_scores.index)
    df = pd.DataFrame(
        {"cf": pd.Series(cf_scores), "cb": pd.Series(cb_scores)},
        index=sorted(all_items),
    )

    # Min-max scale each column so the two score types are comparable.
    # A column with no scores, or with no spread, is set to 0.0 throughout.
    for col in ("cf", "cb"):
        if df[col].notna().sum() > 0:
            lo, hi = df[col].min(), df[col].max()
            if hi > lo:
                df[col] = (df[col] - lo) / (hi - lo)
            else:
                df[col] = 0.0
        else:
            df[col] = 0.0

    df["score"] = alpha * df["cf"].fillna(0) + (1 - alpha) * df["cb"].fillna(0)
    df = df.sort_values("score", ascending=False).head(top_n)

    # Resolve names from the catalog that was passed in, not a module global.
    id_to_name = items_df.set_index("item_id")["name"].to_dict()
    df["item_name"] = df.index.map(id_to_name)
    return df[["item_name", "cf", "cb", "score"]]


if __name__ == "__main__":
    # Demo: try 2, 3, 4 or 5 here to see how the blend changes per user.
    target_user_id = 1
    top_picks = blended_recommendations(target_user_id, ratings, items, alpha=0.6, top_n=5)

    print(f"\nRecommendations for user {target_user_id} (blend of CF + Content):\n")
    # One line per recommended item, best blended score first.
    for row in top_picks.to_dict(orient="records"):
        print(f"- {row['item_name']}  | CF={row['cf']:.2f}  CB={row['cb']:.2f}  FINAL={row['score']:.2f}")



Recommendations for user 1 (blend of CF + Content):

- Laptop Stand  | CF=1.00  CB=1.00  FINAL=1.00
- Bluetooth Speaker  | CF=0.47  CB=0.38  FINAL=0.44
- Yoga Mat  | CF=0.47  CB=0.00  FINAL=0.28
- Gaming Mouse Pad  | CF=0.00  CB=0.44  FINAL=0.18
- Dumbbell Set  | CF=nan  CB=0.00  FINAL=0.00
