<a href="https://colab.research.google.com/github/Dhwaj-054/lit-college-codes/blob/main/RS_Expt5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install pandas numpy scikit-learn scipy

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix


In [None]:
# Toy explicit-feedback data: one column per item, one list entry per user.
# A value of 0 is treated by the recommender code below as "not rated".
data = {
    "ItemA": [4, 4, 0, 0, 1],
    "ItemB": [4, 0, 0, 2, 1],
    "ItemC": [5, 2, 4, 0, 0],
    "ItemD": [0, 3, 5, 3, 0],
    "ItemE": [1, 0, 2, 4, 0],
}
user_labels = ["User1", "User2", "User3", "User4", "User5"]

# Rows are users, columns are items.
user_item_matrix = pd.DataFrame(data=data, index=pd.Index(user_labels))

print("User-Item Interaction Matrix:")
display(user_item_matrix)


User-Item Interaction Matrix:


Unnamed: 0,ItemA,ItemB,ItemC,ItemD,ItemE
User1,4,4,5,0,1
User2,4,0,2,3,0
User3,0,0,4,5,2
User4,0,2,0,3,4
User5,1,1,0,0,0


In [None]:
# Item-item cosine similarity
# Transpose so that each item is a row: cosine_similarity compares rows,
# which makes every item a vector of the ratings it received from all users.
item_matrix = user_item_matrix.transpose()
similarity_matrix = cosine_similarity(item_matrix)

# Re-attach the item labels on both axes so lookups can be done by item name.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=item_matrix.index,
    columns=item_matrix.index,
)

print("Item-Item Similarity Matrix (Cosine):")
display(similarity_df.round(decimals=6))


Item-Item Similarity Matrix (Cosine):


Unnamed: 0,ItemA,ItemB,ItemC,ItemD,ItemE
ItemA,1.0,0.645777,0.726599,0.318559,0.151947
ItemB,0.645777,1.0,0.6506,0.199667,0.571429
ItemC,0.726599,0.6506,1.0,0.591062,0.42289
ItemD,0.318559,0.199667,0.591062,1.0,0.732114
ItemE,0.151947,0.571429,0.42289,0.732114,1.0


In [None]:
#item-based CF

def recommend_items(user_id, top_n=3, min_sim=0.0, ratings=None, sim_df=None):
    """
    Recommend items the user has not rated yet (item-based CF).

    Each unseen item is scored with the sum, over the user's rated items, of
    ``similarity(rated_item, unseen_item) * rating(rated_item)``.

    Parameters
    ----------
    user_id : label
        Row label of the user in the rating matrix.
    top_n : int, default 3
        Maximum number of items to return.
    min_sim : float, default 0.0
        Similarities strictly below this threshold are ignored.
    ratings : pd.DataFrame, optional
        User-item rating matrix (users as rows, items as columns, 0 meaning
        "not rated"). Defaults to the notebook-level ``user_item_matrix``,
        looked up at call time.
    sim_df : pd.DataFrame, optional
        Item-item similarity matrix labelled by item on both axes. Defaults to
        the notebook-level ``similarity_df``, looked up at call time. Passing
        it explicitly allows reuse with another similarity matrix.

    Returns
    -------
    list
        Item labels ordered by descending score (at most ``top_n``). Items
        that never receive a score are not returned.

    Raises
    ------
    ValueError
        If ``user_id`` is not a row of the rating matrix.
    """
    # Resolve defaults per call so the function always sees the current globals.
    ratings = user_item_matrix if ratings is None else ratings
    sim_df = similarity_df if sim_df is None else sim_df

    if user_id not in ratings.index:
        raise ValueError(f"Unknown user_id: {user_id}")

    user_ratings = ratings.loc[user_id]
    rated = user_ratings[user_ratings > 0]
    seen = set(rated.index)  # set membership instead of repeated list scans

    scores = {}
    for seen_item, rating in rated.items():
        # Similarities of every other item to this rated item.
        sims = sim_df[seen_item].drop(seen_item)
        sims = sims[sims >= min_sim]
        for candidate, sim in sims.items():
            if candidate in seen:
                continue
            scores[candidate] = scores.get(candidate, 0.0) + sim * rating

    # sorted() is stable, so tied scores keep their first-scored order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return [item for item, _ in ranked[:top_n]]

def predict_rating(user_id, item_id, ratings=None, sim_df=None):
    """
    Predict a user's rating for an item as a similarity-weighted average of
    the ratings the user has already given.

    prediction = sum(sim(item_id, j) * rating(j)) / sum(|sim(item_id, j)|)
    where j runs over every item the user rated (rating > 0).

    Note: if the user has already rated ``item_id``, that item is part of the
    rated set, so its own diagonal similarity entry and known rating take part
    in the average.

    Parameters
    ----------
    user_id : label
        Row label of the user in the rating matrix.
    item_id : label
        Column label of the item to predict.
    ratings : pd.DataFrame, optional
        User-item rating matrix (0 meaning "not rated"). Defaults to the
        notebook-level ``user_item_matrix``, looked up at call time.
    sim_df : pd.DataFrame, optional
        Item-item similarity matrix labelled by item on both axes. Defaults to
        the notebook-level ``similarity_df``, looked up at call time.

    Returns
    -------
    float
        The weighted average, or 0.0 when the similarity weights sum to zero
        (for example when the user has no ratings).

    Raises
    ------
    ValueError
        If ``user_id`` or ``item_id`` is not present in the rating matrix.
    """
    # Resolve defaults per call so the function always sees the current globals.
    ratings = user_item_matrix if ratings is None else ratings
    sim_df = similarity_df if sim_df is None else sim_df

    if user_id not in ratings.index:
        raise ValueError(f"Unknown user_id: {user_id}")
    if item_id not in ratings.columns:
        raise ValueError(f"Unknown item_id: {item_id}")

    user_ratings = ratings.loc[user_id]
    rated = user_ratings[user_ratings > 0]

    num, den = 0.0, 0.0
    for seen_item, rating in rated.items():
        sim = sim_df.loc[item_id, seen_item]
        num += sim * rating
        den += abs(sim)  # abs keeps the normaliser positive for negative similarities
    return num / den if den != 0 else 0.0


In [None]:
# Demo: rank unseen items for User1, then estimate User1's rating for ItemE.
print("Recommendations for User1:")
user1_recommendations = recommend_items("User1", top_n=3)
print(user1_recommendations)

print("\nPredicted Rating of User1 for ItemE:")
user1_iteme_prediction = predict_rating("User1", "ItemE")
print(user1_iteme_prediction)


Recommendations for User1:
['ItemD']

Predicted Rating of User1 for ItemE:
2.7992588419247078


In [None]:
from typing import Literal

def normalize_matrix(df: pd.DataFrame, axis: Literal["user","item","none"]="none"):
    """
    Return a mean-centered float copy of a user-item rating matrix.

    A value of 0 means "not rated". Means are computed over rated cells only,
    and only rated cells are shifted by the mean; unrated cells stay at 0.
    (Previously the mean ignored zeros but was still subtracted from them, so
    every unrated cell became ``-mean`` and acted like a strong negative
    rating in downstream similarity computations.)

    Parameters
    ----------
    df : pd.DataFrame
        Rating matrix with users as rows and items as columns.
    axis : {"user", "item", "none"}, default "none"
        - "user": subtract each user's (row) mean from that user's ratings
        - "item": subtract each item's (column) mean from that item's ratings
        - "none": no normalization; any other value is treated the same way

    Returns
    -------
    pd.DataFrame
        A new float DataFrame with the same labels; ``df`` is not modified.
        For "user"/"item", unrated cells and rows/columns with no ratings at
        all come back as 0.0.
    """
    X = df.copy().astype(float)
    if axis not in ("user", "item"):
        return X

    # Mask unrated cells as NaN: they are then skipped by mean() and stay NaN
    # through the subtraction, so only real ratings are centered.
    rated = X.where(X.ne(0))
    if axis == "user":
        centered = rated.sub(rated.mean(axis=1), axis=0)
    else:
        centered = rated.sub(rated.mean(axis=0), axis=1)

    # Unrated cells (and fully unrated rows/columns, whose mean is NaN) -> 0.
    return centered.fillna(0.0)

# Item-based similarity on item-centered ratings:
# center each item column, then compare items as row vectors.
norm_user_item = normalize_matrix(user_item_matrix, axis="item")
item_mat_norm = norm_user_item.transpose()
sim_norm = cosine_similarity(item_mat_norm)

# Label both axes with the item names for lookups by item.
similarity_df_norm = pd.DataFrame(
    data=sim_norm,
    index=item_mat_norm.index,
    columns=item_mat_norm.index,
)

print("Item-Item Similarity (item-centered):")
display(similarity_df_norm.round(decimals=6))


Item-Item Similarity (item-centered):


Unnamed: 0,ItemA,ItemB,ItemC,ItemD,ItemE
ItemA,1.0,0.517549,0.61774,0.037547,-0.155265
ItemB,0.517549,1.0,0.516552,-0.119185,0.421429
ItemC,0.61774,0.516552,1.0,0.411129,0.200603
ItemD,0.037547,-0.119185,0.411129,1.0,0.627019
ItemE,-0.155265,0.421429,0.200603,0.627019,1.0


In [None]:
def to_sparse(df: pd.DataFrame):
    """
    Convert a DataFrame's values into a SciPy CSR sparse matrix.

    Only the underlying array is converted; the row and column labels of
    ``df`` are not carried over to the returned matrix.
    """
    return csr_matrix(df.values)

# Sparse (CSR) version of the dense user-item rating matrix.
sp_matrix = to_sparse(user_item_matrix)
# Bare expression: the notebook renders the sparse matrix repr as the cell output.
sp_matrix


<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 15 stored elements and shape (5, 5)>

In [None]:
def topk_item_sim(sim_df: pd.DataFrame, k: int = 10, include_self=False):
    """
    Prune an item-item similarity matrix row by row.

    For every item (row) only its ``k`` largest similarities are kept; every
    other entry of the returned matrix is 0.0. Pruning is per row, so the
    result is not necessarily symmetric.

    Parameters
    ----------
    sim_df : pd.DataFrame
        Similarity matrix labelled by item on both axes.
    k : int, default 10
        Number of largest similarities to keep per row.
    include_self : bool, default False
        If False, an item's similarity to itself is excluded before the top-k
        selection and its diagonal entry stays 0.0. If True, the self entry
        competes in the selection and the diagonal is always copied over.

    Returns
    -------
    pd.DataFrame
        New float matrix with the same labels as ``sim_df``.
    """
    labels = sim_df.index
    result = pd.DataFrame(0.0, index=labels, columns=labels)

    for label in labels:
        row = sim_df.loc[label]
        candidates = row if include_self else row.drop(label)
        best = candidates.nlargest(k)
        result.loc[label, best.index] = best.values
        if include_self:
            # Keep the diagonal even if it did not make the top-k cut.
            result.loc[label, label] = sim_df.loc[label, label]

    return result

# Keep only each item's two strongest neighbours (self-similarity excluded).
similarity_df_topk = topk_item_sim(
    similarity_df,
    k=2,
    include_self=False,
)

print("Top-2 item neighbors (cosine):")
display(similarity_df_topk.round(decimals=6))


Top-2 item neighbors (cosine):


Unnamed: 0,ItemA,ItemB,ItemC,ItemD,ItemE
ItemA,0.0,0.645777,0.726599,0.0,0.0
ItemB,0.645777,0.0,0.6506,0.0,0.0
ItemC,0.726599,0.6506,0.0,0.0,0.0
ItemD,0.0,0.0,0.591062,0.0,0.732114
ItemE,0.0,0.571429,0.0,0.732114,0.0


In [None]:
# Recommender functions using top-K pruned similarity
def recommend_items_topk(user_id, top_n=3, sim_df=None, ratings=None):
    """
    Recommend unseen items for a user using a (typically top-k pruned)
    item-item similarity matrix.

    Each unseen item is scored with the sum, over the user's rated items, of
    ``sim_df.loc[unseen_item, rated_item] * rating(rated_item)``; entries with
    similarity <= 0 are ignored. With a row-wise pruned matrix this means a
    rated item contributes only where it is among the unseen item's kept
    neighbours.

    Parameters
    ----------
    user_id : label
        Row label of the user in the rating matrix.
    top_n : int, default 3
        Maximum number of items to return.
    sim_df : pd.DataFrame, optional
        Item-item similarity matrix. Defaults to the notebook-level
        ``similarity_df_topk``. The default is resolved at call time rather
        than bound in the signature, so re-running the pruning cell (e.g. with
        a different ``k``) takes effect without re-defining this function.
    ratings : pd.DataFrame, optional
        User-item rating matrix (0 meaning "not rated"). Defaults to the
        notebook-level ``user_item_matrix``, looked up at call time.

    Returns
    -------
    list
        Item labels ordered by descending score (at most ``top_n``).

    Raises
    ------
    KeyError
        If ``user_id`` is not a row of the rating matrix.
    """
    if sim_df is None:
        sim_df = similarity_df_topk
    if ratings is None:
        ratings = user_item_matrix

    user_ratings = ratings.loc[user_id]
    rated = user_ratings[user_ratings > 0]
    seen = set(rated.index)  # set membership instead of repeated list scans

    scores = {}
    for seen_item, rating in rated.items():
        # Column lookup: how similar each candidate considers this rated item.
        for candidate, sim in sim_df[seen_item].items():
            if candidate in seen or sim <= 0:
                continue
            scores[candidate] = scores.get(candidate, 0.0) + sim * rating

    # sorted() is stable, so tied scores keep their first-scored order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return [item for item, _ in ranked[:top_n]]

def predict_rating_topk(user_id, item_id, sim_df=None, ratings=None):
    """
    Predict a user's rating for an item from a (typically top-k pruned)
    item-item similarity matrix.

    The prediction is the similarity-weighted average of the user's ratings,
    using only rated items j with ``sim_df.loc[item_id, j] > 0``.

    Parameters
    ----------
    user_id : label
        Row label of the user in the rating matrix.
    item_id : label
        Row label of the target item in ``sim_df``.
    sim_df : pd.DataFrame, optional
        Item-item similarity matrix. Defaults to the notebook-level
        ``similarity_df_topk``. The default is resolved at call time rather
        than bound in the signature, so re-running the pruning cell (e.g. with
        a different ``k``) takes effect without re-defining this function.
    ratings : pd.DataFrame, optional
        User-item rating matrix (0 meaning "not rated"). Defaults to the
        notebook-level ``user_item_matrix``, looked up at call time.

    Returns
    -------
    float
        The weighted average, or 0.0 when no rated item has a positive
        similarity to ``item_id``.

    Raises
    ------
    KeyError
        If ``user_id`` or ``item_id`` cannot be found in the matrices.
    """
    if sim_df is None:
        sim_df = similarity_df_topk
    if ratings is None:
        ratings = user_item_matrix

    user_ratings = ratings.loc[user_id]
    rated = user_ratings[user_ratings > 0]

    num, den = 0.0, 0.0
    for seen_item, rating in rated.items():
        sim = sim_df.loc[item_id, seen_item]
        if sim > 0:
            num += sim * rating
            den += sim  # sim is positive here, so no abs() is needed
    return num / den if den != 0 else 0.0

# Demo: same queries as the unpruned recommender, now on the top-k neighbour matrix.
print("Top-K Recommendations for User1:")
user1_topk_recommendations = recommend_items_topk("User1", top_n=3)
print(user1_topk_recommendations)

print("\nTop-K Predicted Rating of User1 for ItemE:")
user1_iteme_topk_prediction = predict_rating_topk("User1", "ItemE")
print(user1_iteme_topk_prediction)


Top-K Recommendations for User1:
['ItemD']

Top-K Predicted Rating of User1 for ItemE:
4.0
