<a href="https://colab.research.google.com/github/Raboooiii/Sem6/blob/main/LabCycle_Q6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Load MovieLens dataset
ratings = pd.read_csv(
    "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
    sep='\t',
    names=['user_id', 'item_id', 'rating', 'timestamp']
)

ratings.head()

ratings.info()
ratings.describe()

user_item_matrix = ratings.pivot_table(
    index='user_id',
    columns='item_id',
    values='rating'
)

user_item_matrix.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype
---  ------     --------------   -----
 0   user_id    100000 non-null  int64
 1   item_id    100000 non-null  int64
 2   rating     100000 non-null  int64
 3   timestamp  100000 non-null  int64
dtypes: int64(4)
memory usage: 3.1 MB


item_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,


In [2]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Fill missing values with 0
matrix_filled = user_item_matrix.fillna(0)

# Compute cosine similarity
user_similarity = cosine_similarity(matrix_filled)

user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index
)

user_similarity_df.head()


def recommend_movies_user_based(user_id, n_recommendations=5):
    similar_users = user_similarity_df[user_id].sort_values(ascending=False)[1:6]

    similar_users_ratings = matrix_filled.loc[similar_users.index]

    weighted_ratings = similar_users_ratings.T.dot(similar_users.values)

    already_rated = matrix_filled.loc[user_id]

    recommendations = weighted_ratings[already_rated == 0]

    return recommendations.sort_values(ascending=False).head(n_recommendations)

recommend_movies_user_based(1)

user_similarity_pearson = matrix_filled.T.corr()

In [3]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(ratings, test_size=0.2, random_state=42)

from sklearn.metrics import mean_squared_error

def predict_rating(user, item):
    if item in matrix_filled.columns:
        return matrix_filled.loc[user].mean()
    return 3

predictions = []
actuals = []

for row in test.itertuples():
    predictions.append(predict_rating(row.user_id, row.item_id))
    actuals.append(row.rating)

rmse = np.sqrt(mean_squared_error(actuals, predictions))
rmse

np.float64(3.321419689302548)

In [4]:
item_similarity = cosine_similarity(matrix_filled.T)

item_similarity_df = pd.DataFrame(
    item_similarity,
    index=user_item_matrix.columns,
    columns=user_item_matrix.columns
)

def recommend_movies_item_based(user_id, n_recommendations=5):
    user_ratings = matrix_filled.loc[user_id]
    rated_items = user_ratings[user_ratings > 0]

    scores = pd.Series(dtype=float)

    for item, rating in rated_items.items():
        similar_items = item_similarity_df[item]
        scores = scores.add(similar_items * rating, fill_value=0)

    scores = scores.drop(rated_items.index)

    return scores.sort_values(ascending=False).head(n_recommendations)

recommend_movies_item_based(1)

Unnamed: 0_level_0,0
item_id,Unnamed: 1_level_1
423,394.094117
655,368.216701
568,367.805253
403,364.943551
385,363.508132


In [5]:
def hybrid_recommendation(user_id, alpha=0.5):
    user_scores = recommend_movies_user_based(user_id)
    item_scores = recommend_movies_item_based(user_id)

    hybrid_scores = alpha * user_scores + (1 - alpha) * item_scores

    return hybrid_scores.sort_values(ascending=False).head(5)

hybrid_recommendation(1)

Unnamed: 0_level_0,0
item_id,Unnamed: 1_level_1
273,
382,
385,
403,
423,


# **Evaluation & Interpretation**

**Analysis**

  * User-based works well when users have similar behavior.
  * Item-based works better in large systems.
  * Hybrid improves accuracy.

**Interpretation**

Recommended movies:
  * Similar genre
  * Popular among similar users
  * Frequently co-rated

**Improvements**

  * Use Matrix Factorization (SVD)
  * Deep learning models
  * Incorporate content-based features
  * Use implicit feedback