<a href="https://colab.research.google.com/github/Jayjoshina/Recommender-system/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install ipywidgets

Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jedi
Successfully installed jedi-0.19.2


In [3]:
from IPython.display import display
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import linear_kernel
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
from google.colab import files
import ipywidgets as widgets

In [4]:
# --- Load datasets ---
# Both tables are read from absolute paths under /content, so the CSV files
# must already exist there before this cell runs.
# Later cells use `movieId`, `title` and `genres` from `movies`, and
# `userId`, `movieId` and `rating` from `ratings`.
movies = pd.read_csv("/content/movies.csv")
ratings = pd.read_csv("/content/ratings.csv")

In [5]:
# --- Content-Based TF-IDF ---
# One text document per movie: its title followed by its genres. The '|'
# characters in `genres` are replaced by spaces so each genre becomes its own
# token. `regex=False` forces a literal replacement on every pandas version
# (the default for `regex` changed between pandas 1.x and 2.x).
movies['content'] = (
    movies['title'].fillna('')
    + ' '
    + movies['genres'].fillna('').str.replace('|', ' ', regex=False)
)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['content'])
# Pairwise dot products between all movie vectors. With TfidfVectorizer's
# default L2 row normalisation this is the cosine similarity.
# NOTE: the result is a dense (n_movies x n_movies) array, so memory grows
# quadratically with the size of the catalogue.
content_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Row position in `movies` (and therefore in `content_sim`) for each movieId,
# plus the inverse mapping.
movieId_to_idx = {mid: idx for idx, mid in enumerate(movies['movieId'].tolist())}
idx_to_movieId = {idx: mid for mid, idx in movieId_to_idx.items()}


In [6]:
# --- Collaborative Filtering with TruncatedSVD ---
# Users on the rows, movies on the columns, ratings as the cell values.
# User/movie pairs without a rating are filled with 0.
user_item_matrix = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)
sparse_matrix = csr_matrix(user_item_matrix.to_numpy())

# Factorise into 50 latent components. `svd_matrix` holds one latent vector per
# row (user) of `user_item_matrix`; `svd_model.components_` holds the
# per-column (movie) loadings.
svd_model = TruncatedSVD(n_components=50, random_state=42)
svd_matrix = svd_model.fit_transform(sparse_matrix)

In [7]:
def collaborative_score(user_id, movie_id):
    """Score a (user, movie) pair from the truncated-SVD factorisation.

    The ids are translated to row/column positions of ``user_item_matrix``;
    the score is the dot product of the user's row of ``svd_matrix`` with the
    movie's column of ``svd_model.components_``.

    Args:
        user_id: Label expected in ``user_item_matrix.index``.
        movie_id: Label expected in ``user_item_matrix.columns``.

    Returns:
        float: The reconstructed score, or ``0.0`` when either id is not
        present in ``user_item_matrix`` (e.g. a user or movie with no ratings).
    """
    try:
        user_idx = user_item_matrix.index.get_loc(user_id)
        movie_idx = user_item_matrix.columns.get_loc(movie_id)
    except KeyError:
        # An unknown id is the only expected failure. Any other exception is a
        # genuine bug and must surface instead of silently becoming a 0 score.
        return 0.0
    return float(np.dot(svd_matrix[user_idx], svd_model.components_[:, movie_idx]))

# --- User history mapping ---
# userId -> list of the movieIds that user has rated.
user_history = (
    ratings
    .groupby('userId')['movieId']
    .apply(list)
    .to_dict()
)
# The same histories expressed as row positions (via movieId_to_idx); rated
# movies that have no entry in movieId_to_idx are dropped.
user_history_idx = {
    uid: [movieId_to_idx[mid] for mid in rated_ids if mid in movieId_to_idx]
    for uid, rated_ids in user_history.items()
}

# --- Content score for user ---
def content_score_for_user(candidate_movie_idx, user_history_movie_idxs):
    """Return the candidate's highest content similarity to the user's history.

    Args:
        candidate_movie_idx: Row position of the candidate in ``content_sim``.
        user_history_movie_idxs: Column positions in ``content_sim`` of the
            movies the user has already rated.

    Returns:
        float: The maximum similarity between the candidate and any history
        movie, or ``0.0`` when the history is empty.
    """
    if user_history_movie_idxs:
        similarities = content_sim[candidate_movie_idx, user_history_movie_idxs]
        return float(similarities.max())
    return 0.0

In [8]:
# --- Hybrid recommender ---
def recommend_hybrid(user_id, top_k=10, alpha=0.7):
    """Recommend unseen movies by blending collaborative and content scores.

    Every movie in ``movies`` that the user has not rated is scored twice:
    once with ``collaborative_score`` and once with ``content_score_for_user``.
    Each score vector is min-max scaled over the candidate set, then combined
    as ``alpha * collaborative + (1 - alpha) * content``.

    Args:
        user_id: Key into ``user_history``. A user with no history gets 0 for
            both raw scores on every candidate, so the ranking then carries no
            real signal.
        top_k: Maximum number of recommendations to return.
        alpha: Weight of the collaborative score; the content score gets
            ``1 - alpha``.

    Returns:
        pandas.DataFrame: Columns ``'Movie Title'`` and ``'Hybrid Score'``
        (rounded to 4 decimals), ordered from best to worst. Empty when there
        is no unrated movie left to recommend.
    """
    rated = set(user_history.get(user_id, []))
    candidates = [m for m in movies['movieId'] if m not in rated]

    # MinMaxScaler rejects zero-row input, so short-circuit when the user has
    # already rated the whole catalogue (or the catalogue is empty).
    if not candidates:
        return pd.DataFrame({'Movie Title': [], 'Hybrid Score': []})

    # Loop-invariant: the user's history positions are the same for every candidate.
    history_idxs = user_history_idx.get(user_id, [])

    collab_scores = np.array([collaborative_score(user_id, mid) for mid in candidates])
    content_scores = np.array([
        content_score_for_user(movieId_to_idx[mid], history_idxs)
        for mid in candidates
    ])

    # Put both signals on a common [0, 1] scale before mixing them.
    scaler = MinMaxScaler()
    collab_norm = scaler.fit_transform(collab_scores.reshape(-1, 1)).flatten()
    content_norm = scaler.fit_transform(content_scores.reshape(-1, 1)).flatten()

    hybrid_scores = alpha * collab_norm + (1 - alpha) * content_norm
    top_idx = np.argsort(hybrid_scores)[::-1][:top_k]

    # One movieId -> title map (first occurrence wins) instead of a full-frame
    # boolean mask per recommended movie.
    title_by_id = (
        movies
        .drop_duplicates(subset='movieId')
        .set_index('movieId')['title']
        .to_dict()
    )

    result_df = pd.DataFrame({
        'Movie Title': [title_by_id[candidates[i]] for i in top_idx],
        'Hybrid Score': [round(hybrid_scores[i], 4) for i in top_idx]
    })

    return result_df

In [9]:
# Example run without UI: the 10 best hybrid recommendations for user 1, with
# the collaborative score weighted 0.7 and the content score weighted 0.3.
recommend_hybrid(user_id=1, top_k=10, alpha=0.7).head(10)


Unnamed: 0,Movie Title,Hybrid Score
0,Die Hard (1988),0.8124
1,Toy Story 2 (1999),0.7313
2,Terminator 2: Judgment Day (1991),0.702
3,"Godfather: Part II, The (1974)",0.6894
4,Scream (1996),0.6806
5,Jaws (1975),0.6579
6,Stand by Me (1986),0.6566
7,"Godfather, The (1972)",0.6502
8,Teenage Mutant Ninja Turtles (1990),0.6313
9,"Lost World: Jurassic Park, The (1997)",0.6263
