In [None]:
!pip uninstall -y numpy
!pip install numpy==1.23.5 scikit-surprise --no-cache-dir



Found existing installation: numpy 1.23.5
Uninstalling numpy-1.23.5:
  Successfully uninstalled numpy-1.23.5
Collecting numpy==1.23.5
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m177.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
blosc2 3.2.0 requires numpy>=1.26, but you have numpy 1.23.5 which is incompatible.
xarray 2025.1.2 requires numpy>=1.24, but you have numpy 1.23.5 which is incompatible.
bigframes 1.40.0 requires numpy>=1.24.0, but you have numpy 1.23.5 which is incompatible.
jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.23.5 whi

# Import Libraries

In [None]:

import pandas as pd
import numpy as np
import pickle
import nltk
import re
import warnings
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.sentiment import SentimentIntensityAnalyzer

# Install a global "ignore" filter so no warnings are shown from here on.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# imported above.
warnings.filterwarnings("ignore")
# Download the VADER lexicon resource; the sentiment analyzer imported above
# is presumably what needs it -- verify if the sentiment step is removed.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

# Get dataset path

In [None]:
import kagglehub

# Download the MovieLens 20M dataset from Kaggle. `path` receives the value
# returned by `dataset_download`, which the print below reports as the location
# of the dataset files.
# NOTE(review): no dataset version is requested here, so the version fetched
# may change between runs -- confirm whether it should be pinned.
path = kagglehub.dataset_download("grouplens/movielens-20m-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/grouplens/movielens-20m-dataset?dataset_version_number=1...


100%|██████████| 195M/195M [00:01<00:00, 165MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/grouplens/movielens-20m-dataset/versions/1


# Load Dataset

In [None]:
# Load the MovieLens tables from the directory returned by kagglehub (`path`)
# instead of a hard-coded /content location, so the notebook runs on a fresh
# kernel without files having to be copied or uploaded by hand.
movies = pd.read_csv(f"{path}/movie.csv")
ratings = pd.read_csv(f"{path}/rating.csv")

# Merge Dataset

In [None]:
# Join each rating row with the columns of its movie, matching on movieId.
data = pd.merge(ratings, movies, on="movieId")

# Collaborative Filtering

In [None]:
# Collaborative Filtering (Matrix Factorization using SVD)
# The reader declares the rating scale of the input as 0.5 to 5.0.
reader = Reader(rating_scale=(0.5, 5.0))
dataset = Dataset.load_from_df(data[['userId', 'movieId', 'rating']], reader)
# Hold out 20% of the ratings; the split is seeded so it is repeatable.
trainset, testset = train_test_split(dataset, test_size=0.2, random_state=42)

# Seed the model as well: without a seed the fitted model, and therefore the
# recommendations derived from it, differ on every run even though the split
# above is fixed.
model_cf = SVD(random_state=42)
model_cf.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7e1bf0e0dd90>

# Content-Based Filtering

In [None]:
# Content-based similarity: TF-IDF over the movie titles (English stop words
# removed), then the cosine similarity of every title against every other one.
vectorizer = TfidfVectorizer(stop_words="english")
movie_tfidf = vectorizer.fit_transform(movies["title"])
# With a single argument the matrix is compared against itself.
cosine_sim = cosine_similarity(movie_tfidf)

# Sentiment Analysis

In [None]:
# Sentiment Analysis
sanalyzer = SentimentIntensityAnalyzer()


def analyze_sentiment(review):
    """Return the compound polarity score of ``review``.

    Args:
        review: Text to score.

    Returns:
        The ``'compound'`` entry of ``sanalyzer.polarity_scores(review)``.
    """
    polarity = sanalyzer.polarity_scores(review)
    return polarity['compound']

# Recommendation Function

In [None]:
# Recommendation Function
def hybrid_recommend(user_id, num_recommendations=5):
    """Recommend the unrated movies with the highest predicted rating for a user.

    Every movie in ``movies`` that the user has no rating for in ``data`` is
    scored with ``model_cf.predict``; the best ``num_recommendations`` are
    returned.

    Args:
        user_id: User id as it appears in ``data['userId']``.
        num_recommendations: Maximum number of movies to return.

    Returns:
        A list of ``(title, predicted_rating)`` tuples ordered from highest to
        lowest predicted rating. The list is empty when ``num_recommendations``
        is zero or negative.
    """
    # A negative count would otherwise slice from the wrong end of the ranking.
    if num_recommendations <= 0:
        return []

    # A set gives constant-time membership tests; with a list, the filter below
    # rescanned the user's whole rating history for every movie in the catalogue.
    seen_movie_ids = set(data.loc[data['userId'] == user_id, 'movieId'])

    pred_ratings = [
        (mid, model_cf.predict(user_id, mid).est)
        for mid in movies['movieId']
        if mid not in seen_movie_ids
    ]
    pred_ratings.sort(key=lambda pair: pair[1], reverse=True)
    top_predictions = pred_ratings[:num_recommendations]

    # NOTE(review): the ranking relies on the collaborative-filtering estimate
    # only. A per-movie lookup of the most similar titles used to be computed
    # here but its result was never used, so it has been dropped.
    recommended_movies = []
    for movie_id, rating in top_predictions:
        movie_title = movies.loc[movies['movieId'] == movie_id, 'title'].iloc[0]
        recommended_movies.append((movie_title, rating))

    return recommended_movies

# Save models for Flask

In [None]:
# Save models for Flask
# Each artifact is pickled to its own file in the working directory.
for filename, artifact in (
    ("hybrid_model.pkl", model_cf),
    ("cosine_sim.pkl", cosine_sim),
    ("vectorizer.pkl", vectorizer),
):
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)

# Test Recommendation System

In [None]:
!pip install tabulate

# Test Recommendation System
from tabulate import tabulate

table = hybrid_recommend(user_id=1)
print(tabulate(table, headers=["Movie", "Predicted Rating"]))

Movie                                      Predicted Rating
---------------------------------------  ------------------
Cosmos (1980)                                       4.50483
Black Mirror (2011)                                 4.48393
Thin Blue Line, The (1988)                          4.474
Baraka (1992)                                       4.4558
My Life in Pink (Ma vie en rose) (1997)             4.44077
