In [1]:
!pip uninstall numpy -y
!pip install numpy==1.23.5
!pip install scikit-surprise --upgrade


Found existing installation: numpy 1.23.5
Uninstalling numpy-1.23.5:
  Successfully uninstalled numpy-1.23.5
Collecting numpy==1.23.5
  Using cached numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Using cached numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
Installing collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jax 0.5.2 requires numpy>=1.25, but you have numpy 1.23.5 which is incompatible.
jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.23.5 which is incompatible.
numba 0.61.0 requires numpy<2.2,>=1.24, but you have numpy 1.23.5 which is incompatible.
chex 0.1.89 requires numpy>=1.24.1, but you have numpy 1.23.5 which is incompatible.
treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.23.5 which is incompatible.
scikit-image 0.25.2 r



In [1]:
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split


In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split


In [3]:
# Read the two CSV inputs: movie metadata and user ratings.
# ratings.csv has to be supplied from MovieLens or from your own dataset.
movies, ratings = (
    pd.read_csv(csv_path)
    for csv_path in ('/content/movies.csv', '/content/ratings.csv')
)


In [4]:
# Genres are pipe-delimited in the CSV; swap every pipe for a space so the
# column becomes whitespace-separated text.
genre_text = movies['genres'].str.replace('|', ' ', regex=False)
movies['genres'] = genre_text

# Keep only the three columns used for the rating data.
rating_columns = ['userId', 'movieId', 'rating']
ratings = ratings[rating_columns]


In [5]:
# Validate the prepared data.
# The previous cell already replaced the pipe separators in movies['genres']
# and trimmed ratings to the three needed columns; repeating that work here was
# redundant, so this cell checks the resulting invariants instead.

# Prepare Movies Data: no pipe separator may remain in the genre text.
assert not movies['genres'].str.contains('|', regex=False, na=False).any(), \
    "movies['genres'] still contains '|' separators"

# Prepare Ratings Data (Collaborative Filtering): exactly these columns, in order.
assert list(ratings.columns) == ['userId', 'movieId', 'rating'], \
    "ratings must contain exactly userId, movieId, rating"


In [6]:
# TF-IDF Vectorization
# Each movie's genre string is treated as a small text document.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'])

# Calculate Cosine Similarity
# Dense (n_movies x n_movies) matrix; row i holds the similarity of movie i
# (by row position in `movies`) to every movie.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Movie indices: title -> row position in `movies` / `cosine_sim`.
# Row positions are used (not index labels) because both `cosine_sim` and the
# later `.iloc` lookups are positional.
indices = pd.Series(np.arange(len(movies)), index=movies['title'])
# Series.drop_duplicates() would de-duplicate the *values* (which are already
# unique), not the titles. Drop repeated titles explicitly, keeping the first
# occurrence, so that indices[title] is always a single integer.
indices = indices[~indices.index.duplicated(keep='first')]

# Content-based recommendation function
def get_content_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact movie title, as it appears in ``movies['title']``.
    cosine_sim : array-like, optional
        Square similarity matrix whose row ``i`` holds the similarity of the
        movie at row position ``i`` to every movie. Defaults to the matrix
        computed when this function was defined.
    top_n : int, optional
        Number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Up to ``top_n`` titles ordered from most to least similar; the query
        movie itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    if title not in indices.index:
        raise KeyError(f"Movie title not found: {title!r}")

    idx = indices[title]
    # A title that occurs more than once yields a Series; use its first entry.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(cosine_sim[idx], dtype=float)
    # Stable descending sort: tied scores keep their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the query movie explicitly instead of assuming it sorts first;
    # another movie with an equal score may precede it.
    ranked = ranked[ranked != idx][:top_n]
    return movies['title'].iloc[ranked]

# Demo: print the content-based recommendations for one known title.
toy_story_recommendations = get_content_recommendations('Toy Story (1995)')
print(toy_story_recommendations)


2203                                           Antz (1998)
3021                                    Toy Story 2 (1999)
3653        Adventures of Rocky and Bullwinkle, The (2000)
3912                      Emperor's New Groove, The (2000)
4780                                 Monsters, Inc. (2001)
9949     DuckTales: The Movie - Treasure of the Lost La...
10773                                     Wild, The (2006)
11604                               Shrek the Third (2007)
12969                       Tale of Despereaux, The (2008)
17431    Asterix and the Vikings (Astérix et les Viking...
Name: title, dtype: object


In [7]:
# Surprise library dataset preparation
# The rating scale is taken from the observed minimum and maximum rating.
reader = Reader(rating_scale=(ratings.rating.min(), ratings.rating.max()))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Split data into train and test
# A fixed random_state makes the split identical on every run.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

# SVD Model (Collaborative Filtering)
# Seeded as well, so the fitted model and its predictions are reproducible.
svd = SVD(random_state=42)
svd.fit(trainset)

# Example prediction (userId=1, movieId=1)
print("Predicted rating:", svd.predict(uid=1, iid=1).est)


Predicted rating: 3.8277376887176113


In [13]:
def _min_max_normalize(values):
    """Rescale ``values`` linearly to [0, 1]; all-equal or empty input maps to zeros."""
    values = np.asarray(values, dtype=float)
    if values.size == 0:
        return values
    low = values.min()
    span = values.max() - low
    if span == 0:
        # Every score is identical: the signal carries no ranking information,
        # and dividing by the zero range would fill the result with NaN.
        return np.zeros_like(values)
    return (values - low) / span


def hybrid_movie_recommendation(user_id, movie_title_partial, alpha=0.7, top_n=10):
    """Recommend movies by blending collaborative and content-based scores.

    The first movie whose title contains ``movie_title_partial`` is used as
    the reference movie. Every movie then receives
    ``alpha * cf + (1 - alpha) * content``, where ``cf`` is the min-max
    normalized SVD rating estimate for ``user_id`` and ``content`` is the
    min-max normalized similarity to the reference movie.

    Parameters
    ----------
    user_id
        Raw user id passed to ``svd.predict``.
    movie_title_partial : str
        Literal, case-insensitive substring to look for in the movie titles.
    alpha : float, optional
        Weight of the collaborative score; ``1 - alpha`` weights the content
        score (default 0.7).
    top_n : int, optional
        Number of recommendations to return (default 10).

    Returns
    -------
    dict or str
        On a match, a dict with ``'selected_movie'`` (the matched title) and
        ``'recommendations'`` (DataFrame with ``title`` and ``hybrid_score``,
        best first, excluding rows with the selected title). If no title
        matches, a plain message string is returned instead.
    """
    # Partial match of movie title (case-insensitive); na=False treats missing
    # titles as non-matching instead of producing an unusable mask.
    title_mask = movies['title'].str.contains(
        movie_title_partial, case=False, regex=False, na=False
    )
    matched_positions = np.flatnonzero(title_mask.to_numpy(dtype=bool))

    if matched_positions.size == 0:
        return "No matching movies found. Please try again with another title."

    # Take the first matching movie. Its row position is used directly, which
    # stays unambiguous even when the same title occurs more than once.
    selected_position = int(matched_positions[0])
    selected_movie_title = movies['title'].iloc[selected_position]

    content_scores = cosine_sim[selected_position]

    # Estimated rating of every movie for this user.
    movie_ids = movies['movieId'].values
    cf_scores = np.array([svd.predict(user_id, movie_id).est for movie_id in movie_ids])

    content_scores_norm = _min_max_normalize(content_scores)
    cf_scores_norm = _min_max_normalize(cf_scores)

    hybrid_scores = alpha * cf_scores_norm + (1 - alpha) * content_scores_norm

    recommendations_df = pd.DataFrame({
        'title': movies['title'],
        'hybrid_score': hybrid_scores
    }).sort_values(by='hybrid_score', ascending=False)

    # Never recommend the reference movie itself.
    recommendations_df = recommendations_df[recommendations_df['title'] != selected_movie_title]

    return {
        'selected_movie': selected_movie_title,
        'recommendations': recommendations_df.head(top_n).reset_index(drop=True)
    }

# Example usage:
result = hybrid_movie_recommendation(user_id=100, movie_title_partial='Spider-man')

# hybrid_movie_recommendation returns a plain message string when no title
# matches, so only index into the result when it is the recommendations dict.
if isinstance(result, dict):
    print("✅ Selected Movie:", result['selected_movie'])
    print("\n🎬 Recommendations:")
    print(result['recommendations'])
else:
    print(result)


✅ Selected Movie: Spider-Man (2002)

🎬 Recommendations:
                                               title  hybrid_score
0                                Blade Runner (1982)      0.867739
1                       2001: A Space Odyssey (1968)      0.850207
2                             Children of Men (2006)      0.842008
3          Star Wars: Episode IV - A New Hope (1977)      0.814023
4                                    Serenity (2005)      0.810500
5  Star Wars: Episode V - The Empire Strikes Back...      0.799526
6  Stargate SG-1 Children of the Gods - Final Cut...      0.796850
7                           Time Machine, The (1960)      0.792053
8                                 The Martian (2015)      0.791123
9                            Mad Max Renegade (2011)      0.790928
