In [1]:
pip install pandas numpy scikit-learn surprise


Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357289 sha256=1c3f74f65586addd2bc16b91129e3e3fd742ecf039deff4225c4d187667be27e
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Install

In [3]:
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the dataset.
# DATA_DIR is the single place to change when the CSV files live somewhere
# other than the Colab default upload directory.
DATA_DIR = "/content"
movies = pd.read_csv(f"{DATA_DIR}/movie.csv")  # Movie dataset (uses "title" and "genres" below)
ratings = pd.read_csv(f"{DATA_DIR}/rating.csv")  # Ratings dataset (uses userId, movieId, rating below)



In [4]:
# Collaborative Filtering - Using SVD
def collaborative_filtering(user_id=1, movie_id=50, random_state=42):
    """Train an SVD model on the global ``ratings`` frame and print one prediction.

    Args:
        user_id: Raw user id to predict a rating for. Defaults to 1, the value
            that used to be hard-coded.
        movie_id: Raw movie id to predict a rating for. Defaults to 50, the
            value that used to be hard-coded.
        random_state: Seed forwarded to both the train/test split and the SVD
            initialisation so that repeated runs produce the same model.
            Pass ``None`` to get the previous, unseeded behaviour.

    Returns:
        The fitted ``SVD`` algorithm object.
    """
    # Prepare data for Surprise
    reader = Reader(rating_scale=(0.5, 5.0))
    data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
    # Only the 80% training portion is used for fitting; the held-out part is
    # not evaluated in this function, so it is discarded explicitly.
    trainset, _ = train_test_split(data, test_size=0.2, random_state=random_state)

    # Build the model
    algo = SVD(random_state=random_state)
    algo.fit(trainset)

    # Predict the requested user-movie rating
    prediction = algo.predict(user_id, movie_id)
    print(f"Predicted rating for User {user_id} on Movie {movie_id}: {prediction.est}")
    return algo



In [5]:
# Content-Based Filtering
def content_based_filtering(movie_id, top_n=5):
    """Return the ``top_n`` movies whose genres are most similar to ``movie_id``.

    Similarity is the dot product of TF-IDF vectors built from the ``genres``
    column of the global ``movies`` frame.

    Args:
        movie_id: Identifier of the query movie, matched against
            ``movies["movieId"]``.
            NOTE(review): assumes ``movies`` has a ``movieId`` column using the
            same ids as ``ratings`` -- confirm against the CSV.
        top_n: Number of similar movies to return.

    Returns:
        A DataFrame slice of ``movies`` with up to ``top_n`` rows, most similar
        first. The query movie itself is never included.

    Raises:
        ValueError: If ``movie_id`` does not occur in ``movies["movieId"]``.
    """
    # Translate the movie id into a row position. Previously the id was used
    # directly as a row index, which returned neighbours of an unrelated movie
    # whenever ids and row positions did not coincide.
    positions = (movies["movieId"] == movie_id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise ValueError(f"Movie ID {movie_id} not found in the movies dataset")
    row = int(positions[0])

    # TF-IDF vectorization of movie genres (missing genres become empty text)
    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(movies["genres"].fillna(""))

    # Compute similarity of the query movie against every movie. Only this one
    # row is needed, so the full N x N matrix is never materialised.
    scores = linear_kernel(tfidf_matrix[row], tfidf_matrix).ravel()

    # Rank by descending score; the stable sort keeps file order among ties.
    order = (-scores).argsort(kind="stable")
    # Drop the query movie explicitly: with tied scores (identical genres) it
    # is not guaranteed to be first, so slicing off element 0 is not reliable.
    order = order[order != row][:max(top_n, 0)]
    return movies.iloc[order]



In [6]:
# Hybrid Recommendation System
def hybrid_recommendation(user_id, movie_id):
    """Print collaborative and content-based recommendations side by side.

    The collaborative part trains the SVD model, then ranks every movie the
    user has not rated yet by the model's predicted rating and prints the top
    five titles. The content-based part prints the movies most similar in
    genre to ``movie_id``.

    Args:
        user_id: User to generate collaborative recommendations for, matched
            against ``ratings["userId"]``.
        movie_id: Seed movie for the content-based recommendations.

    Returns:
        None. Results are printed.
    """
    algo = collaborative_filtering()
    cb_recommendations = content_based_filtering(movie_id)

    # Previously this section printed the first five rows of ``movies``
    # regardless of the user; now the trained model actually drives the list.
    # NOTE(review): assumes ``movies`` has a ``movieId`` column sharing ids
    # with ``ratings`` -- confirm against the CSV.
    already_rated = set(ratings.loc[ratings["userId"] == user_id, "movieId"])
    unseen = movies[~movies["movieId"].isin(already_rated)]
    scored = [
        (algo.predict(user_id, candidate_id).est, title)
        for candidate_id, title in zip(unseen["movieId"], unseen["title"])
    ]
    # Sort on the estimate only so ties never fall back to comparing titles.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    print("\nCollaborative Recommendations:")
    for rank, (_, title) in enumerate(scored[:5], start=1):
        print(f"Movie {rank}: {title}")
    print("\nContent-Based Recommendations:")
    print(cb_recommendations[["title", "genres"]])



In [7]:
# Interface Example
def read_int(prompt):
    """Prompt repeatedly until the user enters a valid integer, then return it."""
    while True:
        raw = input(prompt)
        try:
            return int(raw)
        except ValueError:
            # Re-prompt instead of aborting the whole cell with a traceback.
            print(f"'{raw}' is not a whole number, please try again.")


if __name__ == "__main__":
    # Text menu: pick one of the three recommenders and supply its inputs.
    print("Welcome to the Recommendation System!")
    print("1. Get Collaborative Filtering Recommendations")
    print("2. Get Content-Based Recommendations")
    print("3. Get Hybrid Recommendations")
    choice = read_int("Enter your choice: ")

    if choice == 1:
        collaborative_filtering()
    elif choice == 2:
        movie_id = read_int("Enter a Movie ID: ")
        recommendations = content_based_filtering(movie_id)
        print("Recommendations:\n", recommendations[["title", "genres"]])
    elif choice == 3:
        user_id = read_int("Enter User ID: ")
        movie_id = read_int("Enter a Movie ID: ")
        hybrid_recommendation(user_id, movie_id)
    else:
        # Previously an unknown option fell through without any feedback.
        print("Invalid choice. Please enter 1, 2 or 3.")


Welcome to the Recommendation System!
1. Get Collaborative Filtering Recommendations
2. Get Content-Based Recommendations
3. Get Hybrid Recommendations
Enter your choice: 1
Predicted rating for User 1 on Movie 50: 4.129429809569108
