In [1]:
pip install pandas surprise


Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357255 sha256=91b8e37436a27410a86e28cd1b5116637877e7907b91ff054470c074a296ae84
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise


In [20]:
import pandas as pd

# Create the ratings dataframe
ratings_data = {
    'userId': [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5],
    'movieId': [101, 102, 103, 101, 104, 105, 101, 102, 106, 103, 104, 107, 105, 106, 107],
    'rating': [5, 3, 2, 4, 5, 1, 2, 4, 5, 3, 4, 2, 5, 3, 4]
}
ratings_df = pd.DataFrame(ratings_data)
ratings_df.to_csv('ratings.csv', index=False)

# Create the movies dataframe
movies_data = {
    'movieId': [101, 102, 103, 104, 105, 106, 107],
    'title': ['Mission Impossible', 'Phir hera pheri', 'Fast and Furious', 'Mr. Bean', 'Harry Potters', 'Hunger Games', 'Interstellar'],
    'genre': ['Action', 'Comedy', 'Action', 'Comedy', 'Fantasy', 'Fantasy', 'Sci-Fi']
}
movies_df = pd.DataFrame(movies_data)
movies_df.to_csv('movies.csv', index=False)


In [21]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate

# Load data
ratings_data = pd.read_csv('ratings.csv')
movies_data = pd.read_csv('movies.csv')

# Define a reader
reader = Reader(rating_scale=(1, 5))

# Load the data into Surprise
dataset = Dataset.load_from_df(ratings_data[['userId', 'movieId', 'rating']], reader)

# Split the dataset into train and test sets
trainset, testset = train_test_split(dataset, test_size=0.25)

# Use the SVD algorithm for collaborative filtering
algo = SVD()

# Train the algorithm on the trainset
algo.fit(trainset)

# Evaluate the algorithm on the testset
predictions = algo.test(testset)

# Cross-validate the model
cross_validate(algo, dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)

# Function to get top N recommendations for a user
def get_top_n_recommendations(userId, n=10):
    # Get a list of all movie IDs
    movie_ids = movies_data['movieId'].unique()

    # Predict ratings for all movies for the given user
    predictions = [algo.predict(userId, mid) for mid in movie_ids]

    # Sort predictions by estimated rating
    predictions.sort(key=lambda x: x.est, reverse=True)

    # Get the top N recommendations
    top_n_recommendations = predictions[:n]

    # Get movie details for the top N recommendations
    top_movies = [(movies_data.loc[movies_data['movieId'] == pred.iid]['title'].values[0], pred.est) for pred in top_n_recommendations]

    return top_movies

# Get top 10 recommendations for user with ID 1
user_id = 1
top_recommendations = get_top_n_recommendations(user_id, n=10)
print(f"Top 10 recommendations for user {user_id}:")
for title, rating in top_recommendations:
    print(f"Movie Title: {title}, Predicted Rating: {rating}")


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.5928  1.4288  1.5884  2.1107  1.2727  1.3987  0.4918  
MAE (testset)     0.5841  1.3589  1.5757  1.8685  1.1526  1.3079  0.4328  
Fit time          0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Top 10 recommendations for user 1:
Movie Title: Mission Impossible, Predicted Rating: 3.746709069596183
Movie Title: Mr. Bean, Predicted Rating: 3.468561257980013
Movie Title: Hunger Games, Predicted Rating: 3.393804389357499
Movie Title: Interstellar, Predicted Rating: 3.3772162720333028
Movie Title: Phir hera pheri, Predicted Rating: 3.2343993596500287
Movie Title: Fast and Furious, Predicted Rating: 3.180337960176656
Movie Title: Harry Potters, Predicted Rating: 3.031954017987335
