In [1]:
!pip install numpy pandas scikit-learn surprise



In [2]:
import sys
print(sys.version)

3.11.11 (main, Dec  4 2024, 08:55:07) [GCC 11.4.0]


In [3]:
!pip install numpy==1.23.5



In [4]:
!pip uninstall -y scikit-surprise
!pip install scikit-surprise

Found existing installation: scikit-surprise 1.1.4
Uninstalling scikit-surprise-1.1.4:
  Successfully uninstalled scikit-surprise-1.1.4
Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [5]:
import numpy as np
import pandas as pd
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate

print("NumPy version:", np.__version__)
print("All libraries imported successfully!")

NumPy version: 1.23.5
All libraries imported successfully!


In [6]:
!pip install --upgrade scikit-surprise



In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate

In [15]:
# Download MovieLens dataset
!wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
!unzip ml-latest-small.zip

# Load the dataset
movies = pd.read_csv('ml-latest-small/movies.csv')
ratings = pd.read_csv('ml-latest-small/ratings.csv')

# Display the first few rows
print(movies.head())
print(ratings.head())

--2025-03-19 08:19:44--  https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 978202 (955K) [application/zip]
Saving to: ‘ml-latest-small.zip.1’


2025-03-19 08:19:45 (5.78 MB/s) - ‘ml-latest-small.zip.1’ saved [978202/978202]

Archive:  ml-latest-small.zip
replace ml-latest-small/links.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: ml-latest-small/links.csv  
  inflating: ml-latest-small/tags.csv  
  inflating: ml-latest-small/ratings.csv  
  inflating: ml-latest-small/README.txt  
  inflating: ml-latest-small/movies.csv  
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting 

In [16]:
# Merge movies and ratings
data = pd.merge(ratings, movies, on='movieId')

# Drop unnecessary columns
data = data[['userId', 'movieId', 'rating', 'title']]

# Check for missing values
print(data.isnull().sum())

# Remove duplicates
data.drop_duplicates(inplace=True)

# Display the processed data
print(data.head())

userId     0
movieId    0
rating     0
title      0
dtype: int64
   userId  movieId  rating                        title
0       1        1     4.0             Toy Story (1995)
1       1        3     4.0      Grumpier Old Men (1995)
2       1        6     4.0                  Heat (1995)
3       1       47     5.0  Seven (a.k.a. Se7en) (1995)
4       1       50     5.0   Usual Suspects, The (1995)


In [17]:
# Load data into Surprise format
reader = Reader(rating_scale=(0.5, 5.0))
dataset = Dataset.load_from_df(data[['userId', 'movieId', 'rating']], reader)

# Split data into train and test sets
trainset = dataset.build_full_trainset()
testset = trainset.build_anti_testset()

# Train the SVD model
model = SVD()
model.fit(trainset)

# Make predictions
predictions = model.test(testset)

# Evaluate the model using RMSE
rmse = np.sqrt(mean_squared_error([pred.r_ui for pred in predictions], [pred.est for pred in predictions]))
print(f'RMSE: {rmse}')

RMSE: 0.4852701497661241


In [21]:
def get_recommendations(user_id, n=5):
    # Get all movie IDs
    all_movie_ids = data['movieId'].unique()

    # Predict ratings for all movies
    user_ratings = [(user_id, movie_id, model.predict(user_id, movie_id).est) for movie_id in all_movie_ids]

    # Sort by predicted rating
    user_ratings.sort(key=lambda x: x[2], reverse=True)

    # Get top N recommendations
    top_n = user_ratings[:n]
    recommended_movies = [data[data['movieId'] == movie_id]['title'].iloc[0] for _, movie_id, _ in top_n]
    return recommended_movies

# Example: Get recommendations for user ID 1
print(get_recommendations(user_id=5, n=5))

['Life Is Beautiful (La Vita è bella) (1997)', 'Pulp Fiction (1994)', 'Fight Club (1999)', "Schindler's List (1993)", "Rosemary's Baby (1968)"]


In [20]:
from textblob import TextBlob

# Example: Analyze sentiment of a review
review = "This movie was fantastic!"
sentiment = TextBlob(review).sentiment.polarity
print(f'Sentiment: {sentiment}')

Sentiment: 0.5
