## Create a machine learning model that recommends movies to users based on their preferences

In [None]:
!pip install scikit-surprise



In [None]:
# Attach Google Drive to the Colab VM; the dataset is read from this mount later on.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
from surprise import Dataset, Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import pandas as pd

In [None]:
# Read the movie catalogue from the mounted Drive folder and display it.
movies_csv_path = "/content/gdrive/MyDrive/Dataset/movies.csv"
df = pd.read_csv(filepath_or_buffer=movies_csv_path)
df

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
10324,146684,Cosmic Scrat-tastrophe (2015),Animation|Children|Comedy
10325,146878,Le Grand Restaurant (1966),Comedy
10326,148238,A Very Murray Christmas (2015),Comedy
10327,148626,The Big Short (2015),Drama


In [None]:
# Clean-up in one chain: discard rows containing any missing value,
# then discard rows that are exact duplicates of an earlier row.
df = df.dropna().drop_duplicates()

In [None]:
# Replace any remaining missing values with 0.
# (A no-op when run right after the preceding cell, whose dropna() already
# removed every row that contained a missing value.)
df = df.fillna(value=0)

In [None]:
# Preview the first ten rows of the catalogue.
df.iloc[:10]

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


In [None]:
# Reshape the catalogue: one row per movieId, one column per title,
# with the genre string as the cell value.
# NOTE(review): despite its name this table contains no users or ratings, and
# none of the visible cells read it afterwards — confirm whether it is needed.
user_item_matrix = df.pivot(columns='title', values='genres', index='movieId')

In [None]:
# Build the Surprise dataset and split it into train/test sets.
#
# Surprise expects exactly three columns, in the order (user, item, rating).
# The movie catalogue in `df` has neither users nor ratings; passing
# genres/title/movieId made the model treat movieId as the rating value.
# The ratings therefore have to come from a separate ratings table.
# NOTE(review): assumes a MovieLens-style ratings.csv with columns
# userId, movieId, rating sits next to movies.csv on Drive — confirm the path.
RATINGS_CSV = "/content/gdrive/MyDrive/Dataset/ratings.csv"
RATING_SCALE = (0.5, 5)

ratings_df = pd.read_csv(RATINGS_CSV, usecols=['userId', 'movieId', 'rating'])

# Fail loudly if the data does not match the scale declared to the Reader.
assert ratings_df['rating'].between(*RATING_SCALE).all(), "ratings fall outside RATING_SCALE"

reader = Reader(rating_scale=RATING_SCALE)
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split reproducible.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [None]:
# Train an SVD matrix-factorisation model on the training split.
# random_state pins the random initialisation of the factors so that a
# Restart & Run All reproduces the same model and the same metrics.
algo = SVD(n_factors=100, n_epochs=20, lr_all=0.005, reg_all=0.02, random_state=42)
algo.fit(trainset)
print("Model trained successfully!")

Model trained successfully!


In [None]:
# Evaluate the model on the held-out split using Root Mean Squared Error.
predictions = algo.test(testset)

# verbose=False: accuracy.rmse() prints its own "RMSE: ..." line by default,
# which duplicated the value printed just below.
rmse = accuracy.rmse(predictions, verbose=False)
print(f"Root Mean Squared Error (RMSE): {rmse}")

RMSE: 49981.2745
Root Mean Squared Error (RMSE): 49981.2745412002


In [None]:
# Make recommendations for one user.
user_id = 1

# Movies this user has already rated, read from the ratings the dataset was
# built from. The earlier filter compared df['movieId'] with user_id, which
# selects the movie whose id equals the user's id, not what the user rated.
# Each raw rating is a (user, item, rating, timestamp) tuple.
user_rated_movie_ids = {
    item_id
    for rater_id, item_id, _rating, _timestamp in data.raw_ratings
    if rater_id == user_id
}

# Catalogue rows for the movies the user has already rated.
user_data = df[df['movieId'].isin(user_rated_movie_ids)]

In [None]:
# Candidate pool: every movie in the catalogue minus those the user already rated.
all_movie_ids = {*df['movieId']}
unrated_movie_ids = all_movie_ids.difference(user_rated_movie_ids)

In [None]:
# Estimate a rating for every candidate movie and collect
# (user id, movie id, estimated rating) triples.
unrated_movie_predictions = []
for candidate_id in unrated_movie_ids:
    estimate = algo.predict(user_id, candidate_id).est
    unrated_movie_predictions.append((user_id, candidate_id, estimate))

In [None]:
# Rank the candidates by estimated rating, highest first, and keep the best N.
N = 10
ranked_predictions = list(unrated_movie_predictions)
ranked_predictions.sort(key=lambda triple: triple[2], reverse=True)
top_n_recommendations = ranked_predictions[:N]

In [None]:
# Print the recommended movies with their estimated ratings.
print("Top Movie Recommendations:")
for recommendation in top_n_recommendations:
    # Each entry is (user id, movie id, estimated rating); the user id is not needed here.
    movie_id, predicted_rating = recommendation[1:]
    # Look the title up in the catalogue; the first matching row is used.
    movie_title = df.loc[df['movieId'] == movie_id, 'title'].iloc[0]
    print(f"Movie: {movie_title}, Predicted Rating: {predicted_rating}")

Top Movie Recommendations:
Movie: So Big! (1932), Predicted Rating: 5
Movie: Jumanji (1995), Predicted Rating: 5
Movie: Grumpier Old Men (1995), Predicted Rating: 5
Movie: Waiting to Exhale (1995), Predicted Rating: 5
Movie: Father of the Bride Part II (1995), Predicted Rating: 5
Movie: Heat (1995), Predicted Rating: 5
Movie: Sabrina (1995), Predicted Rating: 5
Movie: Tom and Huck (1995), Predicted Rating: 5
Movie: Sudden Death (1995), Predicted Rating: 5
Movie: GoldenEye (1995), Predicted Rating: 5


In [None]:
# Input new user ratings, keyed by the exact movie title used in the catalogue.
new_user_ratings = {
    'So Big! (1932)': 4.5,
    'Jumanji (1995)': 3.0,
    'Heat (1995)': 5.0
}

# Resolve each rated title to its movieId. A title that is missing from the
# catalogue is reported and skipped instead of crashing with an IndexError.
newly_rated_movie_ids = set()
for movie_title, rating in new_user_ratings.items():
    matching_ids = df.loc[df['title'] == movie_title, 'movieId']
    if matching_ids.empty:
        print(f"Skipping unknown title: {movie_title!r}")
        continue
    newly_rated_movie_ids.add(matching_ids.iloc[0])

# Movies the user has just rated are no longer candidates. Previously their
# actual ratings were appended to the prediction list, so already-rated movies
# were recommended back to the user and appeared twice in the list.
candidate_movie_ids = unrated_movie_ids - newly_rated_movie_ids

# Predict ratings for the remaining unrated movies.
# NOTE(review): the new ratings are not fed back into `algo` in this cell, so
# the estimates only reflect them once the model is refit with them included.
unrated_movie_predictions = [
    (user_id, movie_id, algo.predict(user_id, movie_id).est)
    for movie_id in candidate_movie_ids
]

# Sort predictions and recommend top N movies
N = 10
top_n_recommendations = sorted(unrated_movie_predictions, key=lambda x: x[2], reverse=True)[:N]

