<a href="https://colab.research.google.com/github/FaisalMinawi/CODSOFT/blob/main/RecommendationSystem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
!pip install scikit-surprise



In [48]:
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy
import csv
import pandas as pd

In [49]:
# Specify the path to the ratings CSV file
csv_file_path = 'ratings.csv'

# Open the CSV file and skip the header row
with open(csv_file_path, newline='') as csvfile:
    csv_reader = csv.reader(csvfile)
    next(csv_reader)  # Skip the header row
    ratings_data = [row for row in csv_reader]

# Define the reader with the correct format
reader = Reader(line_format='user item rating timestamp', sep=',', rating_scale=(1, 5))

# Create a Pandas DataFrame from the processed data
df = pd.DataFrame(ratings_data, columns=['user', 'item', 'rating', 'timestamp'])

# Load the dataset from the Pandas DataFrame
data = Dataset.load_from_df(df[['user', 'item', 'rating']], reader=reader)

In [50]:
# Split the data into a training set and a test set
trainset, testset = train_test_split(data, test_size=0.2)

In [51]:
# Build a user-based collaborative filtering model
sim_options = {'name': 'cosine', 'user_based': True}
model = KNNBasic(sim_options=sim_options)

In [52]:
# Train the model on the training set
model.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7e419d26a710>

In [53]:
# Make predictions on the test set
predictions = model.test(testset)

In [54]:
# Evaluate the model
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

print(f'RMSE: {rmse}')
print(f'MAE: {mae}')

RMSE: 0.9728
MAE:  0.7490
RMSE: 0.972802818191092
MAE: 0.7489703746543493


In [55]:
# Recommend movies for a specific user (replace 'user_id' with the actual user ID)
user_id = '1'
user_movies = data.build_full_trainset().ur[user_id]
unseen_movies = [movie for movie in data.build_full_trainset().all_items() if movie not in user_movies]
user_ratings = [model.predict(user_id, movie).est for movie in unseen_movies]

In [57]:
# Get top N movie recommendations
N = 10

# Map internal indices to actual movie IDs
trainset = data.build_full_trainset()
movie_ids = [trainset.to_raw_iid(movie_idx) for movie_idx in unseen_movies]

top_movie_indices = sorted(range(len(user_ratings)), key=lambda i: user_ratings[i], reverse=True)[:N]

print(f'Recommended movies for User {user_id}:')
for i, movie_idx in enumerate(top_movie_indices):
    movie_id = movie_ids[movie_idx]
    print(f'{i + 1}: Movie ID {movie_id}')


Recommended movies for User 1:
1: Movie ID 1
2: Movie ID 3
3: Movie ID 6
4: Movie ID 47
5: Movie ID 50
6: Movie ID 70
7: Movie ID 101
8: Movie ID 110
9: Movie ID 151
10: Movie ID 157
