# **Movie Recommendation System**


---



# Objective

Build a movie recommendation system that suggests movies to users based on their previous ratings using collaborative filtering.


# Data Source

The MovieLens 100k Dataset contains 100,000 ratings for 1682 movies by 943 users.


# Import Library



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Import Data

In [None]:
# Download MovieLens dataset
!wget https://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip ml-100k.zip

# Load data
movies = pd.read_csv('ml-100k/u.item', sep='|', header=None, encoding='latin-1', usecols=[0, 1], names=['movie_id', 'title'])
ratings = pd.read_csv('ml-100k/u.data', sep='\t', header=None, usecols=[0, 1, 2], names=['user_id', 'movie_id', 'rating'])


# Describe Data



In [None]:
# Preview the first rows of each table
for heading, frame in (("Movies Data:", movies), ("\nRatings Data:", ratings)):
    print(heading)
    print(frame.head())

# Column dtypes, non-null counts and memory usage of each table
for heading, frame in (
    ("\nMovies Data Information:", movies),
    ("\nRatings Data Information:", ratings),
):
    print(heading)
    frame.info()


# Data Visualization



In [None]:
# Bar chart of how many times each rating value occurs
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='rating', data=ratings, ax=ax)
ax.set_title('Distribution of Movie Ratings')
ax.set_xlabel('Rating')
ax.set_ylabel('Count')
plt.show()


# Data Preprocessing



In [None]:
# Merge the ratings with movies dataset so every rating row carries its title
data = pd.merge(ratings, movies, on='movie_id')

# Create a user-movie matrix (rows: user_id, columns: title, cells: rating).
# MovieLens 100k lists some titles under more than one movie_id, so a user can
# have several ratings for the same title. DataFrame.pivot raises
# "Index contains duplicate entries, cannot reshape" in that case, whereas
# pivot_table collapses the duplicates by averaging them.
user_movie_matrix = data.pivot_table(
    index='user_id', columns='title', values='rating', aggfunc='mean'
)

# Fill NaN values (titles a user never rated) with 0 for model training
user_movie_matrix = user_movie_matrix.fillna(0)


# Define Target Variable (y) and Feature Variables (x)



In [None]:
# y holds the row labels of the user-movie matrix (the user_id of each row)
y = user_movie_matrix.index
# X is the same matrix as a plain NumPy array: one row per user, one column per title
X = user_movie_matrix.to_numpy()


# Train Test Split



In [None]:
# Hold out 20% of the users (rows of X). The KNN model is fitted on the
# remaining users, and the held-out users are the ones we query neighbours for.
# train_test_split shuffles the rows, so y is split alongside X: train_users[i]
# and test_users[i] are the user_ids of train_data[i] and test_data[i]. Without
# them the row -> user mapping is lost. train_data and test_data are the same
# arrays as when X is split on its own with this random_state.
train_data, test_data, train_users, test_users = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# Modeling



In [None]:
# Fit a brute-force nearest-neighbour index over the training users' rating
# vectors, comparing users by cosine distance (fit returns the fitted estimator)
model_knn = NearestNeighbors(algorithm='brute', metric='cosine').fit(train_data)


# Model Evaluation



In [None]:
# Movie recommendations for one held-out user
user_idx = 42  # row position inside test_data, not a MovieLens user_id
n_recommendations = 5
target_ratings = test_data[user_idx]

# train_test_split shuffled the rows, so y[user_idx] is NOT this user. Recover
# the real user_id by locating the identical rating row in the unshuffled X.
matching_rows = np.flatnonzero((X == target_ratings).all(axis=1))
target_user_id = y[matching_rows[0]]

# kneighbors returns row positions in train_data, i.e. neighbouring USERS.
# The query user is not part of train_data, so every neighbour is kept.
distances, indices = model_knn.kneighbors([target_ratings], n_neighbors=6)
neighbor_ratings = train_data[indices.flatten()]
similarities = 1.0 - distances.flatten()  # cosine distance -> cosine similarity

# Score each movie by the similarity-weighted sum of the neighbours' ratings,
# then drop the movies the target user has already rated.
scores = similarities @ neighbor_ratings
scores[target_ratings > 0] = 0.0
ranked_movies = np.argsort(-scores, kind="stable")[:n_recommendations]
top_movies = [movie_idx for movie_idx in ranked_movies if scores[movie_idx] > 0]

print(f"Recommendations for User {target_user_id}:")
for i, movie_idx in enumerate(top_movies, start=1):
    print(f"{i}: {user_movie_matrix.columns[movie_idx]}")


# Prediction


In [None]:
# Predicting top recommendations for any held-out user
def recommend_movies(user_id, n_recommendations=5, *, n_neighbors=10):
    """Recommend unseen movies for one held-out user.

    The user's nearest neighbours are looked up among the training users, each
    movie is scored by the similarity-weighted sum of those neighbours'
    ratings, and the best-scoring movies the user has not rated are returned.

    Args:
        user_id: Row position of the user inside ``test_data``. This is NOT a
            MovieLens user_id, because the train/test split shuffles the rows.
        n_recommendations: Maximum number of titles to return.
        n_neighbors: Number of training users to consult; capped at the number
            of rows in ``train_data``.

    Returns:
        A list of movie titles ordered from highest to lowest score. It may be
        shorter than ``n_recommendations`` when the neighbours rated too few
        movies the user has not seen.

    Raises:
        IndexError: If ``user_id`` is outside the rows of ``test_data``.
    """
    if n_recommendations <= 0:
        return []

    target_ratings = test_data[user_id]
    k = min(n_neighbors, train_data.shape[0])
    distances, indices = model_knn.kneighbors([target_ratings], n_neighbors=k)

    # indices are row positions in train_data (users), not movie columns.
    neighbor_ratings = train_data[indices.flatten()]
    similarities = 1.0 - distances.flatten()  # cosine distance -> similarity

    scores = similarities @ neighbor_ratings
    scores[target_ratings > 0] = 0.0  # never recommend an already-rated movie

    ranked = np.argsort(-scores, kind="stable")[:n_recommendations]
    return [user_movie_matrix.columns[i] for i in ranked if scores[i] > 0]

# 42 is a row position in test_data, so the message says so instead of
# presenting it as a MovieLens user id.
user_id = 42
recommendations = recommend_movies(user_id)
print(f"Top {len(recommendations)} movie recommendations for test-set user at row {user_id}: {recommendations}")


# Explanation

In this project, we built a movie recommendation system using collaborative filtering based on the K-nearest neighbors algorithm. We used the MovieLens 100k dataset and transformed the data into a user-item matrix, where each cell represents the rating a user gave to a particular movie. The model recommends movies to users by finding the nearest neighbors based on cosine similarity, allowing for personalized suggestions.