# Movie Recommendation System using Collaborative Filtering and Matrix Factorization
This notebook implements a movie recommendation system using:

- **User-based and item-based collaborative filtering** (cosine similarity)
- **Matrix factorization** with ALS (Alternating Least Squares)

Dataset: **MovieLens 100k**

In [2]:
pip install pandas numpy matplotlib seaborn scipy implicit


Collecting implicit
  Using cached implicit-0.7.2.tar.gz (70 kB)
  Installing build dependencies: started
  Installing build dependencies: still running...
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: implicit
  Building wheel for implicit (pyproject.toml): started
  Building wheel for implicit (pyproject.toml): finished with status 'error'
Failed to build implicit
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  Building wheel for implicit (pyproject.toml) did not run successfully.
  exit code: 1
  
  [505 lines of output]
  
  
  --------------------------------------------------------------------------------
  -- Trying 'Ninja (Visual Studio 17 2022 x64 v144)' generator
  --------------------------------
  ---------------------------
  ----------------------
  -----------------
  ------------
  -------
  --
  Not searching for unused variables given on the command line.
    Compatibility with CMake < 3.10 will be removed from a future version of
    CMake.
  
    Update the VERSION argument <min> value.  Or, use the <min>...<max> syntax
    to tell CMake that the project requires at least <min> but has been updated
    to work with policies introduced by <max> or earlier.
  
  
  -- The C compiler identification is unknown
  CMake Error at CMakeLists.txt:3 (ENABLE_LANGUAGE):
    No CMAKE_C_COMPILER could be found.
  
    Tell CMake where to find the

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

print("Libraries Loaded!")

ModuleNotFoundError: No module named 'implicit'

In [None]:
# Load the MovieLens 100k ratings (tab-separated; column names supplied explicitly)
columns = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv(
    "https://files.grouplens.org/datasets/movielens/ml-100k/u.data",
    sep="\t",
    names=columns,
)

# Movie metadata is pipe-separated; only the first two columns (id, title) are read
movies = pd.read_csv(
    "https://files.grouplens.org/datasets/movielens/ml-100k/u.item",
    sep="|",
    encoding="latin-1",
    header=None,
    usecols=[0, 1],
    names=['item_id', 'title'],
)

# Attach the title to every rating, then discard the timestamp column
df = ratings.merge(movies, on="item_id").drop(columns=['timestamp'])

# Preview the merged dataset
df.head()

In [None]:
# Create User-Item Matrix (rows = user_id, columns = title, values = rating).
# pivot_table is used instead of pivot because a title is not guaranteed to be
# unique per item_id (MovieLens 100k contains a few duplicated titles), so one
# user can have several ratings for the same title; pivot() raises
# "Index contains duplicate entries" in that case. Duplicates are averaged.
user_item_matrix = df.pivot_table(
    index="user_id", columns="title", values="rating", aggfunc="mean"
)

# Unrated movies are treated as 0 for the similarity computation.
# Fill once and reuse for both similarity matrices.
user_item_filled = user_item_matrix.fillna(0)

# Compute similarity matrices
# User-user cosine similarity, labelled by user_id on both axes
user_sim_matrix = pd.DataFrame(cosine_similarity(user_item_filled),
                               index=user_item_matrix.index, columns=user_item_matrix.index)

# Item-item cosine similarity, labelled by title on both axes
item_sim_matrix = pd.DataFrame(cosine_similarity(user_item_filled.T),
                               index=user_item_matrix.columns, columns=user_item_matrix.columns)

print("User-Item Matrix and Similarity Matrices Created!")

In [None]:
# Function to get user-based recommendations
def get_user_based_recommendations(user_id, n=5):
    """Recommend movies to a user from the ratings of their most similar users.

    Parameters
    ----------
    user_id : label of the target user in ``user_sim_matrix``.
    n : int, default 5
        Number of nearest neighbours to consult and maximum number of
        recommendations to return.

    Returns
    -------
    pandas.Series
        Mean neighbour rating indexed by title, sorted from highest to lowest,
        restricted to titles the target user has not rated. At most ``n`` rows.

    Raises
    ------
    KeyError
        If ``user_id`` is not a column of ``user_sim_matrix``.
    """
    # Remove the target user explicitly rather than assuming they sort first:
    # another user with an identical rating vector also has similarity 1.0.
    neighbour_scores = user_sim_matrix[user_id].drop(labels=user_id, errors="ignore")
    similar_users = neighbour_scores.sort_values(ascending=False).index[:n]

    # Titles the target user has already rated must not be recommended again
    already_rated = set(df.loc[df["user_id"] == user_id, "title"])

    candidate_ratings = df[
        df["user_id"].isin(similar_users) & ~df["title"].isin(already_rated)
    ]
    recommended_movies = (
        candidate_ratings.groupby("title")["rating"].mean().sort_values(ascending=False)
    )
    return recommended_movies.head(n)

# Example: top-5 user-based recommendations for user 5
get_user_based_recommendations(user_id=5, n=5)

In [None]:
# Function to get item-based recommendations
def get_item_based_recommendations(movie_title, n=5):
    """Return the titles most similar to ``movie_title``.

    Parameters
    ----------
    movie_title : label of the query movie in ``item_sim_matrix``.
    n : int, default 5
        Maximum number of similar titles to return.

    Returns
    -------
    pandas.Index
        Up to ``n`` titles ordered from most to least similar; the query
        movie itself is never included.

    Raises
    ------
    KeyError
        If ``movie_title`` is not a column of ``item_sim_matrix``.
    """
    # Drop the query movie by label instead of slicing off position 0: a movie
    # with an identical rating vector ties at similarity 1.0, so the query is
    # not guaranteed to sort first.
    similarity_scores = item_sim_matrix[movie_title].drop(labels=movie_title, errors="ignore")
    similar_movies = similarity_scores.sort_values(ascending=False).index[:n]
    return similar_movies

# Example: the 5 movies most similar to "Star Wars (1977)"
get_item_based_recommendations(movie_title="Star Wars (1977)", n=5)

In [None]:
# Convert dataframe to sparse matrix for ALS model (rows = users, columns = movies)
user_item_sparse = csr_matrix(user_item_matrix.fillna(0).values)

# Train ALS model
# This cell targets the implicit >= 0.5 API (the install cell resolves 0.7.2):
# fit() takes the users x items matrix directly, not its transpose.
# random_state makes the factorization reproducible across runs.
als_model = AlternatingLeastSquares(factors=50, regularization=0.1, iterations=20,
                                    random_state=42)
als_model.fit(user_item_sparse)

# Function to get ALS recommendations
def get_als_recommendations(user_id, n=5):
    """Recommend movie titles for a user with the trained ALS model.

    Parameters
    ----------
    user_id : label of the target user in ``user_item_matrix.index``.
    n : int, default 5
        Number of recommendations requested from the model.

    Returns
    -------
    list
        Titles (labels of ``user_item_matrix.columns``) in the order returned
        by the model.

    Raises
    ------
    KeyError
        If ``user_id`` is not present in ``user_item_matrix.index``.
    """
    # Look up the row position instead of assuming ids are contiguous and 1-based
    user_idx = user_item_matrix.index.get_loc(user_id)
    # implicit >= 0.5: pass only this user's row and unpack (ids, scores) arrays
    item_ids, _scores = als_model.recommend(user_idx, user_item_sparse[user_idx], N=n)
    recommended_movies = [user_item_matrix.columns[i] for i in item_ids]
    return recommended_movies

# Example: Recommend for user 5
get_als_recommendations(user_id=5)