## Import Libraries

In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


## Load the Dataset

In [7]:

# Directory holding the MovieLens "latest-small" CSV files. Both tables are
# read from here, so this is the single place to change when the notebook
# runs outside Kaggle.
DATA_DIR = '/kaggle/input/movielens-latest-small-for-education/ml-latest-small'

movies = pd.read_csv(f'{DATA_DIR}/movies.csv')
ratings = pd.read_csv(f'{DATA_DIR}/ratings.csv')


## Explore the Data

In [13]:
# Preview the first rows of the movies table (columns: movieId, title, genres).
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# Preview the first rows of the ratings table loaded from ratings.csv.
ratings.head()


In [None]:
# Missing-value counts per column for both tables. A cell only renders its
# last expression, so the two summaries are combined into one Series (outer
# index level = table name) rather than letting the first be discarded.
pd.concat({'movies': movies.isnull().sum(), 'ratings': ratings.isnull().sum()})


In [None]:
# Count the distinct users and the distinct rated movies in the ratings table.
n_users = ratings['userId'].nunique()
n_movies = ratings['movieId'].nunique()

print("Unique users:", n_users)
print("Unique movies:", n_movies)


## Create User-Item Matrix

In [9]:
# Reshape the long ratings table into a wide userId x movieId grid of ratings.
ratings_wide = ratings.pivot(index='userId', columns='movieId', values='rating')

# User/movie pairs with no rating come out of the pivot as NaN; store them as 0.
user_item_matrix = ratings_wide.fillna(0)
user_item_matrix.head()


movieId  1       2       3       4       5       6       7       8       \
userId                                                                    
1           4.0     0.0     4.0     0.0     0.0     4.0     0.0     0.0   
2           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
5           4.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

movieId  9       10      ...  193565  193567  193571  193573  193579  193581  \
userId                   ...                                                   
1           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
2           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0

## Compute Item Similarity

In [10]:
# Transpose so each row is one movie's vector of ratings across all users;
# cosine_similarity then compares movies with each other.
item_matrix = user_item_matrix.T

# Keep the raw similarity array under its own name so `item_similarity` is
# only ever the labelled DataFrame (never an ndarray first and a frame later).
similarity_values = cosine_similarity(item_matrix)
item_similarity = pd.DataFrame(
    similarity_values,
    index=item_matrix.index,
    columns=item_matrix.index,
)

# Last expression of the cell -> rendered with the notebook's rich display.
item_similarity.head()


movieId    1         2         3         4         5         6         7       \
movieId                                                                         
1        1.000000  0.410562  0.296917  0.035573  0.308762  0.376316  0.277491   
2        0.410562  1.000000  0.282438  0.106415  0.287795  0.297009  0.228576   
3        0.296917  0.282438  1.000000  0.092406  0.417802  0.284257  0.402831   
4        0.035573  0.106415  0.092406  1.000000  0.188376  0.089685  0.275035   
5        0.308762  0.287795  0.417802  0.188376  1.000000  0.298969  0.474002   

movieId    8         9         10      ...  193565  193567  193571  193573  \
movieId                                ...                                   
1        0.131629  0.232586  0.395573  ...     0.0     0.0     0.0     0.0   
2        0.172498  0.044835  0.417693  ...     0.0     0.0     0.0     0.0   
3        0.313434  0.304840  0.242954  ...     0.0     0.0     0.0     0.0   
4        0.158022  0.000000  0.095598  ...

## Item-Based Recommendation Function

In [11]:
def recommend_item(movie_id, top_n=5):
    """Return the ``top_n`` movies most similar to ``movie_id``.

    Similarities are read from the notebook-level ``item_similarity`` frame
    (movieId x movieId) and titles from the notebook-level ``movies`` frame.

    Parameters
    ----------
    movie_id
        A movieId that appears in ``item_similarity``.
    top_n : int, default 5
        Number of most-similar movies to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``title`` and ``similarity_score``, ordered from most to
        least similar and indexed by the matching row labels of ``movies``.
        Neighbours that have no row in ``movies`` are omitted.

    Raises
    ------
    KeyError
        If ``movie_id`` is not a column of ``item_similarity``.
    """
    if movie_id not in item_similarity.columns:
        raise KeyError(f"movie_id {movie_id!r} not found in item_similarity")

    # Exclude the query movie itself, then keep the highest-scoring neighbours.
    sim_scores = item_similarity[movie_id].drop(movie_id)
    top_movies = sim_scores.sort_values(ascending=False).head(top_n)

    # Filtering `movies` returns rows in the table's own order, not in
    # similarity order, so each score is attached by movieId lookup rather
    # than by position; the result is then sorted by score.
    candidates = movies.loc[movies['movieId'].isin(top_movies.index), ['movieId', 'title']]
    recommended_movies = (
        candidates
        .assign(similarity_score=candidates['movieId'].map(top_movies))
        .sort_values('similarity_score', ascending=False)
        .drop(columns='movieId')
    )
    return recommended_movies


## Test Recommendations

In [14]:
# movieId 2 is "Jumanji (1995)" in the movies table preview shown earlier.
jumanji_id = 2
similar_to_jumanji = recommend_item(jumanji_id, top_n=5)

print("Movies similar to Jumanji  (1995):")
print(similar_to_jumanji)


Movies similar to Jumanji  (1995):
                     title  similarity_score
322  Lion King, The (1994)          0.588438
325       Mask, The (1994)          0.549818
418   Jurassic Park (1993)          0.544981
436  Mrs. Doubtfire (1993)          0.538046
504      Home Alone (1990)          0.524876
