In [68]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [70]:
# Step 1: Read the two input tables from CSV files in the working directory
#   movies  -> supplies the movieId / title pairs used for lookups later on
#   ratings -> supplies the userId / movieId / rating triples pivoted in Step 2

movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

In [72]:
# Step 2: Build the user x movie rating matrix
# One row per userId, one column per movieId, each cell holding the rating.
# (user, movie) pairs without a rating come out as NaN.

rating_matrix = pd.pivot_table(
    ratings,
    values='rating',
    index='userId',
    columns='movieId',
)

print(rating_matrix)

movieId  1       2       3       4       5       6       7       8       \
userId                                                                    
1           4.0     NaN     4.0     NaN     NaN     4.0     NaN     NaN   
2           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
3           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
4           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
5           4.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
...         ...     ...     ...     ...     ...     ...     ...     ...   
606         2.5     NaN     NaN     NaN     NaN     NaN     2.5     NaN   
607         4.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
608         2.5     2.0     2.0     NaN     NaN     NaN     NaN     NaN   
609         3.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
610         5.0     NaN     NaN     NaN     NaN     5.0     NaN     NaN   

movieId  9       10     

In [74]:
# Step 3: Centre every user's ratings on that user's own mean
# (this per-user centring is the "adjusted" part of Adjusted Cosine Similarity)

rating_matrix_centered = rating_matrix.subtract(
    rating_matrix.mean(axis='columns'),  # one mean per userId row; NaNs are skipped
    axis='index',                        # align those means on the row labels
)

print(rating_matrix_centered)

movieId    1         2         3       4       5         6         7       \
userId                                                                      
1       -0.366379       NaN -0.366379     NaN     NaN -0.366379       NaN   
2             NaN       NaN       NaN     NaN     NaN       NaN       NaN   
3             NaN       NaN       NaN     NaN     NaN       NaN       NaN   
4             NaN       NaN       NaN     NaN     NaN       NaN       NaN   
5        0.363636       NaN       NaN     NaN     NaN       NaN       NaN   
...           ...       ...       ...     ...     ...       ...       ...   
606     -1.157399       NaN       NaN     NaN     NaN       NaN -1.157399   
607      0.213904       NaN       NaN     NaN     NaN       NaN       NaN   
608     -0.634176 -1.134176 -1.134176     NaN     NaN       NaN       NaN   
609     -0.270270       NaN       NaN     NaN     NaN       NaN       NaN   
610      1.311444       NaN       NaN     NaN     NaN  1.311444       NaN   

In [76]:
# Step 4: Turn the missing entries into 0 so the similarity step gets a dense matrix
# After centring, 0 is exactly "the user's average", so an unrated movie
# contributes nothing to any dot product.

rating_filled = rating_matrix_centered.fillna(value=0)

print(rating_filled)

movieId    1         2         3       4       5         6         7       \
userId                                                                      
1       -0.366379  0.000000 -0.366379     0.0     0.0 -0.366379  0.000000   
2        0.000000  0.000000  0.000000     0.0     0.0  0.000000  0.000000   
3        0.000000  0.000000  0.000000     0.0     0.0  0.000000  0.000000   
4        0.000000  0.000000  0.000000     0.0     0.0  0.000000  0.000000   
5        0.363636  0.000000  0.000000     0.0     0.0  0.000000  0.000000   
...           ...       ...       ...     ...     ...       ...       ...   
606     -1.157399  0.000000  0.000000     0.0     0.0  0.000000 -1.157399   
607      0.213904  0.000000  0.000000     0.0     0.0  0.000000  0.000000   
608     -0.634176 -1.134176 -1.134176     0.0     0.0  0.000000  0.000000   
609     -0.270270  0.000000  0.000000     0.0     0.0  0.000000  0.000000   
610      1.311444  0.000000  0.000000     0.0     0.0  1.311444  0.000000   

In [78]:
# Step 5: Item-item Adjusted Cosine Similarity
# Transposing puts one movie per row, so cosine_similarity compares movies
# (not users) and returns a square movie x movie array.

item_sim = cosine_similarity(rating_filled.transpose())

# Re-attach the movieId labels on both axes so scores can be looked up by id.
item_sim_df = pd.DataFrame(
    data=item_sim,
    index=rating_filled.columns,
    columns=rating_filled.columns,
)

print(item_sim_df)

movieId    1         2         3         4         5         6         7       \
movieId                                                                         
1        1.000000  0.061511  0.051260 -0.036175 -0.140422  0.089142 -0.039411   
2        0.061511  1.000000  0.056252 -0.106873  0.081893 -0.031387  0.020577   
3        0.051260  0.056252  1.000000 -0.035193  0.100357 -0.008854  0.060727   
4       -0.036175 -0.106873 -0.035193  1.000000  0.021928 -0.017004  0.007410   
5       -0.140422  0.081893  0.100357  0.021928  1.000000 -0.052783  0.146651   
...           ...       ...       ...       ...       ...       ...       ...   
193581   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
193583   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
193585   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
193587   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
193609   0.000000  0.000000 

In [82]:
# Step 6: Function that prints the top-N movies most similar to a given title


def recommend_similar_movies(movie_name, top_n=5, movies_df=None, sim_df=None):
    """Print the ``top_n`` movies most similar to the first title matching ``movie_name``.

    The title lookup is a case-insensitive, literal (non-regex) substring match.
    When several titles match, the first one (in table order) that has a column
    in the similarity matrix is used.

    Parameters
    ----------
    movie_name : str
        Full or partial movie title. Surrounding whitespace is ignored for
        matching; the text is echoed unchanged in the printed messages.
    top_n : int, default 5
        Number of neighbours to print.
    movies_df : pandas.DataFrame, optional
        Table with ``movieId`` and ``title`` columns. Defaults to the
        notebook-level ``movies`` frame.
    sim_df : pandas.DataFrame, optional
        Square item-item similarity table labelled by movieId on both axes.
        Defaults to the notebook-level ``item_sim_df`` frame.

    Returns
    -------
    None
        Results (or a "not found" message) are written to stdout.
    """
    # Fall back to the notebook-level tables when none are passed in.
    if movies_df is None:
        movies_df = movies
    if sim_df is None:
        sim_df = item_sim_df

    # An empty substring matches every title, so a blank query would silently
    # resolve to the first movie in the table. Treat it as "not found" instead.
    query = movie_name.strip()
    if not query:
        print(f"No movie found for '{movie_name}'")
        return

    # na=False: a missing title counts as "no match". Without it the mask would
    # contain NaN and pandas refuses to use it for boolean indexing.
    title_hits = movies_df['title'].str.contains(query, case=False, regex=False, na=False)
    matches = movies_df[title_hits]
    if matches.empty:
        print(f"No movie found for '{movie_name}'")
        return

    # Only movies with a column in the similarity matrix can be used as the
    # seed; skip matching titles that do not have one.
    usable = matches[matches['movieId'].isin(sim_df.columns)]
    if usable.empty:
        print("Movie ID not found in similarity matrix.")
        return
    movie_id = usable['movieId'].iloc[0]

    # Similarity of every other movie to the seed (the seed itself is dropped).
    sim_scores = sim_df[movie_id].drop(movie_id)
    top_movies = sim_scores.sort_values(ascending=False).head(top_n)

    # Resolve titles in a single pass; the first row wins for a duplicated id.
    known = movies_df[movies_df['movieId'].isin(top_movies.index)]
    title_by_id = {}
    for known_id, known_title in zip(known['movieId'], known['title']):
        title_by_id.setdefault(known_id, known_title)

    # Printing recommended movies
    print(f"\nTop {top_n} recommendations for '{movie_name}':")
    for mid, score in top_movies.items():
        # An id present in the ratings but absent from the movies table should
        # not abort the whole listing.
        title = title_by_id.get(mid, f"<unknown movieId {mid}>")
        print(f"- {title} (Similarity: {score:.3f})")

In [84]:
# Example usage 1: Toy Story
# The query is fixed in code instead of being read with input(), so the cell gives
# the same result on Restart & Run All and in headless runs that have no stdin.

movie_input = "Toy story"
recommend_similar_movies(movie_input)

Enter a movie name:  Toy story



Top 5 recommendations for 'Toy story':
- Toy Story 2 (1999) (Similarity: 0.403)
- Toy Story 3 (2010) (Similarity: 0.327)
- Aladdin (1992) (Similarity: 0.327)
- Wallace & Gromit: The Wrong Trousers (1993) (Similarity: 0.305)
- Back to the Future (1985) (Similarity: 0.277)


In [86]:
# Example usage 2: Jumanji
# The query is fixed in code instead of being read with input(), so the cell gives
# the same result on Restart & Run All and in headless runs that have no stdin.

movie_input = "Jumanji"
recommend_similar_movies(movie_input)

Enter a movie name:  Jumanji



Top 5 recommendations for 'Jumanji':
- Loser (1991) (Similarity: 0.288)
- Return from Witch Mountain (1978) (Similarity: 0.267)
- Bad Words (2013) (Similarity: 0.264)
- Timeline (2003) (Similarity: 0.263)
- Synecdoche, New York (2008) (Similarity: 0.258)


In [88]:
# Example usage 3: Titanic
# The query is fixed in code instead of being read with input(), so the cell gives
# the same result on Restart & Run All and in headless runs that have no stdin.

movie_input = "Titanic"
recommend_similar_movies(movie_input)

Enter a movie name:  Titanic



Top 5 recommendations for 'Titanic':
- Miami Vice (2006) (Similarity: 0.276)
- Batman & Robin (1997) (Similarity: 0.236)
- Teenage Mutant Ninja Turtles III (1993) (Similarity: 0.235)
- Daddy Day Camp (2007) (Similarity: 0.226)
- Hamlet 2 (2008) (Similarity: 0.226)


In [90]:
# Example usage 4: Batman
# The query is fixed in code instead of being read with input(), so the cell gives
# the same result on Restart & Run All and in headless runs that have no stdin.
# Note: "Batman" is a partial title; the function uses the first matching movie.

movie_input = "Batman"
recommend_similar_movies(movie_input)

Enter a movie name:  Batman



Top 5 recommendations for 'Batman':
- Batman & Robin (1997) (Similarity: 0.313)
- My Crazy Life (Mi vida loca) (1993) (Similarity: 0.306)
- Clockers (1995) (Similarity: 0.291)
- Batman Returns (1992) (Similarity: 0.257)
- Replacement Killers, The (1998) (Similarity: 0.254)


In [92]:
# Example usage 5: a title that matches nothing (expected: the "No movie found" message)
# The query is fixed in code instead of being read with input(), so the cell gives
# the same result on Restart & Run All and in headless runs that have no stdin.

movie_input = "blahhhh"
recommend_similar_movies(movie_input)

Enter a movie name:  blahhhh


No movie found for 'blahhhh'
