<a href="https://colab.research.google.com/github/Abhinav4518/Abhinav4518/blob/main/Task3Week3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix


In [3]:
# Load the two CSV files this notebook works from (expected in the working directory).
try:
    movies = pd.read_csv('movies.csv')
    ratings = pd.read_csv('ratings.csv')
except FileNotFoundError as err:
    # Raise instead of calling exit(): inside Jupyter/Colab exit() shuts the
    # kernel down (discarding all state), whereas an exception simply stops
    # "Run All" at this cell with a readable traceback. Chaining keeps the
    # original error, which names the file that was actually missing.
    raise FileNotFoundError(
        "Error: 'movies.csv' or 'ratings.csv' not found. Please download the dataset and place files in the same directory."
    ) from err


In [4]:
# Join every rating row with its movie's metadata through the shared 'movieId' key.
df = pd.merge(left=ratings, right=movies, on='movieId', how='inner')

# Quick sanity preview of the joined frame.
print("\nMerged Dataset Head:", df.head(), sep="\n")




Merged Dataset Head:
   userId  movieId rating     timestamp  \
0       1      296    5.0  1.147880e+09   
1       1      306    3.5  1.147869e+09   
2       1      307    5.0  1.147869e+09   
3       1      665    5.0  1.147879e+09   
4       1      899    3.5  1.147869e+09   

                                              title  \
0                               Pulp Fiction (1994)   
1  Three Colors: Red (Trois couleurs: Rouge) (1994)   
2  Three Colors: Blue (Trois couleurs: Bleu) (1993)   
3                                Underground (1995)   
4                        Singin' in the Rain (1952)   

                        genres  
0  Comedy|Crime|Drama|Thriller  
1                        Drama  
2                        Drama  
3             Comedy|Drama|War  
4       Comedy|Musical|Romance  


In [6]:
# Inspect how pandas parsed the 'rating' column before any cleaning.
print("\nOriginal data type of 'rating' column:", df.dtypes['rating'])



Original data type of 'rating' column: object


In [7]:
# Convert ratings to numbers; entries that cannot be parsed become NaN (errors='coerce').
df = df.assign(rating=pd.to_numeric(df['rating'], errors='coerce'))

In [8]:
# Discard rows whose rating is missing (including values coerced to NaN above).
df = df.dropna(subset=['rating'])

In [9]:
# Confirm the dtype of 'rating' after the numeric conversion.
print("New data type of 'rating' column:", df.dtypes['rating'])

New data type of 'rating' column: float64


In [10]:
# One row per movie title, one column per user; cells hold the rating
# (pivot_table's default aggregation applies when a title/user pair repeats).
# Missing title/user combinations are filled with 0.
movie_matrix = df.pivot_table(
    values='rating',
    index='title',
    columns='userId',
).fillna(0)

In [11]:
# Keep a compressed-sparse-row copy of the title-by-user matrix for the similarity step.
movie_matrix_sparse = csr_matrix(movie_matrix.to_numpy())
print("\nCreated user-item interaction matrix.")



Created user-item interaction matrix.


In [12]:
# Pairwise cosine similarity between the rows (movies) of the sparse matrix;
# dense_output=True (the default) requests a dense array even for sparse input.
movie_similarity = cosine_similarity(movie_matrix_sparse, dense_output=True)
print("\nComputed movie-movie similarity matrix.")


Computed movie-movie similarity matrix.


In [13]:
def get_recommendations(movie_title, similarity_matrix=None, matrix=None, num_recommendations=10):
    """
    Recommends movies based on a given movie title.

    Args:
        movie_title: Title to look up in ``matrix.index``.
        similarity_matrix: Square, row-indexable collection of similarity
            scores whose rows/columns follow the order of ``matrix.index``.
            When omitted, the notebook-level ``movie_similarity`` is used.
        matrix: Object whose ``index`` holds the movie titles (the pivoted
            ratings DataFrame). When omitted, the notebook-level
            ``movie_matrix`` is used.
        num_recommendations: Maximum number of titles to return; values
            below zero are treated as zero.

    Returns:
        A list of at most ``num_recommendations`` titles ordered from most to
        least similar (never containing ``movie_title`` itself), or an error
        message string when the title is not present in ``matrix.index``.
    """
    # Resolve the defaults at call time rather than at definition time, so that
    # re-running the cells that rebuild the matrices is picked up without
    # having to re-run this definition as well.
    if similarity_matrix is None:
        similarity_matrix = movie_similarity
    if matrix is None:
        matrix = movie_matrix

    # Only the title lookup is guarded: a KeyError raised anywhere else would
    # indicate a real bug and must not be reported as "movie not found".
    try:
        movie_index = matrix.index.get_loc(movie_title)
    except KeyError:
        return f"Error: Movie '{movie_title}' not found in the dataset. Please check the spelling or try another movie."

    # Similarity of the requested movie to every movie (itself included).
    scores = similarity_matrix[movie_index]

    # Exclude the requested movie by position. Dropping "the first sorted
    # entry" is not reliable: another movie with an equal score can sort ahead
    # of it, which would leave the requested movie in its own recommendations.
    candidate_indices = [i for i in range(len(scores)) if i != movie_index]

    # Stable sort, highest score first; ties keep their original index order.
    candidate_indices.sort(key=lambda i: scores[i], reverse=True)

    # Clamp so that a negative request yields an empty list instead of a
    # slice that drops items from the end.
    top_movies_indices = candidate_indices[:max(num_recommendations, 0)]

    # Map positions back to titles.
    return [matrix.index[i] for i in top_movies_indices]


In [14]:
# Query title for the first demo; the remaining arguments use the function's defaults.
movie_to_recommend = 'Toy Story (1995)'
recommendations = get_recommendations(movie_title=movie_to_recommend)

In [15]:
# get_recommendations returns a list on success and an error string otherwise.
if not isinstance(recommendations, list):
    print(recommendations)
else:
    # Numbered listing, starting at 1.
    for i, movie in enumerate(recommendations, start=1):
        print(f"{i}. {movie}")


1. Back to the Future (1985)
2. Star Wars: Episode IV - A New Hope (1977)
3. Star Wars: Episode VI - Return of the Jedi (1983)
4. Independence Day (a.k.a. ID4) (1996)
5. Pulp Fiction (1994)
6. Lion King, The (1994)
7. Forrest Gump (1994)
8. Star Wars: Episode V - The Empire Strikes Back (1980)
9. Silence of the Lambs, The (1991)
10. Jurassic Park (1993)


In [16]:
# Second demo query, again with the function's default settings.
movie_to_recommend_2 = 'Jumanji (1995)'
recommendations_2 = get_recommendations(movie_title=movie_to_recommend_2)

print(f"\n--- Top 10 Movie Recommendations for '{movie_to_recommend_2}' ---")
# A list means success; anything else is the error message string.
if not isinstance(recommendations_2, list):
    print(recommendations_2)
else:
    for i, movie in enumerate(recommendations_2, start=1):
        print(f"{i}. {movie}")


--- Top 10 Movie Recommendations for 'Jumanji (1995)' ---
1. Home Alone (1990)
2. Jurassic Park (1993)
3. Mask, The (1994)
4. Cliffhanger (1993)
5. Mrs. Doubtfire (1993)
6. Speed (1994)
7. Lion King, The (1994)
8. Batman (1989)
9. Forrest Gump (1994)
10. Beauty and the Beast (1991)
