<a href="https://colab.research.google.com/github/Abhi10jan/EaseLogi/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# Load datasets
movies = pd.read_csv("movies.csv")  # Movie details
ratings = pd.read_csv("ratings.csv")  # User ratings
tags = pd.read_csv("tags.csv")  # User-generated tags
links = pd.read_csv("links.csv")  # External links

# Merge relevant datasets
movie_tags = tags.groupby("movieId")["tag"].apply(lambda x: " ".join(x)).reset_index()
movies = movies.merge(movie_tags, on="movieId", how="left")
movies["tag"] = movies["tag"].fillna("")  # Fill missing tags with empty string

# Preview dataset
print(movies.head())


   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  \
0  Adventure|Animation|Children|Comedy|Fantasy   
1                   Adventure|Children|Fantasy   
2                               Comedy|Romance   
3                         Comedy|Drama|Romance   
4                                       Comedy   

                                            tag  
0                               pixar pixar fun  
1  fantasy magic board game Robin Williams game  
2                                     moldy old  
3                                                
4                              pregnancy remake  


In [2]:
# Combine genres and tags into a single column for content-based filtering
movies["content"] = movies["genres"] + " " + movies["tag"]

# Use TF-IDF vectorizer
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(movies["content"])

# Convert to a compressed sparse row format for efficient computation
tfidf_matrix = csr_matrix(tfidf_matrix)

In [3]:
# Create a user-movie ratings matrix
user_movie_ratings = ratings.pivot(index="movieId", columns="userId", values="rating").fillna(0)

# Convert to a sparse matrix format
user_movie_sparse = csr_matrix(user_movie_ratings.values)

# Apply KNN
knn_model = NearestNeighbors(metric="cosine", algorithm="brute", n_neighbors=10)
knn_model.fit(user_movie_sparse)

In [4]:
import difflib

def find_closest_movie(title, movie_list):
    """
    Find the closest matching movie title using fuzzy string matching.
    """
    title = title.lower().strip()
    closest_match = difflib.get_close_matches(title, movie_list, n=1, cutoff=0.6)
    return closest_match[0] if closest_match else None

def recommend_hybrid(movie_title, num_recommendations=10):
    """
    Hybrid recommendation system using content-based and collaborative filtering.
    """
    # Normalize input movie title
    movie_list = movies["title"].str.lower().tolist()
    matched_title = find_closest_movie(movie_title, movie_list)

    if not matched_title:
        return f"Movie '{movie_title}' not found. Please try a different title."

    # Get the correct index from original movies dataframe
    movie_idx = movies[movies["title"].str.lower() == matched_title].index[0]

    # Content-Based Filtering using KNN
    content_knn = NearestNeighbors(metric="cosine", algorithm="brute", n_neighbors=10)
    content_knn.fit(tfidf_matrix)
    _, content_indices = content_knn.kneighbors(tfidf_matrix[movie_idx], n_neighbors=num_recommendations)

    # Collaborative Filtering using KNN
    movie_id = movies.iloc[movie_idx]["movieId"]
    if movie_id in user_movie_ratings.index:
        rating_idx = list(user_movie_ratings.index).index(movie_id)
        _, collab_indices = knn_model.kneighbors(user_movie_sparse[rating_idx], n_neighbors=num_recommendations)
        collab_movie_ids = user_movie_ratings.iloc[collab_indices[0]].index.tolist()
    else:
        collab_movie_ids = []

    # Combine both recommendations
    content_recommendations = movies.iloc[content_indices[0]]["title"].tolist()
    collab_recommendations = movies[movies["movieId"].isin(collab_movie_ids)]["title"].tolist()

    # Merge and remove duplicates
    final_recommendations = list(set(content_recommendations + collab_recommendations))[:num_recommendations]

    return final_recommendations

# Example usage with case-insensitive input
movie_input = input("Enter a movie title: ")
recommendations = recommend_hybrid(movie_input)
print(f"Recommended Movies: {recommendations}")


Enter a movie title: avenger
Recommended Movies: Movie 'avenger' not found. Please try a different title.
