In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Load the dataset
df = pd.read_csv(r'https://raw.githubusercontent.com/YBI-Foundation/Dataset/main/Movies%20Recommendation.csv')

# Features for content-based filtering
features = ['Movie_Genre', 'Movie_Language', 'Movie_Popularity', 'Movie_Runtime', 'Movie_Vote', 'Movie_Vote_Count']

# Fill NaN values with empty strings or zeros for numerical columns
df[features] = df[features].fillna('')  # For categorical features
df['Movie_Vote_Count'] = df['Movie_Vote_Count'].fillna(0)  # For numerical features


In [3]:
# Combine features into a single column
df['combined_features'] = df.apply(lambda row: ' '.join([str(row[feature]) for feature in features]), axis=1)


In [4]:
# Create a TF-IDF matrix based on combined features
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_features'])


In [5]:
# Calculate cosine similarity between movies based on features
movie_similarity = cosine_similarity(tfidf_matrix)

# Create a DataFrame with movie similarity values
movie_similarity_df = pd.DataFrame(movie_similarity, index=df['Movie_ID'], columns=df['Movie_ID'])

In [6]:
# Function to get movie recommendations using content-based filtering
def get_content_based_recommendations(movie_id, num_recommendations=5):
    similar_movies = movie_similarity_df[movie_id].sort_values(ascending=False)[1:num_recommendations+1]
    recommended_movies = df[df['Movie_ID'].isin(similar_movies.index)][['Movie_ID', 'Movie_Title']]
    return recommended_movies

In [23]:
# Example usage of content-based filtering recommendation system
movie_id = 1000
content_based_recommendations = get_content_based_recommendations(movie_id)
print(f"Your Recommendations for Movie with id {movie_id} are:")
print(content_based_recommendations)

Your Recommendations for Movie with id 1000 are:
      Movie_ID Movie_Title
209        210     Monster
430        431  Eraserhead
2246      2247     Mirrors
3188      3189   You Again
4394      4406  Rudderless
