# Content-Based Recommendation System | By: Abdallah Wagih Ibrahim | https://www.kaggle.com/code/abdallahwagih/content-based-recommendation-system/notebook

#### Feature Extraction: Extract relevant features from the items. For example, in a movie recommendation system, features could include genre, director, actors, and plot keywords.

#### User Profile: Create a user profile based on their interactions with items. This profile is essentially a summary of the features of items the user has liked or interacted with in the past.

#### Recommendation: Calculate the similarity between the user profile and each item's features. Items that are most similar to the user profile are recommended.

In [None]:
# Import needed modules
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie metadata table; the relative path is resolved against the working directory
df = pd.read_csv(filepath_or_buffer='movies.csv')

In [None]:
# Names of the text columns used to describe each movie's content
selected_features = [
    'genres',
    'keywords',
    'tagline',
    'cast',
    'director',
]
print(selected_features)

In [None]:
# Replace missing values in the selected columns with empty strings,
# handling all of the selected columns in a single assignment
df[selected_features] = df[selected_features].fillna('')

In [None]:
# Build one text document per movie by joining the selected feature columns
# with single spaces. Driving this from `selected_features` keeps the combined
# text in sync with the feature list instead of repeating the column names by
# hand; columns are joined left to right in the order the list gives them.
combined_features = df[selected_features[0]]
for feature in selected_features[1:]:
    combined_features = combined_features + ' ' + df[feature]
combined_features

## Building content-based recommendation system

In [None]:
# Fit a TF-IDF vectorizer on the combined text and encode every movie
# as a numeric feature vector in the same step
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(raw_documents=combined_features)

print(feature_vectors)

### Cosine Similarity

In [None]:
# Pairwise cosine similarity between all movie vectors: entry [i][j] scores
# row i against row j. With a single argument the rows are compared against
# themselves.
similarity = cosine_similarity(feature_vectors)

print(similarity)

In [None]:
# Collect every movie title in the dataset into a plain Python list
list_of_all_titles = df.title.to_list()
print(list_of_all_titles)

In [None]:
# Ask the user which movie they like
movie_name = input(' Enter your favourite movie name : ')

# Look up the dataset titles that most closely resemble what was typed;
# n and cutoff are spelled out at difflib's default values
find_close_match = difflib.get_close_matches(
    movie_name,
    list_of_all_titles,
    n=3,
    cutoff=0.6,
)
print(find_close_match)

In [None]:
# Resolve the best fuzzy match to the row position of that movie in df.
# get_close_matches returns an empty list when no title is similar enough,
# so fail with a clear message instead of an opaque IndexError.
if not find_close_match:
    raise ValueError('No movie title in the dataset is close to the one entered')
close_match = find_close_match[0]
# Use the row position rather than the CSV's 'index' column: the rows of the
# similarity matrix follow the row order of df, and this also works when the
# file has no 'index' column. The first matching row is used.
index_of_the_movie = np.flatnonzero((df.title == close_match).to_numpy())[0]

In [None]:
# Pair every movie's row position with its similarity to the chosen movie
similarity_score = [
    (position, score)
    for position, score in enumerate(similarity[index_of_the_movie])
]
print(similarity_score)

In [None]:
# Order the (position, score) pairs from the highest score to the lowest,
# working on a copy so similarity_score keeps its original order
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)
print(sorted_similar_movies)

In [None]:
# Keep only the best-scoring (position, score) pairs and display them
top_count = 5
top_sim = sorted_similar_movies[:top_count]
top_sim

In [None]:
# Print the titles of the five highest-scoring movies, numbered from 1.
# Only the first five entries are visited, and each title is read by row
# position (the position enumerate() assigned to the similarity row), so the
# whole table is no longer scanned once per candidate movie.
for i, movie in enumerate(sorted_similar_movies[:5], start=1):
    index = movie[0]
    title_from_index = df['title'].iloc[index]
    print(i, '-', title_from_index)

## Full Recommendation System

In [None]:
# End-to-end recommendation: read a title from the user, fuzzy-match it
# against the dataset, and print the most similar movies by cosine similarity.
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = df['title'].tolist()

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

# get_close_matches returns an empty list when no title is similar enough;
# stop with a clear message instead of an opaque IndexError
if not find_close_match:
    raise ValueError('No movie title in the dataset is close to the one entered')

close_match = find_close_match[0]

# Row position of the matched movie. The similarity matrix is ordered by row
# position in df, so the position is used directly instead of relying on an
# 'index' column being present in the CSV.
index_of_the_movie = np.flatnonzero((df.title == close_match).to_numpy())[0]

# (row position, similarity to the chosen movie) for every movie
similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

print('Movies suggested for you : \n')

# Show the 29 best-scoring titles, numbered from 1. Slicing first avoids
# visiting every movie, and .iloc reads the title by row position without
# scanning the whole table for each one.
for i, movie in enumerate(sorted_similar_movies[:29], start=1):
    index = movie[0]
    title_from_index = df['title'].iloc[index]
    print(i, '.',title_from_index)