In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.stem.porter import PorterStemmer
import ast

In [17]:
# Function to load the data
def load_data(file_path):
    return pd.read_csv(file_path)

In [18]:
# funciton to clean all the data
def clean_data(movies_df, credits_df):
    movies_df = movies_df.merge(credits_df, on='title')
    movies_df = movies_df[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]
    movies_df.dropna(inplace=True)
    return movies_df

In [19]:
# we convert the represting string of genres and keywords to a list of names
def convert(obj):
    return [i['name'] for i in ast.literal_eval(obj)]

In [20]:
# we also have to covert represent cast to a list of actor names
def convert3(obj):
    counter = 0
    return [i['name'] for i in ast.literal_eval(obj) if (counter := counter + 1) <= 3]

In [23]:
# function to extract the director's names
def fetch_director(obj):
    return [i['name'] for i in ast.literal_eval(obj) if i['job'] == 'Director'][:1]

In [22]:
# stemming the text using Porter Stemmer
def stem(text):
    ps = PorterStemmer()
    return " ".join([ps.stem(i) for i in text.split()])

In [25]:
# finally, we have the recommendation system function
def recommend(movie, new_df, vectors, similarity):
    try:
        movie_index = new_df[new_df['title'] == movie].index[0]
    except IndexError:
        return []

    distances = similarity[movie_index]
    movies_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[1:6]

    recommended_movies = [new_df.iloc[i[0]].title for i in movies_list]
    return recommended_movies

In [9]:
# Here are the main scripts 
movies_df = load_data('movies.csv')
credits_df = load_data('credits.csv')
movies_df = clean_data(movies_df, credits_df)

In [10]:
movies_df['genres'] = movies_df['genres'].apply(convert)
movies_df['keywords'] = movies_df['keywords'].apply(convert)
movies_df['cast'] = movies_df['cast'].apply(convert3)
movies_df['crew'] = movies_df['crew'].apply(fetch_director)

In [11]:
movies_df['overview'] = movies_df['overview'].apply(lambda x: x.split())
movies_df[['genres', 'keywords', 'cast', 'crew']] = movies_df[['genres', 'keywords', 'cast', 'crew']].applymap(lambda x: [i.replace(" ", "") for i in x])

  movies_df[['genres', 'keywords', 'cast', 'crew']] = movies_df[['genres', 'keywords', 'cast', 'crew']].applymap(lambda x: [i.replace(" ", "") for i in x])


In [12]:
movies_df['tags'] = movies_df['overview'] + movies_df['genres'] + movies_df['keywords'] + movies_df['cast'] + movies_df['crew']

In [13]:
new_df = movies_df[['movie_id', 'title', 'tags']]
new_df['tags'] = new_df['tags'].apply(lambda x: ' '.join(x))
new_df['tags'] = new_df['tags'].apply(lambda x: x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(lambda x: ' '.join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(lambda x: x.lower())


In [14]:
cv = CountVectorizer(max_features=5000, stop_words='english')
vectors = cv.fit_transform(new_df['tags']).toarray()
similarity = cosine_similarity(vectors)

In [15]:
new_df['tags'] = new_df['tags'].apply(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(stem)


In [34]:
class MovieRecommendation:
    def system(self, movieName):
        recommended_movies = recommend(movieName, new_df, vectors, similarity)

        if recommended_movies:
            print(f"If you are watching '{movieName}' I recommend you to watch: \n")
            for sl_no, movie in enumerate(recommended_movies):
                print(sl_no + 1, movie)
        else:
            print(f"Sorry, '{movieName}' is not in the dataset. Unable to provide recommendations.")

# Here we can create an object and call the function from that obj, but first let's make some if else statement
movie_name = 'The Ultimate Gift'
if movie_name == '':
    print("Please enter some movie name!")
else:
    mrs = MovieRecommendation()
    result = mrs.system(movie_name)
    

If you are watching 'The Ultimate Gift' I recommend you to watch: 

1 Stargate: The Ark of Truth
2 The Helix... Loaded
3 Star Wars: Episode III - Revenge of the Sith
4 Tank Girl
5 The Hitchhiker's Guide to the Galaxy
