In [1]:
import pandas as pd

In [7]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
# Load the cleaned dataset

In [9]:
# Path of the cleaned movie dataset, resolved relative to the notebook's working directory.
DATASET_PATH = "Movies_Final.csv"
data = pd.read_csv(DATASET_PATH)

In [11]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,director_name,num_critic_for_reviews,duration,actor_2_name,gross,genres,actor_1_name,movie_title,num_voted_users,cast_total_facebook_likes,actor_3_name,facenumber_in_poster,plot_keywords,movie_imdb_link,num_user_for_reviews,language,country,budget,title_year,imdb_score
0,James Cameron,723.0,178.0,Joel David Moore,760505847.0,Action|Adventure|Fantasy|Sci-Fi,CCH Pounder,Avatar,886204,4834,Wes Studi,0.0,avatar|future|marine|native|paraplegic,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,3054.0,English,USA,237000000.0,2009.0,7.9
1,Gore Verbinski,302.0,169.0,Orlando Bloom,309404152.0,Action|Adventure|Fantasy,Johnny Depp,Pirates of the Caribbean: At World's End,471220,48350,Jack Davenport,0.0,goddess|marriage ceremony|marriage proposal|pi...,http://www.imdb.com/title/tt0449088/?ref_=fn_t...,1238.0,English,USA,300000000.0,2007.0,7.1
2,Sam Mendes,602.0,148.0,Rory Kinnear,200074175.0,Action|Adventure|Thriller,Christoph Waltz,Spectre,275868,11700,Stephanie Sigman,1.0,bomb|espionage|sequel|spy|terrorist,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,994.0,English,UK,245000000.0,2015.0,6.8
3,Christopher Nolan,813.0,164.0,Christian Bale,448130642.0,Action|Thriller,Tom Hardy,The Dark Knight Rises,1144337,106759,Joseph Gordon-Levitt,0.0,deception|imprisonment|lawlessness|police offi...,http://www.imdb.com/title/tt1345836/?ref_=fn_t...,2701.0,English,USA,250000000.0,2012.0,8.5
4,Doug Walker,,,Rob Walker,,Documentary,Doug Walker,Star Wars: Episode VII - The Force Awakens ...,8,143,,0.0,,http://www.imdb.com/title/tt5289954/?ref_=fn_t...,,,,,,7.1


In [None]:
# Clean the 'movie_title' column by stripping extra spaces and converting to lowercase

In [10]:
# Normalise titles in two steps: trim surrounding whitespace, then lowercase.
trimmed_titles = data['movie_title'].str.strip()
data['movie_title'] = trimmed_titles.str.lower()

In [None]:
# Fill any NaN values with an empty string

In [11]:
# Replace missing values with '' in each text column used to build the features.
for text_column in ('director_name', 'genres'):
    data[text_column] = data[text_column].fillna('')

In [None]:
# Combine relevant features into a single column

In [12]:
# Join genres and director name into one space-separated text field per movie.
genre_text = data['genres']
director_text = data['director_name']
data['combined_features'] = genre_text.str.cat(director_text, sep=' ')


In [27]:
# Convert the combined text data to a matrix of token counts

In [13]:
# Learn the vocabulary (English stop words removed) and build the
# movie-by-token count matrix from the combined text field.
feature_text = data['combined_features']
count_vectorizer = CountVectorizer(stop_words='english')
count_matrix = count_vectorizer.fit_transform(feature_text)

In [None]:
# Compute cosine similarity based on the count matrix

In [14]:
# Pairwise similarity of every movie against every other movie; with a single
# argument cosine_similarity compares the matrix with itself.
cosine_sim = cosine_similarity(count_matrix)


In [None]:
# Function to get movie recommendations based on similarity scores

In [15]:
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie title to look up. It is compared with ``data['movie_title']``
        after stripping surrounding whitespace and lowercasing both sides;
        the match must otherwise be exact. If several rows share the title,
        the first one is used.
    cosine_sim : array-like
        Pairwise similarity matrix. Row ``i`` must hold the scores of the
        ``i``-th row of ``data`` against every row of ``data``.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        ``movie_title`` values of the best-scoring rows, highest score first.
        The queried movie itself is never part of the result.

    Raises
    ------
    IndexError
        If no row of ``data`` carries the requested title.
    """
    query = title.strip().lower()
    normalized_titles = data['movie_title'].str.strip().str.lower()

    # Work with the row *position* rather than the index label: cosine_sim is
    # addressed positionally, and so is the final .iloc lookup.
    position = next(
        (pos for pos, name in enumerate(normalized_titles)
         if isinstance(name, str) and name == query),
        None,
    )
    if position is None:
        # Same exception type the unguarded lookup used to raise, but with a
        # message that names the missing title.
        raise IndexError(f"Movie title {title!r} was not found in the dataset")

    # Drop the queried movie explicitly. Slicing off the first sorted entry is
    # not reliable: the sort is stable, so an earlier row with identical
    # features (or a rounding difference) can take the first slot instead.
    scored = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[position])
        if pos != position
    ]

    # Highest similarity first; ties keep their original row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    top_positions = [pos for pos, _ in scored[:max(top_n, 0)]]
    return data['movie_title'].iloc[top_positions]


In [None]:
# Test the recommendation system

In [28]:
# Any title present in the dataset can be used here instead of 'Avatar'.
movie_name = 'Avatar'
recommended_movies = get_recommendations(title=movie_name)
# One print call: heading (ending in a blank line) followed by the result.
print(f"Movies similar to '{movie_name}':\n", recommended_movies, sep="\n")


Movies similar to 'Avatar':

2486                                          aliens
288                       terminator 2: judgment day
3575                                  the terminator
1702                           dragonball: evolution
95                           guardians of the galaxy
15                                      man of steel
39                          the amazing spider-man 2
236     star wars: episode iii - revenge of the sith
237     star wars: episode ii - attack of the clones
239                                    the wolverine
Name: movie_title, dtype: object


In [None]:
# Other Examples

In [29]:
# Recommendations for a second example title.
movie_name = 'Inception'
recommended_movies = get_recommendations(title=movie_name)
# One print call: heading (ending in a blank line) followed by the result.
print(f"Movies similar to '{movie_name}':\n", recommended_movies, sep="\n")

Movies similar to 'Inception':

96                     interstellar
968                i am number four
1367                   the 5th wave
3             the dark knight rises
120                   batman begins
28                       battleship
29                   jurassic world
57                 star trek beyond
76                       waterworld
77      g.i. joe: the rise of cobra
Name: movie_title, dtype: object


In [30]:
# Recommendations for a third example title.
movie_name = 'The Dark Knight'
recommended_movies = get_recommendations(title=movie_name)
# One print call: heading (ending in a blank line) followed by the result.
print(f"Movies similar to '{movie_name}':\n", recommended_movies, sep="\n")

Movies similar to 'The Dark Knight':

3299       the way of the gun
3       the dark knight rises
2075                    rogue
2828               the border
3376               the player
3507                   nikita
2842               young guns
177                miami vice
1066                 insomnia
2316      a most violent year
Name: movie_title, dtype: object


In [31]:
# Recommendations for a fourth example title.
movie_name = 'Titanic'
recommended_movies = get_recommendations(title=movie_name)
# One print call: heading (ending in a blank line) followed by the result.
print(f"Movies similar to '{movie_name}':\n", recommended_movies, sep="\n")

Movies similar to 'Titanic':

2582                         the immigrant
2666                the remains of the day
2986                            two lovers
3387    the city of your final destination
3396                           howards end
4057                    a room with a view
4171                               maurice
1444                         hit the floor
3303            the bold and the beautiful
3419                     wuthering heights
Name: movie_title, dtype: object


In [None]:
# The lookup is an exact (case-insensitive) title match, so the full title is
# needed; a bare 'avengers' matches no row.
# NOTE(review): assumes the dataset contains 'The Avengers' - confirm.
movie_name = 'The Avengers'
# Call the function under its real name and print the variable that was
# actually assigned (the previous spellings raised NameError).
recommended_movies = get_recommendations(movie_name)
print(f"movies similar to '{movie_name}'\n")
print(recommended_movies)