<a href="https://colab.research.google.com/github/Ricksondsouza-bit/Movie-Recommandation-system/blob/main/Movie_Recommandation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [12]:
df = pd.read_csv('movies.csv')

In [13]:
print("Dataset shape:", df.shape)
print("\nFirst 5 rows:")
print(df.head())

Dataset shape: (4803, 24)

First 5 rows:
   index     budget                                    genres  \
0      0  237000000  Action Adventure Fantasy Science Fiction   
1      1  300000000                  Adventure Fantasy Action   
2      2  245000000                    Action Adventure Crime   
3      3  250000000               Action Crime Drama Thriller   
4      4  260000000          Action Adventure Science Fiction   

                                       homepage      id  \
0                   http://www.avatarmovie.com/   19995   
1  http://disney.go.com/disneypictures/pirates/     285   
2   http://www.sonypictures.com/movies/spectre/  206647   
3            http://www.thedarkknightrises.com/   49026   
4          http://movies.disney.com/john-carter   49529   

                                            keywords original_language  \
0  culture clash future space war space colony so...                en   
1  ocean drug abuse exotic island east india trad...             

In [14]:
df['genres'] = df['genres'].str.split('|')
print("\nSample genres (split):", df['genres'].iloc[0])


Sample genres (split): ['Action Adventure Fantasy Science Fiction']


In [15]:
df['genres_str'] = df['genres'].apply(lambda x: ' '.join(x) if isinstance(x, list) else '')

In [16]:
tfidf = TfidfVectorizer(stop_words='english', max_features=100)  # Limit features for simplicity
tfidf_matrix = tfidf.fit_transform(df['genres_str'])

In [17]:
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
print("Cosine similarity matrix shape:", cosine_sim.shape)

Cosine similarity matrix shape: (4803, 4803)


In [18]:
def recommend_movies(title, df=df, cosine_sim=cosine_sim, top_k=5):
    """
    Recommend top_k movies similar to the input 'title' based on genres.
    """

    idx = df[df['title'].str.contains(title, case=False, na=False)].index
    if len(idx) == 0:
        return f"No movie found with title containing '{title}'"

    idx = idx[0]

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)


    sim_scores = sim_scores[1:top_k+1]
    movie_indices = [i[0] for i in sim_scores]

    rec_df = df.iloc[movie_indices][['title', 'genres']].copy()
    rec_df['similarity_score'] = [round(i[1], 3) for i in sim_scores]
    return rec_df

In [19]:
recommendations = recommend_movies("the Avengers")
print("\nRecommendations for 'the Avengers':")
print(recommendations)


Recommendations for 'the Avengers':
                                  title                              genres  \
7               Avengers: Age of Ultron  [Action Adventure Science Fiction]   
16                         The Avengers  [Science Fiction Action Adventure]   
26           Captain America: Civil War  [Adventure Action Science Fiction]   
31                           Iron Man 3  [Action Adventure Science Fiction]   
35  Transformers: Revenge of the Fallen  [Science Fiction Action Adventure]   

    similarity_score  
7                1.0  
16               1.0  
26               1.0  
31               1.0  
35               1.0  


In [10]:
# Let the user input a movie title
user_title = input("Enter a movie title to get recommendations: ")
print(f"\nRecommendations for '{user_title}':")
print(recommend_movies(user_title))

Enter a movie title to get recommendations: rush hour

Recommendations for 'rush hour':
                            title                          genres  \
345                   Rush Hour 2  [Action Comedy Crime Thriller]   
433                         RED 2  [Action Comedy Crime Thriller]   
1771               The 51st State  [Thriller Action Comedy Crime]   
1991                     Bad Boys  [Action Comedy Crime Thriller]   
2196  The Man Who Knew Too Little  [Comedy Thriller Crime Action]   

      similarity_score  
345                1.0  
433                1.0  
1771               1.0  
1991               1.0  
2196               1.0  
