In [1]:
 pip install numpy pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd

# Movie metadata CSV hosted on GitHub; reading it requires network access.
url = 'https://raw.githubusercontent.com/rashida048/Some-NLP-Projects/master/movie_dataset.csv'
df = pd.read_csv(url)

# Text columns the recommender is built on.
features = ['keywords', 'cast', 'genres', 'director']

# Replace missing values in all of those columns at once with empty strings,
# so the later string concatenation never encounters NaN.
df[features] = df[features].fillna('')

# Combine the features into a single string
def combine_features(row, columns=('keywords', 'cast', 'genres', 'director')):
    """Join a movie's text attributes into one space-separated string.

    Args:
        row: Record indexable by column name (for example a DataFrame row or
            a dict). Every selected value must already be a string; a
            non-string value raises TypeError.
        columns: Names of the fields to concatenate, in order. Defaults to
            the four attributes used by this notebook.

    Returns:
        The selected field values joined by single spaces. Empty fields are
        kept, so their separators remain in the result.
    """
    return " ".join(row[column] for column in columns)

# Build one text blob per movie by running combine_features over every row.
df['combined_features'] = df.apply(combine_features, axis='columns')

# Preview the title next to its combined text for the first five movies.
print(df.loc[:, ['title', 'combined_features']].head(5))


                                      title  \
0                                    Avatar   
1  Pirates of the Caribbean: At World's End   
2                                   Spectre   
3                     The Dark Knight Rises   
4                               John Carter   

                                   combined_features  
0  culture clash future space war space colony so...  
1  ocean drug abuse exotic island east india trad...  
2  spy based on novel secret agent sequel mi6 Dan...  
3  dc comics crime fighter terrorist secret ident...  
4  based on novel mars medallion space travel pri...  


In [4]:
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 # Bag-of-words encoder that ignores English stop words
 cv = CountVectorizer(stop_words='english')
 # Learn the vocabulary and encode each movie's combined text as token counts
 count_matrix = cv.fit_transform(df['combined_features'])
 # Pairwise cosine similarity between all movies; with a single argument the
 # matrix is compared against itself
 cosine_sim = cosine_similarity(count_matrix)

In [8]:
# Look up a movie's title from its row position in the dataset
def get_title_from_index(index):
    """Return the 'title' value stored at positional row `index` of `df`."""
    return df['title'].iloc[index]
# Function to get the index of a movie based on its title
def get_index_from_title(title):
    """Return the row position of the first movie whose title equals `title`.

    The result is a zero-based row position, not an index label, so it can be
    used directly with `df.iloc` and as a row number of the similarity matrix
    even when `df` does not carry the default 0..n-1 index.

    Args:
        title: Exact movie title to look up (case-sensitive).

    Returns:
        int: Position of the first matching row in `df`.

    Raises:
        IndexError: If no row has that title.
    """
    # Boolean mask -> positions of the True entries (independent of df.index)
    positions = (df['title'] == title).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"No movie titled {title!r} in the dataset")
    return int(positions[0])

In [9]:
def get_movie_recommendations(movie_user_likes, top_n=5):
    """Print the movies most similar to `movie_user_likes`.

    Similarity comes from the row of `cosine_sim` that belongs to the movie.
    The queried movie is excluded by its position rather than by assuming it
    sorts first, so it is never recommended to itself, even when another
    movie ties with it at the top score or its own score is 0.

    Args:
        movie_user_likes: Exact title of a movie in `df['title']`.
        top_n: Maximum number of recommendations to print (default 5).

    Returns:
        None. The results, or a "not found" message, are printed.
    """
    # Check if the movie exists in the dataset
    if movie_user_likes not in df['title'].values:
        print(f"Sorry, the movie '{movie_user_likes}' was not found in the dataset. Please try another title.")
        return

    # Position of the liked movie; also its row in the similarity matrix
    movie_index = get_index_from_title(movie_user_likes)

    # Pair every *other* movie's position with its similarity score
    candidates = [
        (index, score)
        for index, score in enumerate(cosine_sim[movie_index])
        if index != movie_index
    ]

    # Highest score first; the sort is stable, so ties keep dataset order
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # A negative top_n would slice from the end, so clamp it to zero
    limit = max(top_n, 0)

    print(f"\nTop {top_n} similar movies to '{movie_user_likes}' are:")
    for rank, (index, score) in enumerate(candidates[:limit], start=1):
        print(f"{rank}. {get_title_from_index(index)} (Similarity Score: {score:.2f})")


In [10]:
# Display some movie titles from the dataset to guide the user
print("\nSome movie titles in the dataset are:")
print(df['title'].head(10))  # Show the first 10 movie titles as examples

# Get movie recommendations based on user input.
# Strip surrounding whitespace so a stray leading/trailing space does not
# defeat the exact-title lookup.
movie_user_likes = input("\nEnter a movie title from the above list: ").strip()
get_movie_recommendations(movie_user_likes)



Some movie titles in the dataset are:
0                                      Avatar
1    Pirates of the Caribbean: At World's End
2                                     Spectre
3                       The Dark Knight Rises
4                                 John Carter
5                                Spider-Man 3
6                                     Tangled
7                     Avengers: Age of Ultron
8      Harry Potter and the Half-Blood Prince
9          Batman v Superman: Dawn of Justice
Name: title, dtype: object



Enter a movie title from the above list:  Avatar



Top 5 similar movies to 'Avatar' are:
1. Guardians of the Galaxy (Similarity Score: 0.42)
2. Aliens (Similarity Score: 0.38)
3. Star Wars: Clone Wars: Volume 1 (Similarity Score: 0.35)
4. Star Trek Into Darkness (Similarity Score: 0.34)
5. Star Trek Beyond (Similarity Score: 0.34)


In [None]:
#📌 Goal:
#Recommend similar movies based on content (genres, cast, director, keywords).

#🧹 Steps:
#Load Data: Use pandas to read the movie dataset (CSV).

#Preprocess: Fill missing values and combine features like keywords, cast, genres, and director into one text string per movie.

#Vectorize Text: Use CountVectorizer to convert the combined text into a numeric matrix (Bag-of-Words).

#Compute Similarity: Use cosine_similarity to find how similar two movies are based on their vectors.

#Recommend: For a given movie, sort the other movies by similarity score and print the five most similar ones.

#✅ Type:
#This is a content-based filtering system using movie attributes, not user data.