In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the movie metadata CSV (hosted on GitHub) into a DataFrame
movies_df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/rashida048/Some-NLP-Projects/master/movie_dataset.csv'
)

In [3]:
# Text columns that are merged into each movie's description
features = [
    'keywords',
    'cast',
    'genres',
    'director',
]

In [4]:
# Blank out missing values in all selected feature columns at once,
# so every cell in those columns holds a string
movies_df[features] = movies_df[features].fillna('')

In [5]:
# Build one space-separated text field per movie from the selected
# feature columns (joined in the order given by `features`)
movies_df['combined_features'] = movies_df[features].apply(' '.join, axis=1)

In [6]:
# Vectorize the combined text with TF-IDF, then compute the pairwise
# cosine similarity between every pair of rows of the resulting matrix
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(raw_documents=movies_df['combined_features'])
cosine_sim = cosine_similarity(X=tfidf_matrix)

In [7]:
# Define a function to get movie recommendations
def get_recommendations(movie_title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``movie_title``.

    Parameters
    ----------
    movie_title : str
        Value looked up (exact match) in ``movies_df['title']``. If several
        rows share the title, the first matching row is used.
    cosine_sim : array-like, optional
        Square similarity matrix indexed by row position of ``movies_df``.
        Defaults to the module-level ``cosine_sim``.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Titles of up to ``top_n`` other movies, most similar first.

    Raises
    ------
    IndexError
        If ``movie_title`` does not occur in ``movies_df['title']``.
    """
    # Use row *positions* rather than index labels: the similarity matrix is
    # indexed positionally and the result is selected with ``.iloc``.
    positions = (movies_df['title'] == movie_title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(
            "Movie title {!r} not found in movies_df['title']".format(movie_title)
        )
    idx = int(positions[0])

    # Exclude the query movie explicitly instead of assuming it sorts first;
    # with ties (or an all-zero feature vector) it may not be at position 0.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # list.sort is stable, so equally-scored movies keep their row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Guard against a negative top_n, which would otherwise slice from the end.
    movie_indices = [position for position, _ in sim_scores[:max(top_n, 0)]]
    return movies_df['title'].iloc[movie_indices]


In [9]:
# Try the recommender on a few sample titles, printing each result in turn
for sample_title in ('The Dark Knight Rises', 'The Godfather', 'Avatar'):
    print(get_recommendations(sample_title))

119                Batman Begins
65               The Dark Knight
1196                The Prestige
428               Batman Returns
95                  Interstellar
1359                      Batman
210               Batman & Robin
4638    Amidst the Devil's Wings
813                     Superman
1013                    Child 44
Name: title, dtype: object
867     The Godfather: Part III
2731     The Godfather: Part II
1525             Apocalypse Now
1408                     Closer
1874                August Rush
3450            West Side Story
2674             Stomp the Yard
3743          Leaving Las Vegas
4467          American Graffiti
1061              Love Actually
Name: title, dtype: object
94      Guardians of the Galaxy
2403                     Aliens
3158                      Alien
47      Star Trek Into Darkness
1053               Galaxy Quest
56             Star Trek Beyond
838                      Alien³
239                     Gravity
2696                    Jason X
3730    