In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import ast  # stdlib; kept in this block so the cell stays self-contained

# Load data
movies = pd.read_csv("tmdb_5000_movies.csv")
credits = pd.read_csv("tmdb_5000_credits.csv")

# Merge datasets: inner join (pandas default), so only movies present in both files are kept
data = pd.merge(movies, credits, left_on='id', right_on='movie_id')

# Clean and preprocess data.
# Each of these columns holds a stringified list of dicts. Parse it with
# ast.literal_eval rather than eval: literal_eval only accepts Python literals,
# so a crafted CSV cell cannot execute arbitrary code. Names are sorted so the
# resulting string does not depend on the order inside the source list.
data['genres'] = data['genres'].apply(
    lambda raw: ' '.join(sorted(g['name'] for g in ast.literal_eval(raw)))
)
data['cast'] = data['cast'].apply(
    lambda raw: ' '.join(sorted(c['name'] for c in ast.literal_eval(raw)))
)
# Only crew members whose job is exactly 'Director' are kept.
data['crew'] = data['crew'].apply(
    lambda raw: ' '.join(
        sorted(c['name'] for c in ast.literal_eval(raw) if c['job'] == 'Director')
    )
)

# Create a feature matrix using CountVectorizer.
# NOTE: with the default settings the vectorizer lower-cases and splits on word
# boundaries, so a multi-word name contributes one token per word.
cv = CountVectorizer()
features = cv.fit_transform(data['genres'] + ' ' + data['cast'] + ' ' + data['crew'])

# Calculate cosine similarity between every pair of rows in `features`;
# row/column i of the result corresponds to row i of `data`.
cosine_sim = cosine_similarity(features)

# Function to get recommendations
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact ``original_title`` of the query movie (the match is an equality
        comparison, so it is case-sensitive).
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` is expected to hold the
        similarities between row ``i`` of ``data`` and every row of ``data``.
        The default is bound when this function is defined.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        ``original_title`` of up to ``top_n`` other movies, most similar first.
        The Series index carries the row labels of ``data``.

    Raises
    ------
    IndexError
        If no row of ``data`` has the given ``original_title``.
    """
    # Use the *position* of the first matching row, not its index label:
    # the similarity matrix is addressed positionally.
    matches = (data['original_title'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"No movie with original_title {title!r} found in the dataset")
    idx = int(matches[0])

    # Drop the query movie explicitly instead of assuming it sorts first;
    # another movie with an identical feature vector could otherwise take its
    # place at the top and the query itself would be returned.
    sim_scores = [(pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx]
    # list.sort is stable, so equally similar movies keep their dataset order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [pos for pos, _ in sim_scores[:max(top_n, 0)]]
    return data['original_title'].iloc[movie_indices]

# Try the recommender on a sample title and show what it returns
movie_title = "Shrek the Third"
recommendations = get_recommendations(movie_title)
print(f"Recommendations for '{movie_title}':", recommendations, sep="\n")


Recommendations for 'Shrek the Third':
565                        Shrek 2
86             Shrek Forever After
899                          Shrek
67              Monsters vs Aliens
509                     Madagascar
187                  Puss in Boots
179         Penguins of Madagascar
120    Madagascar: Escape 2 Africa
196                       Megamind
458                      Yogi Bear
Name: original_title, dtype: object


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import ast  # stdlib; kept in this block so the cell stays self-contained

# Load data
movies = pd.read_csv("tmdb_5000_movies.csv")
credits = pd.read_csv("tmdb_5000_credits.csv")

# Merge datasets: inner join (pandas default), so only movies present in both files are kept
data = pd.merge(movies, credits, left_on='id', right_on='movie_id')

# Clean and preprocess data.
# Both columns hold a stringified list of dicts. ast.literal_eval parses the
# literal without executing code (unlike eval, which would run whatever a CSV
# cell contains). Names are sorted so the joined string is order-independent.
data['genres'] = data['genres'].apply(
    lambda raw: ' '.join(sorted(g['name'] for g in ast.literal_eval(raw)))
)
data['cast'] = data['cast'].apply(
    lambda raw: ' '.join(sorted(c['name'] for c in ast.literal_eval(raw)))
)

# Create a feature matrix using CountVectorizer (genres and cast only in this variant).
# NOTE: with the default settings the vectorizer lower-cases and splits on word
# boundaries, so a multi-word name contributes one token per word.
cv = CountVectorizer()
features = cv.fit_transform(data['genres'] + ' ' + data['cast'])

# Calculate cosine similarity between every pair of rows in `features`;
# row/column i of the result corresponds to row i of `data`.
cosine_sim = cosine_similarity(features)

# Function to get recommendations
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact ``original_title`` of the query movie (equality comparison,
        therefore case-sensitive).
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` is expected to line up with row
        ``i`` of ``data``. The default is bound when this function is defined.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        ``original_title`` of up to ``top_n`` other movies, most similar first,
        indexed by the row labels of ``data``.

    Raises
    ------
    IndexError
        If no row of ``data`` has the given ``original_title``.
    """
    # The similarity matrix is positional, so look up the row position of the
    # first match rather than its index label.
    hits = (data['original_title'] == title).to_numpy().nonzero()[0]
    if len(hits) == 0:
        raise IndexError(f"No movie with original_title {title!r} found in the dataset")
    idx = int(hits[0])

    # Exclude the query movie by position; relying on it being ranked first
    # breaks when another movie ties with it at the top.
    sim_scores = [(pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx]
    # Stable sort: ties stay in dataset order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [pos for pos, _ in sim_scores[:max(top_n, 0)]]
    return data['original_title'].iloc[movie_indices]

# Run the genres+cast recommender on a sample title and print the result
movie_title = "Shrek the Third"
recommendations = get_recommendations(movie_title)
print(f"Recommendations for '{movie_title}':", recommendations, sep="\n")


Recommendations for 'Shrek the Third':
565                         Shrek 2
86              Shrek Forever After
899                           Shrek
67               Monsters vs Aliens
509                      Madagascar
187                   Puss in Boots
179          Penguins of Madagascar
196                        Megamind
4664                        Bronson
120     Madagascar: Escape 2 Africa
Name: original_title, dtype: object


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Read the movie metadata
movies = pd.read_csv("tmdb_5000_movies.csv")

# Replace missing overviews with '' so every document handed to the
# vectorizer is a string
movies = movies.assign(overview=movies['overview'].fillna(''))

# TF-IDF representation of the overview text, ignoring English stop words
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['overview'])

# Pairwise dot products of the TF-IDF rows (Y defaults to X when omitted);
# row/column i corresponds to row i of `movies`
cosine_sim = linear_kernel(tfidf_matrix)

# Function to get recommendations
def get_recommendations(title, cosine_sim=cosine_sim, movies=movies, top_n=10):
    """Return the titles of the movies whose overview is most similar to ``title``'s.

    Parameters
    ----------
    title : str
        Exact ``original_title`` of the query movie (equality comparison,
        therefore case-sensitive).
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` is expected to line up with row
        ``i`` of ``movies``. The default is bound when this function is defined.
    movies : pandas.DataFrame
        Frame with an ``original_title`` column, in the same row order as
        ``cosine_sim``. The default is bound when this function is defined.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        ``original_title`` of up to ``top_n`` other movies, most similar first,
        indexed by the row labels of ``movies``.

    Raises
    ------
    IndexError
        If no row of ``movies`` has the given ``original_title``.
    """
    # Look up the row *position* of the first match: `movies` may be passed in
    # with an arbitrary index, while the similarity matrix is positional.
    hits = (movies['original_title'] == title).to_numpy().nonzero()[0]
    if len(hits) == 0:
        raise IndexError(f"No movie with original_title {title!r} found in the dataset")
    idx = int(hits[0])

    # Exclude the query movie by position. It is not guaranteed to rank first:
    # e.g. if its row in the matrix is all zeros, its self-similarity is 0 too,
    # and skipping "the first result" would drop a real neighbour instead.
    sim_scores = [(pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx]
    # Stable sort: ties stay in dataset order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [pos for pos, _ in sim_scores[:max(top_n, 0)]]
    return movies['original_title'].iloc[movie_indices]

# Run the overview-based recommender on a sample title and print the result
movie_title = "Shrek the Third"
recommendations = get_recommendations(movie_title)
print(f"Recommendations for '{movie_title}':", recommendations, sep="\n")


Recommendations for 'Shrek the Third':
565                           Shrek 2
86                Shrek Forever After
899                             Shrek
1246                Quest for Camelot
2517                The King's Speech
80        Snow White and the Huntsman
2366                     Dragonslayer
4266    House at the End of the Drive
4621                       Royal Kill
3608                Solomon and Sheba
Name: original_title, dtype: object
