In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the movie catalogue from disk; later cells read its 'title' and
# 'genres' columns.
movies = pd.read_csv(filepath_or_buffer='data/movies.csv')


In [3]:
# Vectorise each movie's genre string with TF-IDF, dropping English stop words.
# Missing genres are replaced by an empty string because the vectorizer
# rejects NaN documents.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'].fillna(''))

# Pairwise cosine similarity of every movie against every other movie:
# a square matrix with one row and one column per row of `movies`.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Reverse lookup: title -> row index of `movies`.
# Series.drop_duplicates() would compare the *values* (the row indices, which
# are already unique) and therefore never removes a repeated title. Filter on
# duplicated index labels instead so each title maps to exactly one row
# (the first occurrence) and `indices[title]` is always a scalar.
indices = pd.Series(movies.index, index=movies['title'])
indices = indices[~indices.index.duplicated(keep='first')]


In [5]:
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in the module-level ``indices`` Series.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix indexed by row position of ``movies``.
        Defaults to the matrix computed earlier in the notebook.
    top_n : int, optional
        Maximum number of recommendations to return (default 10, matching
        the previously hard-coded slice). Expected to be non-negative.

    Returns
    -------
    list of str or str
        Up to ``top_n`` titles ordered from most to least similar, or the
        string ``"Movie not found in the dataset."`` when ``title`` is not
        a key of ``indices``.
    """
    if title not in indices:
        return "Movie not found in the dataset."

    idx = indices[title]
    # A title that occurs more than once in the lookup yields a Series of
    # row indices; fall back to the first occurrence so `idx` is a scalar.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Drop the query movie by its row index rather than assuming it sorts
    # first: other movies can tie with it on similarity, and the stable sort
    # would then place any lower-indexed tie ahead of it.
    sim_scores = [
        (row, score)
        for row, score in enumerate(cosine_sim[idx])
        if row != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [row for row, _ in sim_scores[:top_n]]
    # NOTE(review): positional lookup assumes `movies` still has the default
    # 0..N-1 index produced when it was loaded - confirm if it is re-indexed.
    return movies['title'].iloc[movie_indices].tolist()


In [6]:
# Demo: print the recommendations for one example title.
if __name__ == "__main__":
    movie_title = "Toy Story (1995)"
    recommendations = get_recommendations(movie_title)
    if isinstance(recommendations, str):
        # get_recommendations signals an unknown title with a plain message
        # string; print it as-is instead of enumerating its characters.
        print(recommendations)
    else:
        print(f"Recommendations for '{movie_title}':")
        for i, rec in enumerate(recommendations, 1):
            print(f"{i}. {rec}")


Recommendations for 'Toy Story (1995)':
1. Antz (1998)
2. Toy Story 2 (1999)
3. Adventures of Rocky and Bullwinkle, The (2000)
4. Emperor's New Groove, The (2000)
5. Monsters, Inc. (2001)
6. Wild, The (2006)
7. Shrek the Third (2007)
8. Tale of Despereaux, The (2008)
9. Asterix and the Vikings (Astérix et les Vikings) (2006)
10. Turbo (2013)
