In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

    
movies_df = pd.read_csv("/mnt/10EE4B76EE4B5360/College/pccoe/7th Sem/RS/RS-A2_A3_movie.csv")

movies_df.dropna(inplace = True)

movies_df['genres_processed'] = movies_df['genres'].str.replace('|', ' ')

tfidf = TfidfVectorizer(stop_words='english')

tfidf_matrix = tfidf.fit_transform(movies_df['genres_processed'])

print("--- Shape of TF-IDF Matrix ---")
print(f"(Movies, Unique Genres): {tfidf_matrix.shape}")
print("-" * 30 + "\n")


cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

print("--- Shape of Cosine Similarity Matrix ---")
print(f"(Movies, Movies): {cosine_sim.shape}")
print("\n--- Example Similarity (Movie 0 vs. Movie 1) ---")
print(f"Toy Story vs Jumanji: {cosine_sim[0, 1]:.4f}")
print("-" * 30 + "\n")


indices = pd.Series(movies_df.index, index=movies_df['title']).drop_duplicates()



def get_content_recommendations(title, cosine_sim_matrix, data, indices_map):

    if title not in indices_map:
        return f"Error: Movie '{title}' not found in the database."

    idx = indices_map[title]

    sim_scores = list(enumerate(cosine_sim_matrix[idx]))

    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    sim_scores = sim_scores[1:11]

    movie_indices = [i[0] for i in sim_scores]

    return data['title'].iloc[movie_indices]



target_movie = "Toy Story (1995)"
print(f"--- Recommendations for: {target_movie} ---")
recommendations = get_content_recommendations(target_movie, cosine_sim, movies_df, indices)
print(recommendations)

print("\n" + "-" * 30 + "\n")

target_movie_2 = "GoldenEye (1995)"
print(f"--- Recommendations for: {target_movie_2} ---")
recommendations_2 = get_content_recommendations(target_movie_2, cosine_sim, movies_df, indices)
print(recommendations_2)

print("\n" + "-" * 30 + "\n")

target_movie_3 = "Money Train (1995)"
print(f"--- Recommendations for: {target_movie_3} ---")
recommendations_3 = get_content_recommendations(target_movie_3, cosine_sim, movies_df, indices)
print(recommendations_3)

target_movie_4 = "It Takes Two (1995)"
print(f"--- Recommendations for: {target_movie_3} ---")
recommendations_4 = get_content_recommendations(target_movie_4, cosine_sim, movies_df, indices)
print(recommendations_4)



--- Shape of TF-IDF Matrix ---
(Movies, Unique Genres): (27278, 23)
------------------------------

--- Shape of Cosine Similarity Matrix ---
(Movies, Movies): (27278, 27278)

--- Example Similarity (Movie 0 vs. Movie 1) ---
Toy Story vs Jumanji: 0.8134
------------------------------

--- Recommendations for: Toy Story (1995) ---
2209                                           Antz (1998)
3027                                    Toy Story 2 (1999)
3663        Adventures of Rocky and Bullwinkle, The (2000)
3922                      Emperor's New Groove, The (2000)
4790                                 Monsters, Inc. (2001)
10114    DuckTales: The Movie - Treasure of the Lost La...
10987                                     Wild, The (2006)
11871                               Shrek the Third (2007)
13337                       Tale of Despereaux, The (2008)
18274    Asterix and the Vikings (Ast√©rix et les Viking...
Name: title, dtype: object

------------------------------

--- Recommendatio

In [7]:
movies_df.movieId.nunique()

27278