#### Task 5: Building a Product Recommendation System
Build a simple content-based recommendation system that suggests movies to a user based on movies they have previously liked.

In [22]:
# Import necessary libraries
import pandas as pd
from io import StringIO
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json
import ast  # For safely parsing string lists/dicts
import warnings
warnings.filterwarnings('ignore')

In [23]:
# Read the TMDB movie metadata from the CSV file into a DataFrame
movies = pd.read_csv(filepath_or_buffer="tmdb_5000_movies.csv")

# Preview the leading rows as a sanity check on the load
movies.head(n=5)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124


### Step 1.
**Feature Extraction:** For content-based recommendation, we'll use:
- **Genres:** parse the JSON-like string and extract the genre names.
- **Keywords:** parse the JSON-like string and extract the keyword names.
- **Overview:** use the movie description text as-is.

We'll combine these into a single "content" feature for each movie.

In [29]:
# Function to parse JSON-like strings and extract names
def parse_features(x):
    """Extract the ``name`` of every entry in a JSON-like list string.

    Parameters
    ----------
    x : str
        Text holding a literal list of dicts, e.g.
        ``'[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}]'``.
        Any non-string value (such as the NaN of a missing cell) is treated
        as having no features.

    Returns
    -------
    str
        The ``name`` values joined by single spaces, or ``''`` when the value
        is missing, is not a valid literal, or does not have the expected
        list-of-dicts-with-``name`` shape.
    """
    # Missing cells arrive as NaN/None rather than text; nothing to parse.
    if not isinstance(x, str):
        return ''
    try:
        features = ast.literal_eval(x)
        return ' '.join(feature['name'] for feature in features)
    except (ValueError, SyntaxError, TypeError, KeyError):
        # Only malformed-data errors are swallowed. A bare ``except`` would
        # also hide KeyboardInterrupt and genuine programming errors.
        return ''

# Flatten the JSON-like genre and keyword columns into plain text
genre_text = movies['genres'].apply(parse_features)
keyword_text = movies['keywords'].apply(parse_features)

# A missing overview contributes empty text instead of NaN
overview_text = movies['overview'].fillna('')

# Store the parsed columns and the combined per-movie "content" document
movies['genres_parsed'] = genre_text
movies['keywords_parsed'] = keyword_text
movies['content'] = genre_text + ' ' + keyword_text + ' ' + overview_text

# TF-IDF vectorizer: English stop words removed, vocabulary capped at 5000 terms for efficiency
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')

# Learn the vocabulary and vectorize every content document in one pass
tfidf_matrix = tfidf.fit_transform(movies['content'])

# (number of movies, number of TF-IDF features)
print(tfidf_matrix.shape)

(4803, 5000)


### Step 2.
**Similarity Calculation:**
Compute the cosine similarity matrix between all movies.

In [30]:
# Pairwise cosine similarity between every pair of movie TF-IDF vectors
# (with no second argument the matrix is compared against itself)
cosine_sim = cosine_similarity(tfidf_matrix)

# Report the dimensions of the resulting similarity matrix
print(cosine_sim.shape)

(4803, 4803)


### Step 3. 
**Build the Recommender Function:**
Create a function that takes a movie title and returns the top 5 similar movies.

In [37]:
# Create a mapping from title to row position.
# The values are positions (not index labels) because both cosine_sim rows and
# movies['title'].iloc are addressed positionally.
# Repeated titles keep only their first occurrence. Series.drop_duplicates()
# compares *values* (the row numbers, which are always unique), so it would
# never remove a repeated title; a lookup for such a title would then return
# several rows at once and break the ranking below.
title_to_index = pd.Series(range(len(movies)), index=movies['title'])
title_to_index = title_to_index[~title_to_index.index.duplicated(keep='first')]

def recommend_movies(title, cosine_sim=cosine_sim, top_n=5):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact movie title as it appears in ``movies['title']``. If the title
        occurs more than once, its first occurrence is used.
    cosine_sim : array-like, optional
        Square similarity matrix whose row/column ``i`` corresponds to row
        ``i`` of ``movies``. Defaults to the matrix computed earlier in the
        notebook.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    list of str or str
        Up to ``top_n`` titles ordered from most to least similar, never
        including the queried movie itself. If ``title`` is unknown, the
        string ``"Movie not found in dataset."`` is returned instead.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    if title not in title_to_index:
        return "Movie not found in dataset."

    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Row position of the queried movie
    idx = title_to_index[title]

    # Pair every *other* movie with its similarity to the query. The query row
    # is removed explicitly instead of assuming it sorts first: when another
    # movie ties with it (identical or empty content), skipping "the first
    # result" could drop the wrong movie and recommend the query itself.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Most similar first; the sort is stable, so ties stay in dataset order
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Translate the best positions back into titles
    movie_indices = [position for position, _ in sim_scores[:top_n]]
    return movies['title'].iloc[movie_indices].tolist()

# Example queries. The last title contains an apostrophe, so it is written
# with double quotes.
example_titles = [
    'Avatar',
    'The Dark Knight Rises',
    "Pirates of the Caribbean: At World's End",
]

# Print the recommendations for each example, separated by a blank line
for example_number, example_title in enumerate(example_titles):
    separator = "\n" if example_number else ""
    print(f"{separator}Recommendations for '{example_title}':")
    print(recommend_movies(example_title))

Recommendations for 'Avatar':
['Aliens', 'Alien³', 'Silent Running', 'Moonraker', 'Mission to Mars']

Recommendations for 'The Dark Knight Rises':
['The Dark Knight', 'Batman Returns', 'Batman', 'Batman Forever', 'Batman Begins']

Recommendations for 'Pirates of the Caribbean: At World's End':
["Pirates of the Caribbean: Dead Man's Chest", 'Pirates of the Caribbean: The Curse of the Black Pearl', 'Life of Pi', 'The Pirates! In an Adventure with Scientists!', 'Pirates of the Caribbean: On Stranger Tides']
