In [18]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import json
import ast

In [33]:
# Load the two input tables from CSV files in the working directory.
ratings = pd.read_csv('ratings_small.csv')
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, so column dtypes are inferred consistently.
movies_metadata = pd.read_csv('movies_metadata.csv', low_memory=False)

In [35]:
def safe_parse_literal(data):
    """Parse a stringified Python literal, falling back to an empty dict.

    Parameters
    ----------
    data : object
        Usually a string holding a Python literal such as
        "{'id': 1, 'name': 'x'}", or a missing value (None / NaN).

    Returns
    -------
    object
        Whatever ``ast.literal_eval`` produces (dict, list, number, ...), or
        ``{}`` when ``data`` is missing or cannot be parsed.
    """
    if pd.isnull(data):
        return {}  # Missing value: nothing to parse
    try:
        return ast.literal_eval(data)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # literal_eval can raise any of these on malformed input (for example
        # TypeError for an unhashable dict key); treat them all as "unparseable"
        # so a single bad row cannot abort a whole DataFrame.apply().
        return {}

In [37]:
def _collection_name(raw):
    """Return the 'name' entry of a stringified collection dict, or '' if there is none."""
    parsed = safe_parse_literal(raw)  # parse once per row instead of twice
    if not isinstance(parsed, dict):
        return ''
    return parsed.get('name', '')


# Derive a plain-text collection name from the stringified 'belongs_to_collection' column.
movies_metadata['collection_name'] = movies_metadata['belongs_to_collection'].apply(_collection_name)


In [38]:
def _genre_names(raw):
    """Return the list of genre names held in one 'genres' cell.

    ``raw`` is normally the stringified list of ``{'id': ..., 'name': ...}``
    dicts read from the CSV. A value that is already a list (this cell was run
    before) is accepted as well, so re-running the cell neither fails nor
    discards the genres. Anything missing or unparseable yields ``[]``.
    """
    entries = raw if isinstance(raw, list) else safe_parse_literal(raw)
    if not isinstance(entries, (list, tuple)):
        return []
    names = []
    for entry in entries:
        if isinstance(entry, dict) and 'name' in entry:
            names.append(entry['name'])
        elif isinstance(entry, str):
            # Already a bare genre name from an earlier run of this cell
            names.append(entry)
    return names


# Replace the stringified genre records with plain lists of genre names.
movies_metadata['genres'] = movies_metadata['genres'].apply(_genre_names)

In [39]:
# Preprocess data: blank out missing overviews so that every row holds a string.
movies_metadata.loc[movies_metadata['overview'].isna(), 'overview'] = ''

In [40]:
# The raw collection column is no longer needed once 'collection_name' exists.
# errors='ignore' keeps this cell re-runnable after the column is already gone,
# and reassigning avoids in-place mutation.
movies_metadata = movies_metadata.drop(columns=['belongs_to_collection'], errors='ignore')

In [42]:
# Vectorise the overview text: one TF-IDF row per movie, English stop words removed.
tfidf = TfidfVectorizer(stop_words='english')
overview_corpus = movies_metadata['overview']
tfidf_matrix = tfidf.fit_transform(overview_corpus)

In [43]:
# Pairwise dot products between every pair of overview vectors.
# Row i holds the similarity of movie i to every movie (including itself).
movie_similarities = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)

In [44]:
# Map each movie title to its row position in movies_metadata (and therefore in
# the similarity matrix, which is addressed by position).
indices = pd.Series(range(len(movies_metadata)), index=movies_metadata['title'])
# Series.drop_duplicates() compares the *values* (the positions, which are all
# unique), so it would never remove a repeated title. De-duplicate on the index
# instead, keeping the first movie for each title, so indices[title] is a scalar.
indices = indices[~indices.index.duplicated(keep='first')]

In [45]:
# Function to get recommendations based on the cosine similarity score of movie overviews
def recommend_movies(title, num_movies=5):
    """Return the titles of the movies whose overviews are most similar to ``title``.

    Relies on three notebook-level objects: ``indices`` (title -> row position),
    ``movie_similarities`` (row i = similarity of movie i to every movie) and
    ``movies_metadata`` (source of the returned titles).

    Parameters
    ----------
    title : str
        Title of the movie to find neighbours for.
    num_movies : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Titles of the most similar movies, best match first. Ties keep the
        original row order.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    ValueError
        If ``num_movies`` is negative.
    """
    if num_movies < 0:
        raise ValueError(f"num_movies must be non-negative, got {num_movies}")
    if title not in indices.index:
        raise KeyError(f"Movie title not found: {title!r}")

    idx = indices[title]
    if isinstance(idx, pd.Series):
        # Several movies share this title; use the first one.
        idx = idx.iloc[0]
    idx = int(idx)

    # Exclude the query movie explicitly rather than assuming it sorts first:
    # an empty overview has self-similarity 0, and tied scores can put another
    # movie ahead of it.
    scored = [
        (position, score)
        for position, score in enumerate(movie_similarities[idx])
        if position != idx
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    movie_indices = [position for position, _ in scored[:num_movies]]
    return movies_metadata['title'].iloc[movie_indices]

In [46]:
# Example usage: the five movies whose overviews are closest to the given title.
shawshank_recommendations = recommend_movies('The Shawshank Redemption')
print(shawshank_recommendations)

16947    They Made Me a Fugitive
6548                 Civil Brand
39141          Seven Times Seven
11327                Brute Force
36701             Women's Prison
Name: title, dtype: object
