In [26]:
import pandas as pd

movies = pd.read_csv("movies_metadata.csv", low_memory=False, on_bad_lines='skip')

movies = movies[['title', 'overview']]
movies.dropna(inplace=True)
movies = movies[movies['overview'].str.strip().astype(bool)]
movies.drop_duplicates(inplace=True)

movies = movies.head(1000)
movies.reset_index(drop=True, inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movies.drop_duplicates(inplace=True)


In [27]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['overview'])

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)



In [28]:
indices = pd.Series(movies.index, index=movies['title']).drop_duplicates()

def get_recommendations(title, cosine_sim=cosine_sim):
    idx = indices.get(title)

    if idx is None:
        print("Movie not found!")
        return []

    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]

    return movies['title'].iloc[movie_indices]


In [30]:
movie_name = input("Enter a movie you like: ")
print("Top 5 similar movies:")
print(get_recommendations(movie_name))


Enter a movie you like: godfather
Top 5 similar movies:
Movie not found!
[]


In [31]:
def extract_keywords(user_input):
    user_input = user_input.lower()
    genres = ['action', 'comedy', 'romance', 'thriller', 'adventure', 'drama', 'horror', 'sci-fi', 'animation', 'crime', 'mystery']
    extracted = [genre for genre in genres if genre in user_input]
    return extracted if extracted else ["drama"]


In [32]:
def recommend_by_mood(user_input, top_n=5):
    keywords = extract_keywords(user_input)
    print("Detected mood genres:", keywords)

    filtered_movies = movies[movies['overview'].str.contains('|'.join(keywords), case=False)]

    if filtered_movies.empty:
        return "No matching movies found 😔"

    tfidf_matrix_filtered = tfidf.fit_transform(filtered_movies['overview'])
    cosine_sim_filtered = cosine_similarity(tfidf_matrix_filtered, tfidf_matrix_filtered)

    idx = 0
    sim_scores = list(enumerate(cosine_sim_filtered[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    indices = [i[0] for i in sim_scores]
    return filtered_movies['title'].iloc[indices]


In [33]:
user_mood = input("Tell me your movie mood (e.g., ‘I want a romantic comedy’): ")
print(recommend_by_mood(user_mood))


Tell me your movie mood (e.g., ‘I want a romantic comedy’): drama
Detected mood genres: ['drama']
295                 Quiz Show
285        Once Were Warriors
742            The Grass Harp
151            Beyond Rangoon
592    The Wooden Man's Bride
Name: title, dtype: object


In [35]:
upcoming_movies = pd.DataFrame({
    'title': ['Deadpool 3', 'Dune 2', 'Inside Out 2', 'Beetlejuice 2'],
    'trailer_comments_sentiment': [0.9, 0.85, 0.95, 0.7]
})

def recommend_upcoming(top_n=3):
    recommended = upcoming_movies.sort_values(by='trailer_comments_sentiment', ascending=False)
    return recommended['title'].head(top_n)


In [36]:
print(" Upcoming movies you might enjoy:")
print(recommend_upcoming())


 Upcoming movies you might enjoy:
2    Inside Out 2
0      Deadpool 3
1          Dune 2
Name: title, dtype: object


In [37]:
import json
import os

def save_user_memory(user_id, liked_movie):
    if os.path.exists('user_memory.json'):
        with open('user_memory.json', 'r') as f:
            data = json.load(f)
    else:
        data = {}

    if user_id not in data:
        data[user_id] = []

    if liked_movie not in data[user_id]:
        data[user_id].append(liked_movie)

    with open('user_memory.json', 'w') as f:
        json.dump(data, f)

def recommend_from_memory(user_id):
    try:
        with open('user_memory.json', 'r') as f:
            data = json.load(f)
        past_movies = data.get(user_id, [])
        if not past_movies:
            return "No previous history found for personalization."

        print("🎞️ Your previous liked movies:", past_movies)
        return recommend(past_movies[-1])
    except:
        return "Error accessing memory."


In [38]:
user_id = "bharathi"
liked = input("Enter a movie you recently liked: ")
save_user_memory(user_id, liked)

print("🎯 Based on your watch history, we suggest:")
print(recommend_from_memory(user_id))


Enter a movie you recently liked: deadpool
🎯 Based on your watch history, we suggest:
🎞️ Your previous liked movies: ['deadpool', 'avengers']
Error accessing memory.
