In [1]:
import pandas as pd

In [2]:
from pathlib import Path

# Folder that holds the dataset's CSV files. This is the only place the
# location is spelled out: point DATA_DIR at your own copy of the data and
# every file below is resolved relative to it.
DATA_DIR = Path(r"C:\Users\HP\Desktop\Movie Recommendation system\archive")

# movie.csv  -> one row per movie; 'movieId', 'title' and 'genres' are used below
# rating.csv -> user ratings; 'movieId' and 'rating' are used below
# tag.csv    -> free-text user tags; 'movieId' and 'tag' are used below
movies = pd.read_csv(DATA_DIR / "movie.csv")
ratings = pd.read_csv(DATA_DIR / "rating.csv")
tags = pd.read_csv(DATA_DIR / "tag.csv")

In [3]:
import re

# Turn the pipe-delimited genre string ("Adventure|Comedy") into a list.
# Values that are already lists pass through, so re-running the cell is safe.
# Anything else (e.g. a missing value) becomes an empty list: later cells
# iterate over and ' '.join() this column, which would raise on NaN.
movies['genres'] = movies['genres'].apply(
    lambda x: x.split('|') if isinstance(x, str) else (x if isinstance(x, list) else [])
)


def _first_year_in_title(title):
    """Return the first parenthesised four-digit number in `title`, or None.

    The value is returned as a string (e.g. "1995"). Non-string titles
    (such as NaN) yield None instead of raising.
    """
    if not isinstance(title, str):
        return None
    found = re.search(r'\((\d{4})\)', title)
    return found.group(1) if found else None


# "Toy Story (1995)" -> "1995"; titles without a "(YYYY)" part get None.
movies['year'] = movies['title'].apply(_first_year_in_title)

In [4]:
# Discard rows without a tag; the explicit copy keeps the column assignment
# below from triggering pandas' SettingWithCopyWarning.
tags = tags.dropna(subset=['tag']).copy()

# Normalise tag text: force to str, lower-case, trim surrounding whitespace.
tags['tag'] = tags['tag'].astype(str).str.lower().str.strip()

In [5]:
# Left join keeps every movie, including untagged ones (their 'tag' is NaN).
# A movie with several tags appears once per tag in the result.
movie_data = movies.merge(tags, how='left', on='movieId')

In [6]:
# Mean rating per movie, exposed as the 'avg_rating' column.
ratings_summary = (
    ratings.groupby('movieId')['rating']
    .mean()
    .reset_index()
    .rename(columns={'rating': 'avg_rating'})
)

# Attach the average to every movie/tag row; movies nobody rated get NaN.
movie_data = movie_data.merge(ratings_summary, how='left', on='movieId')


In [7]:
# Mood label -> tag keywords that signal that mood.
# get_mood_from_tags assigns a mood to a tag whenever one of the mood's
# keywords occurs anywhere inside the tag text (plain substring test), so
# keywords are written in lower case to match the lower-cased tags.
mood_keywords = {
    "happy": ["funny", "hilarious", "lighthearted", "feel-good"],
    "sad": ["emotional", "tragic", "heartbreaking", "tearjerker"],
    "excited": ["action", "thrilling", "fast-paced"],
    "bored": ["mystery", "suspense", "mind-bending"],
    "romantic": ["love", "romance", "passion"],
}

In [8]:
def get_mood_from_tags(tag_text):
    """Map a tag string to the list of moods whose keywords it contains.

    Parameters:
    - tag_text (str): free-text tag. Matching is case-insensitive and uses a
      plain substring test, so "unfunny" still matches the keyword "funny".

    Returns:
    - list of str: matching mood labels in `mood_keywords` order, or
      ["general"] when nothing matches or `tag_text` is not a string
      (e.g. NaN / None for a movie without tags).
    """
    if not isinstance(tag_text, str):
        # Missing tags arrive as NaN/None; treat them like an empty tag.
        return ["general"]

    normalized = tag_text.lower()
    moods = [
        mood
        for mood, keywords in mood_keywords.items()
        if any(keyword in normalized for keyword in keywords)
    ]
    return moods or ["general"]

# Label every movie/tag row with its moods; a missing tag is treated as an
# empty string so the helper always receives text.
movie_data['mood'] = (
    movie_data['tag']
    .fillna("")
    .map(get_mood_from_tags)
)

In [9]:
# Weather condition (lower-case key) -> genres suggested for that weather.
# get_recommended_movies lower-cases its weather argument before the lookup.
weather_genre_map = {
    "sunny": ["Adventure", "Comedy"],
    "rainy": ["Romance", "Drama"],
    "cloudy": ["Sci-Fi", "Mystery"]
}

# Weekday (capitalised key) -> genres suggested for that day. Days that are
# not listed contribute no genres.
# NOTE(review): "Motivational" and "Family" may not exist as genre labels in
# movie.csv (the preview further down shows "Children", for example); entries
# that do not match a real label can never select a movie -- confirm.
day_genre_map = {
    "Monday": ["Motivational", "Documentary"],
    "Friday": ["Thriller", "Action"],
    "Sunday": ["Family", "Comedy"]
}

In [10]:
def get_recommended_movies(user_mood, weather_condition, day_of_week, df):
    """Return up to ten distinct movies matching a mood, the weather and the weekday.

    A row qualifies when `user_mood` is in its 'mood' list and at least one of
    its genres is suggested by `weather_genre_map` or `day_genre_map`.

    Parameters:
    - user_mood (str): mood label to look for (compared in lower case).
    - weather_condition (str): key into `weather_genre_map`; case-insensitive.
    - day_of_week (str): key into `day_genre_map`; case-insensitive.
    - df (DataFrame): needs 'mood', 'genres' and 'avg_rating' columns. When a
      'movieId' column is present it is used to collapse repeated movies.

    Returns:
    - DataFrame: at most ten rows of `df`, highest `avg_rating` first. Unknown
      weather/day values simply contribute no genres.
    """
    wanted_mood = str(user_mood).strip().lower()
    weather_genres = weather_genre_map.get(str(weather_condition).strip().lower(), [])
    # day_genre_map keys are capitalised ("Friday"), so normalise the same way.
    day_genres = day_genre_map.get(str(day_of_week).strip().capitalize(), [])
    # Built once instead of concatenating the two lists for every row.
    wanted_genres = set(weather_genres) | set(day_genres)

    def mood_match(moods):
        # Non-list cells (e.g. NaN) never match instead of raising.
        return isinstance(moods, (list, tuple, set)) and wanted_mood in moods

    def genre_match(genres):
        return isinstance(genres, (list, tuple, set)) and not wanted_genres.isdisjoint(genres)

    mood_filtered = df[df['mood'].apply(mood_match).astype(bool)]
    genre_filtered = mood_filtered[mood_filtered['genres'].apply(genre_match).astype(bool)]

    sorted_recommendations = genre_filtered.sort_values(by='avg_rating', ascending=False)

    # The merged frame holds one row per (movie, tag); without this step the
    # same highly rated movie could fill several of the ten slots.
    if 'movieId' in sorted_recommendations.columns:
        sorted_recommendations = sorted_recommendations.drop_duplicates(subset='movieId')

    return sorted_recommendations.head(10)  # top 10 recommendations


In [11]:
# Free-text search field: the movie's genres joined by spaces, then a space,
# then the row's tag (empty string when the row has no tag).
movie_data['search_features'] = (
    movie_data['genres'].map(' '.join)
    + " "
    + movie_data['tag'].fillna('')
)

In [12]:
# Persist the enriched movie/tag table to the working directory (row index omitted).
# NOTE(review): CSV has no list type, so the list-valued 'genres' and 'mood'
# columns are stored as text and come back as strings when the file is
# re-read -- confirm downstream readers parse them.
movie_data.to_csv("processed_movie_data.csv", index=False)

In [13]:
# Preview the first rows; note that the same movie repeats once per tag.
movie_data.head()

Unnamed: 0,movieId,title,genres,year,userId,tag,timestamp,avg_rating,mood,search_features
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1995,1644.0,watched,2014-12-04 23:44:40,3.92124,[general],Adventure Animation Children Comedy Fantasy wa...
1,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1995,1741.0,computer animation,2007-07-08 13:59:15,3.92124,[general],Adventure Animation Children Comedy Fantasy co...
2,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1995,1741.0,disney animated feature,2007-07-08 22:21:47,3.92124,[general],Adventure Animation Children Comedy Fantasy di...
3,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1995,1741.0,pixar animation,2007-07-08 22:46:10,3.92124,[general],Adventure Animation Children Comedy Fantasy pi...
4,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1995,1741.0,tã©a leoni does not star in this movie,2009-06-15 19:19:33,3.92124,[general],Adventure Animation Children Comedy Fantasy tã...


In [14]:
# Spot-check a single tag value: the row labelled 1, column 'tag'.
movie_data.loc[1, "tag"]

'computer animation'

In [15]:
# Fixed vocabularies for the hand-built one-hot / multi-hot vectors below.
# The position of a label in its list is the index of its slot in the vector.
unique_genres = ['Action', 'Comedy', 'Romance', 'Drama', 'Thriller', 'Sci-Fi']
# NOTE(review): mood_keywords also defines a 'romantic' mood that is not listed
# here; left unchanged so the 4-slot mood vector below keeps its length.
unique_moods = ['happy', 'sad', 'excited', 'bored']
unique_weather = ['sunny', 'rainy', 'cloudy']
# All seven weekdays spelled out. The previous literal `...` placeholder put an
# Ellipsis object into the list and left it with 4 entries instead of the 7
# slots that day_vector uses.
unique_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']


In [None]:

# Hand-written example of a multi-hot movie vector with six slots.
# NOTE(review): presumably indexed like unique_genres, which would make this
# an Action + Thriller movie -- confirm.
movie_vector = [1, 0, 0, 0, 1, 0]  


In [None]:

# Hand-written one-hot examples for the user's context.
# NOTE(review): presumably indexed like unique_moods / unique_weather /
# unique_days, i.e. mood "happy", weather "rainy" and the fifth weekday
# ("Friday" when the week starts on Monday) -- confirm.
mood_vector = [1, 0, 0, 0]


weather_vector = [0, 1, 0]


day_vector = [0, 0, 0, 0, 1, 0, 0]


In [18]:
import numpy as np

# Lay the mood, weather and day encodings end to end in one flat array.
user_vector = np.hstack((mood_vector, weather_vector, day_vector))


In [None]:

# Second hand-written six-slot example; it rebinds movie_vector from the
# earlier cell.
# NOTE(review): presumably indexed like unique_genres, which would make this
# Action + Romance + Sci-Fi -- confirm.
movie_vector = [1, 0, 1, 0, 0, 1]  




In [None]:
import numpy as np

# User input
mood = "happy"
weather = "rainy"
day = "Friday"

# Genres suggested by the weather plus those suggested by the weekday.
# Unknown keys contribute nothing instead of raising KeyError.
context_genres = weather_genre_map.get(weather, []) + day_genre_map.get(day, [])

# Multi-hot encoding of those genres over `unique_genres` (1 = genre wanted).
# The earlier version added two NumPy arrays of genre *names*; depending on the
# NumPy version that either raises or glues the strings together element by
# element -- it never yields a numeric genre vector.
context_vector = np.array([1 if genre in context_genres else 0 for genre in unique_genres])


In [21]:
# Make sure every 'genres' cell is a list of genre names:
#   list -> kept as is (so the cell can be re-run safely)
#   str  -> split on '|'
#   else -> [] (a missing value used to go through str() and become the bogus
#           genre ['nan'], which then leaked into the genre vocabulary)
movies['genres'] = movies['genres'].apply(
    lambda x: x if isinstance(x, list) else (x.split('|') if isinstance(x, str) else [])
)


In [None]:

# Alphabetical list of every genre that occurs in any movie's genre list.
all_genres = sorted({genre for genre_list in movies['genres'] for genre in genre_list})

# Genre name -> its position in all_genres, i.e. its slot in a genre vector.
genre_to_index = dict(zip(all_genres, range(len(all_genres))))

def genre_to_vector(genres):
    """Encode an iterable of genre names as a multi-hot float vector.

    The vector has one slot per entry of `genre_to_index`; a slot is 1 when
    the corresponding genre is in `genres` and 0 otherwise. Names that are not
    in `genre_to_index` are ignored.
    """
    encoded = np.zeros(len(genre_to_index))
    for name in genres:
        slot = genre_to_index.get(name)
        if slot is not None:
            encoded[slot] = 1
    return encoded

# Store each movie's multi-hot genre encoding next to its genre list.
movies['genre_vector'] = movies['genres'].map(genre_to_vector)


In [None]:

import pickle

# Serialise the genre lookup table and the enriched movies frame into the
# working directory, one pickle file per object.
for target_name, payload in (
    ("genre_to_index.pkl", genre_to_index),
    ("movies.pkl", movies),
):
    with open(target_name, "wb") as f:
        pickle.dump(payload, f)


In [24]:
# 2-D matrix with one row per movie and one column per genre slot.
movie_vectors = np.stack(movies['genre_vector'].tolist())


In [None]:
# Hand-written nine-slot placeholder; the next cell rebinds context_vector.
# NOTE(review): nine slots need not match the number of genre columns in
# movie_vectors -- confirm before using this value for similarity scoring.
context_vector = np.array([1, 0, 0, 1, 0, 0, 1, 0, 0])  

In [26]:
# Encode the wanted genres with the same helper used for the movies, so the
# context vector has the same slots as each movie's genre_vector.
context_vector = genre_to_vector(['Romance', 'Thriller'])


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Cosine similarity between the single context row and every movie row.
scores = cosine_similarity(context_vector.reshape(1, -1), movie_vectors)

# Row positions of the ten highest scores, best first.
top_indices = np.argsort(scores[0])[::-1][:10]
top_movies = movies.iloc[top_indices]


In [28]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd

def recommend_by_genre(input_genres, movies_df, genre_to_vector, top_n=10):
    """
    Recommend movies based on genre input using cosine similarity.

    Parameters:
    - input_genres (list of str): e.g., ['Romance', 'Thriller']
    - movies_df (DataFrame): must include 'title', 'genres' and 'genre_vector'
    - genre_to_vector (function): converts a list of genres to a multi-hot vector
    - top_n (int): maximum number of results to return

    Returns:
    - DataFrame: up to `top_n` rows with columns 'title' and 'genres', most
      similar first. Movies sharing no genre with the request are never
      returned, so the result can be shorter than `top_n`. It is empty when
      `top_n` is not positive, `movies_df` has no rows, or none of
      `input_genres` is recognised by `genre_to_vector`.
    """
    result_columns = ['title', 'genres']
    no_results = movies_df.iloc[0:0][result_columns].reset_index(drop=True)

    # np.stack cannot handle an empty frame, and a negative top_n would slice
    # from the wrong end of the ranking.
    if top_n <= 0 or len(movies_df) == 0:
        return no_results

    # Convert input genres to a multi-hot vector
    context_vector = np.asarray(genre_to_vector(input_genres), dtype=float)
    if not context_vector.any():
        # An all-zero query is equally (dis)similar to every movie, so any
        # "top N" picked from it would be arbitrary.
        return no_results

    # Stack movie vectors from the DataFrame
    movie_vectors = np.stack(movies_df['genre_vector'].values)

    # Compute cosine similarity (one query row against all movies)
    scores = cosine_similarity([context_vector], movie_vectors)[0]

    # Best scores first; the stable sort keeps tied movies in their original
    # order, which makes the output deterministic.
    top_indices = np.argsort(-scores, kind='stable')[:top_n]

    # Drop movies that share no genre at all with the request.
    top_indices = top_indices[scores[top_indices] > 0]

    # Return top recommended movies
    return movies_df.iloc[top_indices][result_columns].reset_index(drop=True)
