# Assigning Moods to Users

In [3]:
import pandas as pd
import numpy as np
import random

ratings_df = pd.read_csv("movielens_dataset/ratings.csv")

def assign_mood():
    possible_moods = ['Joy', 'Sadness', 'Anger', 'Fear', 'Disgust']
    return random.choice(possible_moods)

user_profiles = {}
mood_counter = {'Joy': 0, 'Sadness': 0, 'Anger': 0, 'Fear': 0, 'Disgust': 0}

for user_id in ratings_df['userId'].unique():
    user_ratings = ratings_df[ratings_df['userId'] == user_id]
    mood = assign_mood()
    preferred_movies = user_ratings[user_ratings['rating'] >= 4]['movieId'].tolist()

    user_profiles[user_id] = {
        'mood': mood,
        'preferred_movies': preferred_movies
    }
    
    mood_counter[mood] += 1

print(mood_counter)


{'Joy': 110, 'Sadness': 117, 'Anger': 109, 'Fear': 150, 'Disgust': 124}


# Find the features for the movie and the user to predicts users feedback

In [4]:
from sklearn.metrics.pairwise import cosine_similarity
from joblib import load

# Retrieving data from the MovieLens csv files
ratings = pd.read_csv("movielens_dataset/ratings.csv")
movies = pd.read_csv("movielens_dataset/movies.csv")
tags = pd.read_csv("movielens_dataset/tags.csv")

# User-item matrix creation
user_movie_matrix = ratings.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)

# Calculation of the similarity matrix between users
user_similarity_matrix = cosine_similarity(user_movie_matrix)
user_similarity_df = pd.DataFrame(user_similarity_matrix, index=user_movie_matrix.index, columns=user_movie_matrix.index)

# Compute the similarity matrix between movies
item_similarity_matrix = cosine_similarity(user_movie_matrix.T)
item_similarity_df = pd.DataFrame(item_similarity_matrix, index=user_movie_matrix.columns, columns=user_movie_matrix.columns)

user_means = user_movie_matrix.mean(axis=1)
ratings_for_avg = ratings.merge(movies, on='movieId')
genres_for_avg = ratings_for_avg['genres'].str.split('|', expand=True)
merged_df = pd.concat([ratings_for_avg, genres_for_avg], axis=1)
genres_long = pd.melt(merged_df, id_vars=['userId', 'movieId', 'title','rating', 'timestamp', 'genres'], value_vars=list(genres_for_avg.columns))
average_rating_per_genre = genres_long.groupby('value')['rating'].mean()
genres_long.dropna(inplace=True)

# Get top N similar users for a given user
def get_top_similar_users(similarity_df, user_id, top_n):
    user_similarities = similarity_df.loc[user_id]
    top_users = user_similarities.sort_values(ascending=False).head(top_n).index
    return top_users

# Get top N similar movies for a given movie
def get_top_similar_movies(similarity_df, movie_id, top_n):
    movie_similarities = similarity_df.loc[movie_id]
    top_movies = movie_similarities.sort_values(ascending=False).head(top_n).index
    return top_movies
    
# Calculate the average of user means
def average_user_means(users):
    return user_means.loc[users].mean()

# Calculate the average rating if there are ratings
def calculate_average_rating(similar_movies_ids, all_ratings_df):
    similar_movies_ratings = all_ratings_df[all_ratings_df['movieId'].isin(similar_movies_ids)]
    
    if not similar_movies_ratings.empty:
        return similar_movies_ratings['rating'].mean()
    else:
        return np.nan

# Calculate the average of user means
def average_user_means(users):
    return user_means.loc[users].mean()

# Calculate weighted average rating for each row
def calculate_weighted_avg(row):
    genres = row[8:]  
    valid_genres = [genre for genre in genres if genre in average_rating_per_genre]  
    if valid_genres:
        sum_ratings = sum(average_rating_per_genre[genre] for genre in valid_genres if genre)
        count_genres = len([genre for genre in row[6:] if genre in average_rating_per_genre and genre])
        return sum_ratings / count_genres if count_genres > 0 else 0
    else:
        return 0  

# Use all the function above to find features in particular movie, ratings and user for feedback predictor
def feature_extractor(user_id, movie_id, ratings_df, movies_df, tags_df, user_similarity_df, item_similarity_df):
    feature_dict = {}

    top_similar_users = get_top_similar_users(user_similarity_df, user_id, 5)
    feature_dict['top_similar_users'] = top_similar_users
    
    if movie_id not in item_similarity_df.index:
        try:
            top_similar_movies = get_top_similar_movies(item_similarity_df, movie_id, 1)
            feature_dict['top_similar_movies'] = top_similar_movies
            avg_rating_similar_movies = calculate_average_rating(top_similar_movies, ratings_df)
            feature_dict['avg_rating_of_similar_movies'] = avg_rating_similar_movies
        except Exception as e:
            feature_dict['top_similar_movies'] = None
            feature_dict['avg_rating_of_similar_movies'] = 0
    else:
        top_similar_movies = get_top_similar_movies(item_similarity_df, movie_id, 5)
        feature_dict['top_similar_movies'] = top_similar_movies
        avg_rating_similar_movies = calculate_average_rating(top_similar_movies, ratings_df)
        feature_dict['avg_rating_of_similar_movies'] = avg_rating_similar_movies

    feature_dict['avg_rating_similar_users'] = average_user_means(feature_dict['top_similar_users'])

    user_data = ratings_df[ratings_df['userId'] == user_id]
    movie_data = movies_df[movies_df['movieId'] == movie_id]
    
    user_ratings = ratings_df[ratings_df['userId'] == user_id]
    movie_ratings = ratings_df[ratings_df['movieId'] == movie_id]
    user_movie_rating = user_ratings[user_ratings['movieId'] == movie_id]

    feature_dict['user_avg_rating'] = user_ratings['rating'].mean() if not user_ratings.empty else ratings_df['rating'].mean()
    feature_dict['user_rating_count'] = user_ratings.shape[0]

    feature_dict['movie_avg_rating'] = movie_ratings['rating'].mean() if not movie_ratings.empty else ratings_df['rating'].mean()
    feature_dict['movie_rating_count'] = movie_ratings.shape[0]
    
    genres_df = movies_df['genres'].str.get_dummies(sep='|')
    movie_genres = genres_df.loc[movie_id] if movie_id in genres_df.index else pd.Series([0] * genres_df.shape[1], index=genres_df.columns)
    feature_dict.update(movie_genres.to_dict())

    if not user_movie_rating.empty:
        feature_dict['timestamp'] = user_movie_rating['timestamp'].max()
    else:
        if not movie_ratings.empty:
            feature_dict['timestamp'] = movie_ratings['timestamp'].mean()
        else:
            feature_dict['timestamp'] = pd.Timestamp.now().timestamp()
    
    datetime_obj = pd.to_datetime(feature_dict['timestamp'], unit='s')
    feature_dict['hour'] = datetime_obj.hour
    feature_dict['day_of_week'] = datetime_obj.dayofweek
    feature_dict['day'] = datetime_obj.day
    feature_dict['month'] = datetime_obj.month
    feature_dict['year'] = datetime_obj.year
    
    feature_dict['interaction_movie_user'] = feature_dict['user_avg_rating'] * feature_dict['user_rating_count']
    
    aggregated_tags = tags_df[(tags_df['movieId'] == movie_id)]['tag'].tolist()
    feature_dict['tag_count'] = len(aggregated_tags)

    feature_dict['interaction_movie_user'] = feature_dict['movie_avg_rating'] * feature_dict['user_rating_count']
    feature_dict['user_rating_count_squared'] = feature_dict['user_rating_count'] ** 2

    feature_dict['movieId'] = movie_id
    feature_dict['userId'] = user_id

    movie_genres_list = movies_df.loc[movies_df['movieId'] == movie_id, 'genres'].str.split('|').values
    if movie_genres_list.size > 0:
        feature_dict['num_genres'] = len(movie_genres_list[0])
    else:
        feature_dict['num_genres'] = 0

    feature_dict['is_drama'] = int('Drama' in movie_genres_list[0]) if movie_genres_list.size > 0 else 0

    movie_popularity = ratings_df.groupby('movieId')['rating'].count().reset_index(name='movie_rating_count')
    popularity_threshold = movie_popularity['movie_rating_count'].median()
    
    movie_rating_count = ratings_df[ratings_df['movieId'] == movie_id].shape[0]
    feature_dict['is_popular'] = int(movie_rating_count >= popularity_threshold)

    movies_df['genres'] = movies_df['genres'].apply(lambda x: x.split('|') if isinstance(x, str) else x)
    movie_genres = movies_df.loc[movies_df['movieId'] == movie_id, 'genres'].explode().dropna()

    if not movie_genres.empty:
        feature_dict['weighted_avg_rating'] = movie_genres.map(average_rating_per_genre).mean()
    else:
        feature_dict['weighted_avg_rating'] = 0  

    feature_dict['timestamp'] = int(feature_dict['timestamp'])
    
    features_df = pd.DataFrame([feature_dict])
    
    features_df = features_df.drop(['top_similar_users', 'top_similar_movies', '(no genres listed)', 'Action', 'Adventure',
       'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama',
       'Fantasy', 'Film-Noir', 'Horror', 'IMAX', 'Musical', 'Mystery',
       'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'], axis=1)
    return features_df

user_id = 383  
movie_id = 1076 
features_for_prediction = feature_extractor(user_id, movie_id, pd.read_csv("movielens_dataset/ratings.csv"), pd.read_csv("movielens_dataset/movies.csv"), pd.read_csv("movielens_dataset/tags.csv"), user_similarity_df, item_similarity_df)

print(features_for_prediction)



   avg_rating_of_similar_movies  avg_rating_similar_users  user_avg_rating  \
0                             0                  0.017555         3.794118   

   user_rating_count  movie_avg_rating  movie_rating_count   timestamp  hour  \
0                 34          3.501557                   0  1714298099     9   

   day_of_week  day  ...  year  interaction_movie_user  tag_count  \
0            6   28  ...  2024              119.052937          1   

   user_rating_count_squared  movieId  userId  num_genres  is_drama  \
0                       1156     1076     383           3         1   

   is_popular  weighted_avg_rating  
0           0             3.469362  

[1 rows x 21 columns]


In [5]:
desired_column_order = ['userId', 'movieId', 'timestamp', 'weighted_avg_rating', 'num_genres',
       'user_avg_rating', 'is_drama', 'hour', 'day_of_week', 'day', 'month',
       'year', 'user_rating_count', 'movie_avg_rating', 'movie_rating_count',
       'is_popular', 'tag_count', 'avg_rating_similar_users',
       'avg_rating_of_similar_movies', 'interaction_movie_user',
       'user_rating_count_squared']

features_for_prediction = features_for_prediction[desired_column_order]
column_names = features_for_prediction.columns

#### Testing

In [6]:
loaded_model = load('best_model.joblib')
predicted_feedback = loaded_model.predict(features_for_prediction) 
print(predicted_feedback)

[3.5802162]


## Collabrative-based Rec System

In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import random
from datetime import datetime

ratings_rec = pd.read_csv("movielens_dataset/ratings.csv")
movies_rec = pd.read_csv("movielens_dataset/movies.csv")
tags_rec = pd.read_csv("movielens_dataset/tags.csv")

class StoreTagRecommendFunctions:
    def __init__(self, ratings_df, movies_df, tags_df):
        self.ratings = ratings_df
        self.movies = movies_df
        self.tags = tags_df

        self.ratings_counts = self.ratings.groupby('userId').size()
        self.active_users = self.ratings_counts[self.ratings_counts >= 20].index
        self.ratings = self.ratings[self.ratings['userId'].isin(self.active_users)]
        self.movies = self.movies[self.movies['genres'] != '(no genres listed)']

        self.user_movie_matrix = self.ratings.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
        self.user_similarities = cosine_similarity(self.user_movie_matrix)

        self.movie_tags = pd.merge(self.tags, self.movies, on='movieId')

        self.mood_genre_mapping = {
            "Joy": ['Adventure', 'Children', 'Fantasy', 'Comedy', 'Romance', 'Documentary', 'Musical', 'Animation'],
            "Sadness": ['Drama', 'Thriller', 'Mystery', 'War', 'Action', 'Crime', 'Western', 'Film-Noir'],
            "Anger": ['Action', 'Crime', 'Western', 'Film-Noir'],
            "Fear": ['Horror', 'Sci-Fi'],
            "Disgust": ['Action', 'Crime', 'Western', 'Film-Noir']
        }

        self.previously_recommended_ids = []

    # Calculate softmax values for each score in the vector x using temperature.
    def softmax(self, x, temperature):
        exp_x = np.exp(x / temperature)
        return exp_x / np.sum(exp_x)

    # Build user profile based on movies they have rated highly.
    def build_user_profile(self, user_id):
        user_id = int(user_id)
        high_rated = self.ratings[(self.ratings['userId'] == user_id) & (self.ratings['rating'] >= 3.8)]
        high_rated = high_rated.rename(columns={'movieId_id': 'movieId'})
        high_rated_movies = high_rated.merge(self.movie_tags, on='movieId')

        if high_rated_movies.empty:
            return None

        user_profile_tags = ' '.join(high_rated_movies['tag'].tolist())
        return self.tfidf_vectorizer.transform([user_profile_tags])

    # Recomend popular movies to users
    def recommend_popular_or_trending(self, mood):
        
        aggregated_ratings = self.ratings.groupby('movieId_id').agg(
            average_rating=('rating', 'mean'),
            rating_count=('rating', 'count')
        ).reset_index()

        aggregated_ratings = aggregated_ratings.rename(columns={'movieId_id': 'movieId'})

        movies_with_ratings = pd.merge(self.movies, aggregated_ratings, on='movieId', how='left')

        mood_genres = self.mood_genre_mapping[mood]
        mood_filtered_movies = movies_with_ratings[movies_with_ratings['genres'].apply(lambda x: any(genre in x for genre in mood_genres))]

        if mood_filtered_movies.empty:
            return "No movies available for this mood."

        mood_filtered_movies = mood_filtered_movies.sort_values(by=['average_rating', 'rating_count'], ascending=[False, False])

        return mood_filtered_movies.iloc[0]['title']

    # Create TF-IDF matrix for all movie tags
    def prepare_tfidf_matrix(self):
        self.movie_tags['all_tags'] = self.movie_tags.groupby('movieId')['tag'].transform(lambda x: ' '.join(x.unique()))
        self.tfidf_vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.movie_tags['all_tags'])

    # Calculate the mood relevance through genre score
    def calculate_genre_score(self, genres, mood):
        genre_list = genres.split('|')
        mood_genres = self.mood_genre_mapping[mood]
        score = sum(genre in mood_genres for genre in genre_list) / len(mood_genres)
        return score

    def content_based_recommendations(self, mood, user_id, temperature=1.2):
        if not hasattr(self, 'tfidf_matrix'):
            self.prepare_tfidf_matrix()

        user_profile = self.build_user_profile(user_id)
        if user_profile is None:
            return self.recommend_popular_or_trending(mood)

        mood_genres = self.mood_genre_mapping[mood]
        self.movies = self.movies.reset_index(drop=True)

        for index, row in self.movies.iterrows():
            self.movies.loc[index, 'genre_score'] = self.calculate_genre_score(row['genres'], mood)

        mood_filtered_movies = self.movies[self.movies['genre_score'] > 0]
        mood_filtered_movies = mood_filtered_movies.sort_values(by='genre_score', ascending=False)

        mood_filtered_movie_ids = mood_filtered_movies['movieId'].tolist()
        filtered_indices = [i for i, row in self.movie_tags.iterrows() if row['movieId'] in mood_filtered_movie_ids]
        filtered_tfidf_matrix = self.tfidf_matrix[filtered_indices]

        similarities = cosine_similarity(user_profile, filtered_tfidf_matrix).flatten()
        probabilities = self.softmax(similarities, temperature)

        chosen_index = np.random.choice(range(len(probabilities)), p=probabilities)
        chosen_movie_id = mood_filtered_movies.iloc[chosen_index]['movieId']
        recommended_movie_title = mood_filtered_movies[mood_filtered_movies['movieId'] == chosen_movie_id]['title'].iloc[0]

        return recommended_movie_title


    def collaborative_filtering_recommendations(self, user_id, mood):
        genres = self.mood_genre_mapping[mood]
        
        mood_filtered_movies = self.movies[self.movies['genre_score'] > 0]
        mood_filtered_movie_ids = mood_filtered_movies['movieId'].unique()
        
        valid_movie_ids = [movie_id for movie_id in mood_filtered_movie_ids if movie_id in self.user_movie_matrix.columns]
        
        mood_filtered_matrix = self.user_movie_matrix[valid_movie_ids].copy()
        mood_filtered_matrix.fillna(0, inplace=True)
        
        user_similarities = cosine_similarity(mood_filtered_matrix)
        user_index = list(self.user_movie_matrix.index).index(int(user_id))
        user_sim_scores = list(enumerate(user_similarities[user_index]))
        user_sim_scores = sorted(user_sim_scores, key=lambda x: x[1], reverse=True)
        similar_users_indices = [i[0] for i in user_sim_scores[1:6]] 
        
        similar_users_ratings = mood_filtered_matrix.iloc[similar_users_indices]
        user_ratings = mood_filtered_matrix.iloc[user_index]
        
        recommended = similar_users_ratings.mean(axis=0) - user_ratings
        recommended = recommended[recommended > 0].sort_values(ascending=False)
        
        recommended = recommended.index.intersection(mood_filtered_movies['movieId'])
        
        top_recommended_movie_id = recommended[0] if not recommended.empty else None
        if top_recommended_movie_id:
            top_recommended_movie = self.movies[self.movies['movieId'] == top_recommended_movie_id]['title'].iloc[0]
            return top_recommended_movie, top_recommended_movie_id
        else:
            return None

    # Updates the movie rating based on user's interaction
    def update_user_preference(self, movie_id, action, user_id):
        user_id = int(user_id)
        movie_id = int(movie_id)
    
        new_rating = 3.5 if action == "Like" else 2 if action == "Dislike" else 4.5
    
        if action == "Like":
            self.user_movie_matrix.loc[user_id, movie_id] = 3.5
        elif action == "Dislike":
            self.user_movie_matrix.loc[user_id, movie_id] = 2
        else:
            self.user_movie_matrix.loc[user_id, movie_id] = 4.5
    
        try:
            mask = (self.ratings['userId'] == user_id) & (self.ratings['movieId'] == movie_id)
            if mask.any():
                self.ratings.loc[mask, 'rating'] = new_rating
                self.ratings.loc[mask, 'timestamp'] = datetime.now()
                print("Rating updated.")
            else:
                new_entry = pd.DataFrame({
                    'userId': [user_id],
                    'movieId': [movie_id],
                    'rating': [new_rating],
                    'timestamp': [datetime.now()]
                })
                self.ratings = pd.concat([self.ratings, new_entry], ignore_index=True)
                print("New rating added.")
        except Exception as e:
            print(f"An error occurred: {e}")


    # Process the user's action on a recommended movie
    def process_user_action(self, movie_title, action, user_id, mood, type):
        response = {
            'message': '',
            'next_recommendation_id': None,
            'next_recommendation': None,
            'action': action,
        }

        try:
            movie = self.movies[self.movies['title'] == movie_title].iloc[0]
            movie_id = movie['movieId']
        except IndexError:
            response['message'] = "Movie not found."
            return response

        self.update_user_preference(movie_id, action, user_id)

        if action in ["Like", "Dislike"]:
            try:
                if type == "score":
                    next_recommendation, next_recommendation_id = self.collaborative_filtering_recommendations(user_id, mood)
                elif type == "tag":
                    next_recommendation, next_recommendation_id = self.mood_filtered_tag_recommendations(user_id, mood)
                if next_recommendation:
                    response['next_recommendation'] = next_recommendation
                    response['next_recommendation_id'] = next_recommendation_id
                else:
                    response['message'] += " We're out of recommendations for now."
            except Exception as e:
                print(e)
                response['message'] += f" Error generating next recommendation: {e}"

        return response

## Content-based Rec System

In [9]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import random
from datetime import datetime


ratings_rec = pd.read_csv("movielens_dataset/ratings.csv")
movies_rec = pd.read_csv("movielens_dataset/movies.csv")
tags_rec = pd.read_csv("movielens_dataset/tags.csv")

class TagRecommendFunctions:
    def __init__(self, ratings_df, movies_df, tags_df):
        self.ratings = ratings_df
        self.movies = movies_df
        self.tags = tags_df

        self.ratings_counts = self.ratings.groupby('userId').size()
        self.active_users = self.ratings_counts[self.ratings_counts >= 20].index
        self.ratings = self.ratings[self.ratings['userId'].isin(self.active_users)]
        self.movies = self.movies[self.movies['genres'] != '(no genres listed)']

        self.user_movie_matrix = self.ratings.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
        self.user_similarities = cosine_similarity(self.user_movie_matrix)

        self.movie_tags = pd.merge(self.tags, self.movies, on='movieId')

        self.mood_genre_mapping = {
            "Joy": ['Adventure', 'Children', 'Fantasy', 'Comedy', 'Romance', 'Documentary', 'Musical', 'Animation'],
            "Sadness": ['Drama', 'Thriller', 'Mystery', 'War', 'Action', 'Crime', 'Western', 'Film-Noir'],
            "Anger": ['Action', 'Crime', 'Western', 'Film-Noir'],
            "Fear": ['Horror', 'Sci-Fi'],
            "Disgust": ['Action', 'Crime', 'Western', 'Film-Noir']
        }

        self.previously_recommended_ids = []
        
    # Calculate softmax values for each score in the vector x using temperature.
    def softmax(self, x, temperature):
        """Compute softmax values for each score in the vector x using temperature."""
        exp_x = np.exp(x / temperature)
        return exp_x / np.sum(exp_x)

    # Build user profile based on movies they have rated highly.
    def build_user_profile(self, user_id):
        user_id = int(user_id)
        high_rated = self.ratings[(self.ratings['userId'] == user_id) & (self.ratings['rating'] >= 3.8)]
        high_rated = high_rated.rename(columns={'movieId_id': 'movieId'})
        high_rated_movies = high_rated.merge(self.movie_tags, on='movieId')

        if high_rated_movies.empty:
            return None

        user_profile_tags = ' '.join(high_rated_movies['tag'].tolist())
        return self.tfidf_vectorizer.transform([user_profile_tags])

    # Recomend popular movies to users
    def recommend_popular_or_trending(self, mood):
        aggregated_ratings = self.ratings.groupby('movieId_id').agg(
            average_rating=('rating', 'mean'),
            rating_count=('rating', 'count')
        ).reset_index()

        aggregated_ratings = aggregated_ratings.rename(columns={'movieId_id': 'movieId'})

        movies_with_ratings = pd.merge(self.movies, aggregated_ratings, on='movieId', how='left')

        mood_genres = self.mood_genre_mapping[mood]
        mood_filtered_movies = movies_with_ratings[movies_with_ratings['genres'].apply(lambda x: any(genre in x for genre in mood_genres))]

        if mood_filtered_movies.empty:
            return "No movies available for this mood."

        mood_filtered_movies = mood_filtered_movies.sort_values(by=['average_rating', 'rating_count'], ascending=[False, False])

        return mood_filtered_movies.iloc[0]['title']

    # Create TF-IDF matrix for all movie tags
    def prepare_tfidf_matrix(self):
        self.movie_tags['all_tags'] = self.movie_tags.groupby('movieId')['tag'].transform(lambda x: ' '.join(x.unique()))
        self.tfidf_vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.movie_tags['all_tags'])

    # Calculate the mood relevance through genre score
    def calculate_genre_score(self, genres, mood):
        genre_list = genres.split('|')
        mood_genres = self.mood_genre_mapping[mood]
        score = sum(genre in mood_genres for genre in genre_list) / len(mood_genres)
        return score

    def content_based_recommendations(self, mood, user_id, temperature=1.2):
        if not hasattr(self, 'tfidf_matrix'):
            self.prepare_tfidf_matrix()

        user_profile = self.build_user_profile(user_id)
        if user_profile is None:
            return self.recommend_popular_or_trending(mood)

        mood_genres = self.mood_genre_mapping[mood]
        self.movies = self.movies.reset_index(drop=True)

        for index, row in self.movies.iterrows():
            self.movies.loc[index, 'genre_score'] = self.calculate_genre_score(row['genres'], mood)

        mood_filtered_movies = self.movies[self.movies['genre_score'] > 0]
        mood_filtered_movies = mood_filtered_movies.sort_values(by='genre_score', ascending=False)

        mood_filtered_movie_ids = mood_filtered_movies['movieId'].tolist()
        filtered_indices = [i for i, row in self.movie_tags.iterrows() if row['movieId'] in mood_filtered_movie_ids]
        filtered_tfidf_matrix = self.tfidf_matrix[filtered_indices]

        similarities = cosine_similarity(user_profile, filtered_tfidf_matrix).flatten()
        probabilities = self.softmax(similarities, temperature)

        chosen_index = np.random.choice(range(len(probabilities)), p=probabilities)
        chosen_movie_id = mood_filtered_movies.iloc[chosen_index]['movieId']
        recommended_movie_title = mood_filtered_movies[mood_filtered_movies['movieId'] == chosen_movie_id]['title'].iloc[0]

        return recommended_movie_title

    def mood_filtered_tag_recommendations(self, user_id, mood):
        mood_genres = self.mood_genre_mapping[mood]
        mood_filtered_movies = self.movies[self.movies['genre_score'] > 0]
        mood_filtered_movies = mood_filtered_movies.sort_values(by='genre_score', ascending=False)


        watched_movie_ids_series = self.ratings[self.ratings['userId'] == int(user_id)]['movieId']
        watched_movie_ids = set(watched_movie_ids_series.unique())
        
        if watched_movie_ids_series.empty:
            return "No watched movies found for the user."

        all_excluded_movie_ids = watched_movie_ids.union(self.previously_recommended_ids)
        candidate_movies = mood_filtered_movies[~mood_filtered_movies['movieId'].isin(all_excluded_movie_ids)]
        
        if candidate_movies.empty:
            return "No suitable recommendations based on your mood and preferences at the moment."

        candidate_indices = [self.movie_tags[self.movie_tags['movieId'] == movie_id].index[0] for movie_id in candidate_movies['movieId'] if not self.movie_tags[self.movie_tags['movieId'] == movie_id].index.empty]
        valid_tfidf_matrix = self.tfidf_matrix[candidate_indices]
        
        tag_similarities = cosine_similarity(valid_tfidf_matrix, self.tfidf_matrix)
        mean_similarities = tag_similarities.mean(axis=1)
        max_sim_index = np.argmax(mean_similarities)
        
        recommended_movie_id = candidate_movies.iloc[max_sim_index]['movieId']
        recommended_movie_title = candidate_movies[candidate_movies['movieId'] == recommended_movie_id]['title'].iloc[0]

        self.previously_recommended_ids.append(recommended_movie_id)
        return recommended_movie_title, recommended_movie_id

        
        return recommended_movie_title

    # Updates the movie rating based on user's interaction
    def update_user_preference(self, movie_id, action, user_id):
        user_id = int(user_id)
        movie_id = int(movie_id)
    
        new_rating = 3.5 if action == "Like" else 2 if action == "Dislike" else 4.5
    
        if action == "Like":
            self.user_movie_matrix.loc[user_id, movie_id] = 3.5
        elif action == "Dislike":
            self.user_movie_matrix.loc[user_id, movie_id] = 2
        else:
            self.user_movie_matrix.loc[user_id, movie_id] = 4.5
    
        try:
            mask = (self.ratings['userId'] == user_id) & (self.ratings['movieId'] == movie_id)
            if mask.any():
                self.ratings.loc[mask, 'rating'] = new_rating
                self.ratings.loc[mask, 'timestamp'] = datetime.now()
                print("Rating updated.")
            else:
                new_entry = pd.DataFrame({
                    'userId': [user_id],
                    'movieId': [movie_id],
                    'rating': [new_rating],
                    'timestamp': [datetime.now()]
                })
                self.ratings = pd.concat([self.ratings, new_entry], ignore_index=True)
                print("New rating added.")
        except Exception as e:
            print(f"An error occurred: {e}")

    # Process the user's action on a recommended movie
    def process_user_action(self, movie_title, action, user_id, mood, type):
        """
        Process the user's action on a recommended movie and provide the next steps.
        """
        response = {
            'message': '',
            'next_recommendation_id': None,
            'next_recommendation': None,
            'action': action,
        }

        try:
            movie = self.movies[self.movies['title'] == movie_title].iloc[0]
            movie_id = movie['movieId']
        except IndexError:
            response['message'] = "Movie not found."
            return response

        self.update_user_preference(movie_id, action, user_id)

        if action in ["Like", "Dislike"]:
            try:
                if type == "score":
                    next_recommendation, next_recommendation_id = self.collaborative_filtering_recommendations(user_id, mood)
                elif type == "tag":
                    next_recommendation, next_recommendation_id = self.mood_filtered_tag_recommendations(user_id, mood)
                if next_recommendation:
                    response['next_recommendation'] = next_recommendation
                    response['next_recommendation_id'] = next_recommendation_id
                else:
                    response['message'] += " We're out of recommendations for now."
            except Exception as e:
                print(e)
                response['message'] += f" Error generating next recommendation: {e}"

        return response

# Popular Rec System (Baseline)

In [10]:
import pandas as pd
import numpy as np

class PopularMoviesRecommendationSystem:
    def __init__(self):
        self.movies_df = pd.read_csv("movielens_dataset/movies.csv")
        self.ratings_df = pd.read_csv("movielens_dataset/ratings.csv")
        
        self.sorted_movies_df = self.compute_and_sort_popularity()
        self.top_25_percent_movies = self.get_top_25_percent()
        self.recommended_movie_ids = set()

    def compute_and_sort_popularity(self):
        movie_ratings = pd.merge(self.ratings_df, self.movies_df, on='movieId')
        
        popularity = movie_ratings.groupby('movieId').agg(
            average_rating=pd.NamedAgg(column='rating', aggfunc='mean'),
            rating_count=pd.NamedAgg(column='rating', aggfunc='size')
        ).reset_index()
        
        movies_popularity = pd.merge(self.movies_df, popularity, on='movieId', how='left')
        
        return movies_popularity.sort_values(by=['rating_count', 'average_rating'], ascending=False)

    # Determine the number of movies to consider as the top 25%
    def get_top_25_percent(self):
        top_25_index = int(len(self.sorted_movies_df) * 0.25)
        return self.sorted_movies_df.head(top_25_index)

    def recommend(self):
        available_movies = self.top_25_percent_movies[~self.top_25_percent_movies['movieId'].isin(self.recommended_movie_ids)]
        
        if not available_movies.empty:
            random_index = np.random.randint(0, len(available_movies))
            recommended_movie = available_movies.iloc[random_index]
            
            self.recommended_movie_ids.add(recommended_movie['movieId'])
            
            return recommended_movie['title']
        else:
            return "No more movies available for recommendation."

# Simulator

In [11]:
import random
from sklearn.metrics import classification_report
from joblib import load

def find_movie_id_by_exact_name(movie_name):
    movies_df = pd.read_csv("movielens_dataset/movies.csv")
    matched_movies = movies_df[movies_df['title'] == movie_name]
    if not matched_movies.empty:
        return matched_movies['movieId'].tolist()
    else:
        return "Movie not found."

# Main funtion to run the simulation. Type helps to understand which recomendation system to run
def simulate_user_experience(user_profile, recommendation_system, model, type):
    recommendations = []
    feedbacks = []
    initial_mood = user_profile['mood']
    print(f"Starting simulation with mood: {initial_mood}")

    if type=="popular":
        current_recommendation = recommendation_system.recommend()
    else:
        current_recommendation = recommendation_system.content_based_recommendations(initial_mood, user_id)
        
    while True:
        m_id = find_movie_id_by_exact_name(current_recommendation)
        movie_id = int("".join(map(str, m_id)))
        print(movie_id)
        features_for_prediction = feature_extractor(user_id, movie_id, pd.read_csv("movielens_dataset/ratings.csv"), pd.read_csv("movielens_dataset/movies.csv"), pd.read_csv("movielens_dataset/tags.csv"), user_similarity_df, item_similarity_df)
        
        desired_column_order = ['userId', 'movieId', 'timestamp', 'weighted_avg_rating', 'num_genres',
       'user_avg_rating', 'is_drama', 'hour', 'day_of_week', 'day', 'month',
       'year', 'user_rating_count', 'movie_avg_rating', 'movie_rating_count',
       'is_popular', 'tag_count', 'avg_rating_similar_users',
       'avg_rating_of_similar_movies', 'interaction_movie_user',
       'user_rating_count_squared']
        
        features_for_prediction = features_for_prediction[desired_column_order]
        
        predicted_feedback = model.predict(features_for_prediction) 
        print(predicted_feedback)

        if predicted_feedback <= 2.8:
            feedback_cat = "Dislike"
        elif predicted_feedback <= 3.8:
            feedback_cat = "Like"
        else:
            feedback_cat = "Perfect"
            
        recommendations.append(current_recommendation)
        feedbacks.append(feedback_cat)
        
        print(f"Simulated feedback for '{current_recommendation}': {feedback_cat}")
        
        if feedback_cat == "Perfect":
            print(f"User found the perfect movie: {current_recommendation}")
            break
        else:
            if type == "score":
                res = recommendation_system.process_user_action(current_recommendation, feedback_cat, user_id, initial_mood, "score")
                current_recommendation = res['next_recommendation']
            elif type == "tag":
                res = recommendation_system.process_user_action(current_recommendation, feedback_cat, user_id, initial_mood, "tag")
                current_recommendation = res['next_recommendation']
            elif type == "popular":
                current_recommendation = recommendation_system.recommend()
            if not current_recommendation:
                print("No more recommendations available.")
                break

    print("\nSimulation Summary:")
    print(f"Total recommendations made: {len(recommendations)}")
    if "perfect" in feedbacks:
        print(f"Recommendations until perfect: {feedbacks.index('perfect') + 1}")
    else:
        print("No perfect recommendation found during the simulation.")

    return recommendations, feedbacks, initial_mood

def run_simulation_across_users(user_profiles, model):
    tag_score_simulation_results = {}
    tag_simulation_results = {}
    popular_simulation_results = {}

    for user_id, user_profile in user_profiles.items():
        recommendation_system = StoreTagRecommendFunctions(ratings_rec, movies_rec, tags_rec)
        print(f"\nSimulating experience for User ID: {user_id}")

        recommendations, feedbacks, mood = simulate_user_experience(user_profile, recommendation_system, model, "score")

        print(recommendations)
        print(feedbacks)
        tag_score_simulation_results[user_id] = {
            'userId' : user_id,
            'mood' : mood,
            'recommendations': recommendations,
            'feedbacks': feedbacks,
            'number of recs': len(recommendations),
        }

    for user_id, user_profile in user_profiles.items():
        recommendation_system = TagRecommendFunctions(ratings_rec, movies_rec, tags_rec)
        print(f"\nSimulating experience for User ID: {user_id}")

        recommendations, feedbacks, mood = simulate_user_experience(user_profile, recommendation_system, model, "tag")

        print(recommendations)
        print(feedbacks)
        tag_simulation_results[user_id] = {
            'userId' : user_id,
            'mood' : mood,
            'recommendations': recommendations,
            'feedbacks': feedbacks,
            'number of recs': len(recommendations),
        }

    for user_id, user_profile in user_profiles.items():
        recommendation_system = PopularMoviesRecommendationSystem()
        print(f"\nSimulating experience for User ID: {user_id}")

        recommendations, feedbacks, mood = simulate_user_experience(user_profile, recommendation_system, model, "popular")

        print(recommendations)
        print(feedbacks)
        popular_simulation_results[user_id] = {
            'userId' : user_id,
            'mood' : mood,
            'recommendations': recommendations,
            'feedbacks': feedbacks,
            'number of recs': len(recommendations),
        }

    score_tag_results_df = pd.DataFrame.from_dict(tag_score_simulation_results, orient='index')
    score_tag_results_df.to_csv("simulation_results_score_tag.csv")
    print("All simulations completed. Results saved.")

    tag_results_df = pd.DataFrame.from_dict(tag_simulation_results, orient='index')
    tag_results_df.to_csv("simulation_results_tag.csv")
    print("All simulations completed. Results saved.")

    # Save the results to a file or analyze 
    popular_results_df = pd.DataFrame.from_dict(popular_simulation_results, orient='index')
    popular_results_df.to_csv("simulation_results_popular.csv")
    print("All simulations completed. Results saved.")

# Feedback Predictor
loaded_model = load('best_model.joblib')
run_simulation_across_users(user_profiles, loaded_model)



Simulating experience for User ID: 1
Starting simulation with mood: Disgust
43912
[1.7139667]
Simulated feedback for 'Freedomland (2006)': Dislike
New rating added.
1396
[3.395031]
Simulated feedback for 'Sneakers (1992)': Like
New rating added.
318
[4.0990586]
Simulated feedback for 'Shawshank Redemption, The (1994)': Perfect
User found the perfect movie: Shawshank Redemption, The (1994)

Simulation Summary:
Total recommendations made: 3
No perfect recommendation found during the simulation.
['Freedomland (2006)', 'Sneakers (1992)', 'Shawshank Redemption, The (1994)']
['Dislike', 'Like', 'Perfect']

Simulating experience for User ID: 2
Starting simulation with mood: Anger
102278
[1.2750622]
Simulated feedback for 'Pawn (2013)': Dislike
New rating added.
1396
[3.395031]
Simulated feedback for 'Sneakers (1992)': Like
New rating added.
318
[4.0990586]
Simulated feedback for 'Shawshank Redemption, The (1994)': Perfect
User found the perfect movie: Shawshank Redemption, The (1994)

Simula

  self.ratings.loc[mask, 'timestamp'] = datetime.now()


[4.1826906]
Simulated feedback for 'Star Wars: Episode IV - A New Hope (1977)': Perfect
User found the perfect movie: Star Wars: Episode IV - A New Hope (1977)

Simulation Summary:
Total recommendations made: 2
No perfect recommendation found during the simulation.
['Double Jeopardy (1999)', 'Star Wars: Episode IV - A New Hope (1977)']
['Like', 'Perfect']

Simulating experience for User ID: 203
Starting simulation with mood: Joy
1270
[4.3987007]
Simulated feedback for 'Back to the Future (1985)': Perfect
User found the perfect movie: Back to the Future (1985)

Simulation Summary:
Total recommendations made: 1
No perfect recommendation found during the simulation.
['Back to the Future (1985)']
['Perfect']

Simulating experience for User ID: 204
Starting simulation with mood: Sadness
4251
[1.6128713]
Simulated feedback for 'Chopper (2000)': Dislike
New rating added.
1259
[4.3199825]
Simulated feedback for 'Stand by Me (1986)': Perfect
User found the perfect movie: Stand by Me (1986)

Sim