In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# -------------------------------
# Generate Synthetic Movie Data
# -------------------------------

# Seed NumPy's global generator so repeated runs draw the same random values.
np.random.seed(42)

# Vocabulary used when synthesising movies: 10 genres, 10 directors, 50 actors.
genres_list = [
    'Action', 'Adventure', 'Comedy', 'Drama', 'Fantasy',
    'Horror', 'Musical', 'Romance', 'Sci-Fi', 'Thriller',
]
directors = ['Director_' + str(idx) for idx in range(10)]
actors = ['Actor_' + str(idx) for idx in range(50)]

def random_tags():
    """Pick between 2 and 4 distinct genres from ``genres_list`` at random.

    Returns:
        NumPy array of genre names with no repeats.
    """
    # randint's upper bound is exclusive, so this yields 2, 3 or 4.
    how_many = np.random.randint(2, 5)
    return np.random.choice(genres_list, size=how_many, replace=False)

def generate_movies(n=100):
    """Build a synthetic movie catalogue.

    Every movie gets a 1-based ``movie_id``, a matching ``Movie_<id>`` title,
    a space-separated string of random genres, one random director, and a
    space-separated string of three distinct random actors.

    Args:
        n: Number of movies to create.

    Returns:
        DataFrame with columns ``movie_id``, ``title``, ``genres``,
        ``director`` and ``actors``, one row per movie.
    """
    rows = []
    for movie_number in range(1, n + 1):
        # Draw in a fixed order (genres, director, cast) so a seeded run is
        # reproducible.
        genre_text = ' '.join(random_tags())
        chosen_director = np.random.choice(directors)
        cast = np.random.choice(actors, size=3, replace=False)
        rows.append({
            'movie_id': movie_number,
            'title': f'Movie_{movie_number}',
            'genres': genre_text,
            'director': chosen_director,
            'actors': ' '.join(cast),
        })
    return pd.DataFrame(rows)

movies = generate_movies(n=120)

# -------------------------------
# Create Combined Feature String
# -------------------------------

# Concatenate the text columns left to right, separated by single spaces,
# into one string per movie for the vectorizer to consume.
text_columns = ['title', 'genres', 'director', 'actors']
movies['combined'] = movies[text_columns[0]]
for column_name in text_columns[1:]:
    movies['combined'] = movies['combined'] + ' ' + movies[column_name]

# -------------------------------
# Generate Fake User Ratings
# -------------------------------

def generate_user_ratings(user_id, movies, genre_bias, n_ratings=20):
    """Simulate ratings from one user whose taste leans toward some genres.

    Each rating picks one movie at random (with replacement, so the same
    movie can be rated more than once), counts how many of the preferred
    genres the movie carries, adds Gaussian noise centred on 2.5 with unit
    standard deviation, clips the sum to the 1-5 range and rounds it.

    Args:
        user_id: Value stored in the ``user_id`` column of every row.
        movies: DataFrame with at least ``movie_id`` and ``genres`` columns,
            where ``genres`` is a whitespace-separated string of genre names.
        genre_bias: Iterable of genre names the user favours.
        n_ratings: Number of ratings to draw.

    Returns:
        DataFrame with columns ``user_id``, ``movie_id`` and ``rating``
        (integer ratings from 1 to 5). The columns are present even when
        ``n_ratings`` is 0.
    """
    columns = ['user_id', 'movie_id', 'rating']
    scores = []
    for _ in range(n_ratings):
        movie = movies.sample(1).iloc[0]
        # Compare whole genre tokens: a substring test against the joined
        # string would let a bias such as 'Sci' match 'Sci-Fi'.
        movie_genres = set(movie['genres'].split())
        genre_score = sum(g in movie_genres for g in genre_bias)
        rating = np.clip(genre_score + np.random.normal(2.5, 1), 1, 5)
        scores.append({
            'user_id': user_id,
            'movie_id': movie['movie_id'],
            # Force a plain int so the rating can serve as a repeat count.
            'rating': int(round(rating)),
        })
    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(scores, columns=columns)

# Simulate two users (id 1 = male, id 2 = female) with different genre leanings.
male_bias = ['Action', 'Sci-Fi', 'Thriller']
female_bias = ['Romance', 'Drama', 'Musical']
male_ratings = generate_user_ratings(user_id=1, movies=movies, genre_bias=male_bias)
female_ratings = generate_user_ratings(user_id=2, movies=movies, genre_bias=female_bias)

# Stack both users' ratings into one frame with a fresh 0..n-1 index.
ratings = pd.concat((male_ratings, female_ratings), ignore_index=True)

# -------------------------------
# Build User Profiles
# -------------------------------

def build_profile(user_id):
    """Return one user's rated movies as a single rating-weighted string.

    Selects the user's rows from the module-level ``ratings`` frame, joins
    them to ``movies`` on ``movie_id``, and repeats each movie's ``combined``
    text (with a trailing space) ``rating`` times, so higher-rated movies
    contribute more term occurrences.

    Args:
        user_id: Value to match against ``ratings['user_id']``.

    Returns:
        The repeated per-movie strings joined with single spaces.
    """
    belongs_to_user = ratings['user_id'] == user_id
    rated_movies = ratings[belongs_to_user].merge(movies, on='movie_id')
    padded_text = rated_movies['combined'] + ' '
    repeated_text = padded_text * rated_movies['rating']
    return ' '.join(repeated_text)

# Weighted text profile for user 1 (male) and user 2 (female), in that order.
male_profile, female_profile = (build_profile(uid) for uid in (1, 2))

# -------------------------------
# TF-IDF and Similarity
# -------------------------------

# Fit the vocabulary and IDF weights on the movie catalogue, then project
# both user profiles into that same feature space.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['combined'])

male_vec = tfidf.transform([male_profile])
female_vec = tfidf.transform([female_profile])
# The couple's joint taste is the midpoint of the two profile vectors.
couple_vec = (male_vec + female_vec) / 2

# One similarity score per movie, in the same row order as `movies`.
cos_sim = cosine_similarity(couple_vec, tfidf_matrix).flatten()

# Remove movies already rated
rated_ids = set(ratings['movie_id'])
# Vectorised membership test instead of a per-row `iloc` lookup; the result
# is still a plain list of positional indices into `movies`.
unseen_mask = ~movies['movie_id'].isin(rated_ids)
unseen_indices = np.flatnonzero(unseen_mask.to_numpy()).tolist()

# Stable descending sort, so tied scores keep catalogue order; keep the top 10.
top_indices = sorted(unseen_indices, key=lambda i: cos_sim[i], reverse=True)[:10]

# -------------------------------
# Output Recommendations
# -------------------------------

# Header first, then id, title and genres of the picks, best match first.
print("🎬 Top 10 Movie Recommendations for the Couple:\n")
recommended = movies.iloc[top_indices]
display_columns = ['movie_id', 'title', 'genres']
print(recommended[display_columns].to_string(index=False))


🎬 Top 10 Movie Recommendations for the Couple:

 movie_id     title                            genres
       71  Movie_71    Comedy Action Thriller Musical
       93  Movie_93     Romance Sci-Fi Horror Musical
       16  Movie_16                     Sci-Fi Comedy
      117 Movie_117 Comedy Romance Adventure Thriller
       54  Movie_54                  Adventure Sci-Fi
       97  Movie_97      Comedy Sci-Fi Action Romance
       39  Movie_39   Adventure Sci-Fi Horror Musical
       91  Movie_91    Action Horror Thriller Romance
       85  Movie_85       Drama Sci-Fi Musical Comedy
       67  Movie_67   Adventure Fantasy Comedy Sci-Fi
