In [40]:
import pandas as pd
from sklearn.decomposition import NMF
import numpy as np
import time
from sklearn.preprocessing import MinMaxScaler


In [41]:
ratings= pd.read_csv('./data/ratings.csv')
movies = pd.read_csv('./data/movies.csv')
movie_id_to_title = dict(zip(movies['movieId'], movies['title']))



In [42]:
def filter_by_average_rating(movies_df, ratings_df, min_rating=0):
    print(f"Filtering movies with average rating >= {min_rating}...")
    
    movies_with_ratings = pd.merge(movies_df, ratings_df, on='movieId', how='inner')
    
    average_ratings = movies_with_ratings.groupby('movieId')['rating'].mean()
    
    popular_movies = average_ratings[average_ratings >= min_rating].index
    
    filtered_movies_with_ratings = movies_with_ratings[movies_with_ratings['movieId'].isin(popular_movies)]
    
    print(f"Kept {len(popular_movies)} movies with average rating >= {min_rating}")
    
    return filtered_movies_with_ratings

In [None]:
def filter_users_by_activity(ratings_df, min_percentile, max_percentile):

    print(f"Filtering users with activity between {min_percentile*100:.0f}th and {max_percentile*100:.0f}th percentiles...")
    
    # Count ratings per user
    user_rating_counts = ratings_df.groupby('userId')['rating'].count()
    print(f"Before filtering: {len(user_rating_counts)} users with rating statistics:")
    print(user_rating_counts.describe())
    
    # Get percentile thresholds
    min_threshold = user_rating_counts.quantile(min_percentile)
    max_threshold = user_rating_counts.quantile(max_percentile)
    
    # Filter users
    filtered_users = user_rating_counts[(user_rating_counts >= min_threshold) & 
                                       (user_rating_counts <= max_threshold)].index
    
    filtered_ratings = ratings_df[ratings_df['userId'].isin(filtered_users)]
    
    # Get statistics after filtering
    user_rating_counts_after = filtered_ratings.groupby('userId')['rating'].count()
    print(f"After filtering: {len(user_rating_counts_after)} users with rating statistics:")
    print(user_rating_counts_after.describe())
    
    return filtered_ratings

In [None]:
def filter_movies_by_popularity(ratings_df, min_ratings):

    print(f"Filtering movies with at least {min_ratings} ratings...")
    
    # Count ratings per movie
    movie_rating_counts = ratings_df.groupby('movieId').size()
    
    print(f"Before filtering: {len(movie_rating_counts)} movies")
    print(f"Movies with <{min_ratings} ratings: {(movie_rating_counts < min_ratings).sum()}")
    
    # Filter movies with sufficient ratings
    popular_movies = movie_rating_counts[movie_rating_counts >= min_ratings].index
    
    filtered_ratings = ratings_df[ratings_df['movieId'].isin(popular_movies)]
    
    print(f"After filtering: {len(popular_movies)} movies kept")
    
    return filtered_ratings

In [45]:
def select_diverse_users(ratings_df, max_users_per_movie=1000, similarity_threshold=0.3):
    """
    Select a diverse set of users, prioritizing users with more ratings.
    
    Parameters:
    -----------
    ratings_df : pandas.DataFrame
        DataFrame containing user ratings
    max_users_per_movie : int
        Maximum number of users allowed per movie
    similarity_threshold : float
        Maximum allowed Jaccard similarity between users
        
    Returns:
    --------
    pandas.DataFrame
        Filtered ratings DataFrame with selected users
    """
    print(f"Selecting diverse users with max {max_users_per_movie} users per movie...")
    
    # Get initial statistics
    total_users = ratings_df['userId'].nunique()
    total_movies = ratings_df['movieId'].nunique()
    total_ratings = len(ratings_df)
    
    print(f"Initial dataset: {total_ratings} ratings from {total_users} users on {total_movies} movies")
    
    # Create a movie-to-users dictionary to track how many users rated each movie
    movie_to_users = {}
    for movie_id, group in ratings_df.groupby('movieId'):
        movie_to_users[movie_id] = set(group['userId'])
    
    # Create a user-to-movies dictionary
    user_to_movies = {}
    for user_id, group in ratings_df.groupby('userId'):
        user_to_movies[user_id] = set(group['movieId'])
    
    # Sort users by the number of ratings (CHANGED: now in descending order)
    user_rating_counts = ratings_df.groupby('userId').size()
    sorted_users = user_rating_counts.sort_values(ascending=False).index.tolist()
    
    # Add some randomness within rating count groups to prevent bias
    # Group users by rating count
    rating_count_groups = {}
    for user_id, count in user_rating_counts.items():
        if count not in rating_count_groups:
            rating_count_groups[count] = []
        rating_count_groups[count].append(user_id)
    
    # Shuffle each group and rebuild the sorted users list
    shuffled_sorted_users = []
    for count in sorted(rating_count_groups.keys(), reverse=True):
        group = rating_count_groups[count]
        np.random.shuffle(group)
        shuffled_sorted_users.extend(group)
    
    selected_users = set()
    movie_user_counts = {movie_id: 0 for movie_id in movie_to_users.keys()}
    
    # Process users in the sorted order (now more ratings first)
    for user_id in shuffled_sorted_users:
        user_movies = user_to_movies[user_id]
        
        # Skip users with too few ratings (optional)
        if len(user_movies) < 0:  # Minimum threshold
            continue
            
        # Check if this user would exceed the max users for any movie
        exceeds_limit = False
        for movie_id in user_movies:
            if movie_user_counts[movie_id] >= max_users_per_movie:
                exceeds_limit = True
                break
        
        # Check similarity with already selected users
        too_similar = False
        if not exceeds_limit and len(selected_users) > 0:
            # Only check similarity with a sample of existing users for efficiency
            # This optimization is crucial when you have many users
            sample_size = min(100, len(selected_users))
            sample_users = np.random.choice(list(selected_users), sample_size, replace=False)
            
            for selected_user in sample_users:
                selected_user_movies = user_to_movies[selected_user]
                
                # Only calculate similarity if there's a minimum overlap
                if len(user_movies) > 0 and len(selected_user_movies) > 0:
                    # Calculate Jaccard similarity between users
                    intersection = len(user_movies.intersection(selected_user_movies))
                    union = len(user_movies.union(selected_user_movies))
                    
                    similarity = intersection / union
                    if similarity > similarity_threshold:
                        too_similar = True
                        break
        
        # Add user if they don't exceed limits and aren't too similar
        if not exceeds_limit and not too_similar:
            selected_users.add(user_id)
            
            # Update movie user counts
            for movie_id in user_movies:
                movie_user_counts[movie_id] += 1
    
    # Filter the ratings DataFrame to only include selected users
    filtered_ratings = ratings_df[ratings_df['userId'].isin(selected_users)]
    
    # Calculate and print statistics
    avg_ratings_per_user = filtered_ratings.groupby('userId').size().mean()
    
    print(f"After filtering: {len(filtered_ratings)} ratings from {len(selected_users)} users")
    print(f"Retained {len(filtered_ratings)/total_ratings:.1%} of original ratings")
    print(f"Average ratings per selected user: {avg_ratings_per_user:.1f}")
    
    return filtered_ratings

In [46]:
def remove_columns(filtered_df):
    if 'timestamp' in filtered_df.columns:
        filtered_df.drop(columns=["timestamp"], inplace=True)
    if 'genres' in filtered_df.columns:
        filtered_df.drop(columns=["genres"], inplace=True)
    if 'title' in filtered_df.columns:
        filtered_df.drop(columns=["title"], inplace=True)
    
    print(f"Final dataset: {len(filtered_df)} ratings across {filtered_df['movieId'].nunique()} movies from {filtered_df['userId'].nunique()} users")

    return filtered_df



In [47]:
ratings = remove_columns(ratings)

Final dataset: 33832162 ratings across 83239 movies from 330975 users


In [48]:

min_avg_rating=0
user_min_percentile=0.10
user_max_percentile=0.997
min_ratings_per_movie=3
max_users_per_movie=120 # 250
user_similarity_threshold=0.15 # 0.15



In [49]:
# Step 3: Filter users by activity level
filtered_df = filter_users_by_activity(
    ratings,
    min_percentile=user_min_percentile,
    max_percentile=user_max_percentile
)

Filtering users with activity between 10th and 100th percentiles...
Before filtering: 330975 users with rating statistics:
count    330975.00000
mean        102.21969
std         232.15453
min           1.00000
25%          15.00000
50%          31.00000
75%          98.00000
max       33332.00000
Name: rating, dtype: float64
After filtering: 297732 users with rating statistics:
count    297732.000000
mean        104.751767
std         178.859589
min           6.000000
25%          16.000000
50%          38.000000
75%         109.000000
max        1686.000000
Name: rating, dtype: float64


In [50]:
ratings_df = select_diverse_users(
    filtered_df, 
    max_users_per_movie=max_users_per_movie,
    similarity_threshold=user_similarity_threshold,
)

Selecting diverse users with max 120 users per movie...
Initial dataset: 31187953 ratings from 297732 users on 67240 movies
After filtering: 351755 ratings from 6244 users
Retained 1.1% of original ratings
Average ratings per selected user: 56.3


In [51]:
ratings_df["userId"].nunique(),ratings_df["movieId"].nunique()

(6244, 35923)

In [52]:
filtered_df = remove_columns(ratings_df)


Final dataset: 351755 ratings across 35923 movies from 6244 users


In [53]:
filtered_df["userId"].nunique(),filtered_df["movieId"].nunique()

(6244, 35923)

In [54]:
# Step 2: Filter by average rating
filtered_df = filter_by_average_rating(
    movies, 
    ratings_df, 
    min_rating=min_avg_rating
)

Filtering movies with average rating >= 0...
Kept 35923 movies with average rating >= 0


In [55]:
filtered_df["userId"].nunique(),filtered_df["movieId"].nunique()

(6244, 35923)

In [56]:
filtered_df["userId"].nunique(),filtered_df["movieId"].nunique()

(6244, 35923)

In [57]:
filtered_df = filter_movies_by_popularity(
    filtered_df,
    min_ratings=min_ratings_per_movie
)

Filtering movies with at least 3 ratings...
Before filtering: 35923 movies
Movies with <3 ratings: 19634
After filtering: 16289 movies kept


In [58]:
filtered_df["userId"].nunique(),filtered_df["movieId"].nunique()

(6242, 16289)

In [59]:
def scale_ratings(matrix):

    data = matrix.copy().values
    rows = data.shape[0]
    
    # Scale each user's ratings (each row)
    for i in range(rows):
        # Find rated movies for this user
        nonzero_mask = data[i, :] > 0
        rated_indices = np.where(nonzero_mask)[0]
        
        # Get user's ratings and reshape for scaler
        user_ratings = data[i, rated_indices].reshape(-1, 1)
        
        # Apply MinMaxScaler
        scaler = MinMaxScaler()
        scaled_ratings = scaler.fit_transform(user_ratings).flatten()
        
        # Update the matrix with scaled ratings
        data[i, rated_indices] = scaled_ratings
    
    # Convert back to DataFrame
    scaled_matrix = pd.DataFrame(data, index=matrix.index, columns=matrix.columns)
    return scaled_matrix

In [60]:
def prepare_user_ratings(ratings_file_path, user_movie_matrix_columns):

    user_data = pd.read_csv(ratings_file_path)
    user_data = user_data.dropna(subset=['movieId'])
    
    # Create user-movie ratings matrix with zero values - using float dtype
    user_ratings = pd.DataFrame(0.0, index=[0], columns=user_movie_matrix_columns, dtype=np.float64)
    
    # Fill with original ratings
    for _, row in user_data.iterrows():
        movie_id = row['movieId']
        if movie_id in user_ratings.columns:
            user_ratings.loc[0, movie_id] = float(row['Rating'])
    
    # Get indices of rated movies
    nonzero_mask = user_ratings.values > 0
    rated_indices = np.where(nonzero_mask[0])[0]
    
    # Get this user's ratings
    user_rating_values = user_ratings.iloc[0, rated_indices].values.reshape(-1, 1)
    
    # Apply MinMaxScaler
    scaler = MinMaxScaler()
    scaled_ratings = scaler.fit_transform(user_rating_values).flatten()
    
    # Update the user ratings with scaled values
    user_ratings.iloc[0, rated_indices] = scaled_ratings
    
    return user_data, user_ratings

In [116]:
def get_niche_recommendations(
    ratings_df,
    movies_df,
    user_ratings=None,
    n_recommendations=20,
    movie_id_to_title_map=None,
    popularity_percentile=70,  # Exclude movies above this percentile
    min_ratings=5              # Still need some ratings for quality assurance
):
    """
    Recommends niche movies by excluding popular ones.
    
    Parameters:
    -----------
    ratings_df : pandas.DataFrame
        DataFrame containing all ratings
    movies_df : pandas.DataFrame
        DataFrame containing movie information
    user_ratings : pandas.DataFrame, optional
        User's ratings data to calculate similarity or use as input to model
    n_recommendations : int, optional
        Number of recommendations to return
    movie_id_to_title_map : dict, optional
        Dictionary mapping movie IDs to titles
    popularity_percentile : float, optional
        Percentile threshold - movies above this percentile of popularity are excluded
    min_ratings : int, optional
        Minimum number of ratings a movie must have for consideration
        
    Returns:
    --------
    list
        List of tuples (movie_id, score) of recommended niche movies
    """
    # Count ratings per movie to determine popularity
    movie_ratings_count = ratings_df.groupby('movieId').size().reset_index(name='count')
    
    # Calculate the threshold for "popular" movies (e.g., top 30% by default)
    popularity_threshold = movie_ratings_count['count'].quantile(popularity_percentile/100)
    print(f"Popularity threshold: {popularity_threshold} ratings (top {popularity_percentile}%)")
    
    # Identify movies that are not overly popular but have some minimum ratings
    niche_movie_ids = movie_ratings_count[
        (movie_ratings_count['count'] < popularity_threshold) & 
        (movie_ratings_count['count'] >= min_ratings)
    ]['movieId'].values
    
    print(f"Found {len(niche_movie_ids)} niche movies out of {len(movie_ratings_count)} total movies")
    
    # Get already rated movies to exclude from recommendations
    rated_movie_ids = set()
    if user_ratings is not None and 'movieId' in user_ratings.columns:
        rated_movie_ids = set(user_ratings['movieId'].values)
        print(f"User has rated {len(rated_movie_ids)} movies")
    
    # Filter out already rated movies from niche candidates
    niche_candidates = [mid for mid in niche_movie_ids if mid not in rated_movie_ids]
    print(f"After removing rated movies: {len(niche_candidates)} niche candidates remain")
    
    # Calculate average ratings for the niche candidates
    niche_avg_ratings = ratings_df[ratings_df['movieId'].isin(niche_candidates)].groupby('movieId')['rating'].mean()
    
    # Sort by average rating to find the best niche movies
    sorted_niche_movies = niche_avg_ratings.sort_values(ascending=False)
    
    # Get personalized recommendations if user ratings are provided
    if user_ratings is not None and len(user_ratings) > 0:
        print("Using user ratings to personalize niche recommendations...")
        
        # Create a user profile based on genres they like
        # First, get genres for each movie the user has rated highly
        high_rated_movies = user_ratings[user_ratings['rating'] >= 4.0]['movieId'].values
        user_liked_genres = set()
        
        for movie_id in high_rated_movies:
            if movie_id in movies_df['movieId'].values:
                genres = movies_df[movies_df['movieId'] == movie_id]['genres'].values[0]
                user_liked_genres.update(genres.split('|'))
        
        print(f"User seems to like these genres: {', '.join(user_liked_genres)}")
        
        # Score niche movies based on genre match with user preferences
        niche_scores = {}
        for movie_id in niche_candidates:
            if movie_id in movies_df['movieId'].values:
                genres = movies_df[movies_df['movieId'] == movie_id]['genres'].values[0]
                movie_genres = set(genres.split('|'))
                
                # Calculate genre overlap score
                genre_match = len(user_liked_genres.intersection(movie_genres)) / max(1, len(movie_genres))
                
                # Combine with average rating
                avg_rating = niche_avg_ratings.get(movie_id, 0)
                final_score = (0.7 * avg_rating) + (0.3 * genre_match * 5)  # Weight by both factors
                
                niche_scores[movie_id] = final_score
        
        # Sort by final score
        recommendations = sorted(niche_scores.items(), key=lambda x: x[1], reverse=True)[:n_recommendations]
    else:
        # If no user ratings, just recommend the highest-rated niche movies
        recommendations = [(movie_id, score) for movie_id, score in 
                          sorted_niche_movies.head(n_recommendations).items()]
    
    if movie_id_to_title_map:
        print("\nTop Niche Recommendations:")
        for movie_id, score in recommendations:
            movie_title = movie_id_to_title_map.get(movie_id, f"Unknown Movie (ID: {movie_id})")
            count = movie_ratings_count[movie_ratings_count['movieId'] == movie_id]['count'].values[0]
            print(f"Movie: {movie_title}, Score: {score:.2f}, Ratings Count: {count}")
    
    return recommendations

In [62]:
def fit_model(
    user_movie_matrix,
    n_components=50,
    max_iter=200,
    init='random',
    solver='cd',
    tol=0.0001,

    sample_size=1.0
):

    print(f"\n--- Fitting model with {sample_size*100:.0f}% of users ---")
    
    # Sample users 
    if sample_size < 1.0:
        n_users = int(user_movie_matrix.shape[0] * sample_size)
        sampled_users = np.random.choice(user_movie_matrix.index, size=n_users, replace=False)
        training_matrix = user_movie_matrix.loc[sampled_users, :]
    else:
        training_matrix = user_movie_matrix
    
    print(f"Training matrix shape: {training_matrix.shape}")
    
    # Train the model
    start_time = time.time()
    nmf = NMF(
        n_components=n_components,
        max_iter=max_iter,
        verbose=0,
        init="nndsvd",
        solver=solver,
        tol=tol,
    )
    
    user_factors = nmf.fit_transform(training_matrix)
    item_factors = nmf.components_
    training_time = time.time() - start_time
    
    print(f"Training time: {training_time:.2f} seconds")
    print(f"Iterations completed: {nmf.n_iter_}")
    print(f"Final error: {nmf.reconstruction_err_}")
    
    return nmf, training_matrix, training_time



In [63]:
def get_recommendations(
    nmf_model,
    training_matrix,
    my_ratings,
    my_data,
    n_recommendations=20,
    movie_id_to_title_map=None
):
    # Transform personal ratings into factor space
    my_user_factors = nmf_model.transform(my_ratings)
    
    # Generate predictions
    predicted_ratings = np.dot(my_user_factors, nmf_model.components_)
    predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=training_matrix.columns)
    
    # Get movies already rated
    rated_movies = set(int(movie_id) for movie_id in my_data['movieId'].values)
    print(f"You've rated {len(rated_movies)} movies")
    
    # Filter for unrated movies
    unrated_movies = [m for m in training_matrix.columns if int(m) not in rated_movies]
    print(f"Found {len(unrated_movies)} movies you haven't rated")
    
    # Get recommendations
    recommendations = []
    for movie_id in unrated_movies:
        pred_rating = predicted_ratings_df.loc[0, movie_id]
        recommendations.append((movie_id, pred_rating))
    
    recommendations.sort(key=lambda x: x[1], reverse=True)
    
    top_recommendations = recommendations[:n_recommendations]
    
    if movie_id_to_title_map:
        print("\nTop Recommendations:")
        for movie_id, predicted_rating in top_recommendations:
            # Convert from 0-1 scale back to 1-5 scale
            original_scale_rating = predicted_rating * 4 + 1 # this didnt work that well tbh
            
            movie_title = movie_id_to_title_map.get(movie_id, f"Unknown Movie (ID: {movie_id})")
            print(f"Movie: {movie_title}, Similarity Rating: {predicted_rating:.2f}/1.00")
    
    return top_recommendations 

In [64]:
user_movie_matrix = filtered_df.pivot(index='userId', columns='movieId', values='rating')
user_movie_matrix = user_movie_matrix.fillna(0)
user_movie_matrix_scaled = scale_ratings(user_movie_matrix)


In [65]:
nmf_model, training_matrix, _ = fit_model(
    user_movie_matrix_scaled,
    n_components=400,
    sample_size=1,
    max_iter=800,
)




--- Fitting model with 100% of users ---
Training matrix shape: (6242, 16289)
Training time: 453.36 seconds
Iterations completed: 82
Final error: 218.1725877474704


In [66]:
import joblib
from joblib import load
from joblib import dump

In [67]:
# Save the model
dump(nmf_model, 'nmf_model_100comp.joblib')

# Save the training matrix
dump(training_matrix, 'training_matrix_100comp.joblib')

# Alternatively, save everything in one file
model_data = {
    'model': nmf_model,
    'training_matrix': training_matrix,
    'n_components': 100,
    'timestamp': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')
}
dump(model_data, 'recommender_data_100comp.joblib')

print("Model and training matrix saved successfully!")

Model and training matrix saved successfully!


In [68]:
# nmf_model = load('nmf_model800.pkl')

In [69]:
elliott_data, elliott_ratings = prepare_user_ratings(
    "data/loelliot_ratings_with_ids.csv", 
    user_movie_matrix_scaled.columns
)
ludde_data, ludde_ratings = prepare_user_ratings(
    "data/ludde_ratings_with_ids.csv", 
    user_movie_matrix_scaled.columns
)
charlie_data, charlie_ratings = prepare_user_ratings(
    "data/chaarll_ratings_with_ids.csv", 
    user_movie_matrix_scaled.columns
)
tilda_data, tilda_ratings = prepare_user_ratings(
    "data/tilda_h_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
zorrodor_data, zorrodor_ratings = prepare_user_ratings(
    "data/zorrodor_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
lukas_data, lukas_ratings = prepare_user_ratings(
    "data/lukas_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
clara_data, clara_ratings = prepare_user_ratings(
    "data/clar_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
voided_data, voided_ratings = prepare_user_ratings(
    "data/voided_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
skellic_data, skellic_ratings = prepare_user_ratings(
    "data/skellic_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
liv_data, liv_ratings = prepare_user_ratings(
    "data/liv_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)

User has rated 415 movies
Total movies available before filtering: 16289
After removing rated movies: 15874 remain
After applying min_ratings=1: 15874 remain
Excluding top 11111 out of 15874 movies (70%)
After popularity exclusion: 4763 candidates remain

Sample of excluded popular movies:
1. Craft, The (1996) (120 ratings)
2. Miracle on 34th Street (1994) (120 ratings)
3. One Flew Over the Cuckoo's Nest (1975) (120 ratings)
4. Lethal Weapon 2 (1989) (120 ratings)
5. Departed, The (2006) (120 ratings)


In [117]:
niche_recommendations = get_niche_recommendations(
    ratings_df=ludde_ratings,
    movies_df=movies,
    n_recommendations=20,
    movie_id_to_title_map=movie_id_to_title,
    popularity_percentile=70,  # Exclude the top 70% most popular movies
    min_ratings=10             # Ensure some minimum quality threshold
)

KeyError: 'movieId'

In [114]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=tilda_ratings,
    my_data=tilda_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

You've rated 285 movies
Found 16011 movies you haven't rated

Top Recommendations:
Movie: The Martian (2015), Similarity Rating: 0.41/1.00
Movie: Inside Out (2015), Similarity Rating: 0.36/1.00
Movie: Ratatouille (2007), Similarity Rating: 0.35/1.00
Movie: Room (2015), Similarity Rating: 0.34/1.00
Movie: Finding Nemo (2003), Similarity Rating: 0.34/1.00
Movie: Little Women (2019), Similarity Rating: 0.31/1.00
Movie: Donnie Darko (2001), Similarity Rating: 0.30/1.00
Movie: Ex Machina (2015), Similarity Rating: 0.30/1.00
Movie: Moana (2016), Similarity Rating: 0.29/1.00
Movie: Booksmart (2019), Similarity Rating: 0.29/1.00
Movie: Harry Potter and the Deathly Hallows: Part 2 (2011), Similarity Rating: 0.29/1.00
Movie: Palm Springs (2020), Similarity Rating: 0.28/1.00
Movie: Kingsman: The Secret Service (2015), Similarity Rating: 0.28/1.00
Movie: The Imitation Game (2014), Similarity Rating: 0.28/1.00
Movie: Big Lebowski, The (1998), Similarity Rating: 0.28/1.00
Movie: Boyhood (2014), Simi

In [72]:

recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=elliott_ratings,
    my_data=elliott_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)
# params:    
"""
min_avg_rating=2.5
user_min_percentile=0.25
user_max_percentile=0.999
min_ratings_per_movie=3
max_users_per_movie=250
user_similarity_threshold=0.15
fit: 
n_components=200,
sample_size=1,
max_iter=800
"""
# response "Jag har sett typ en tredjedel av dessa filmer kanske, men inte sen jag började med letterboxd"

You've rated 177 movies
Found 16142 movies you haven't rated

Top Recommendations:
Movie: Gone Girl (2014), Similarity Rating: 0.24/1.00
Movie: Crazy, Stupid, Love. (2011), Similarity Rating: 0.22/1.00
Movie: Spider-Man: Into the Spider-Verse (2018), Similarity Rating: 0.22/1.00
Movie: Annihilation (2018), Similarity Rating: 0.21/1.00
Movie: A Quiet Place (2018), Similarity Rating: 0.21/1.00
Movie: Ready Player One, Similarity Rating: 0.20/1.00
Movie: The Killing of a Sacred Deer (2017), Similarity Rating: 0.20/1.00
Movie: Avengers: Infinity War - Part II (2019), Similarity Rating: 0.19/1.00
Movie: Black Swan (2010), Similarity Rating: 0.19/1.00
Movie: Spider-Man: Far from Home (2019), Similarity Rating: 0.19/1.00
Movie: Train to Busan (2016), Similarity Rating: 0.18/1.00
Movie: Nocturnal Animals, Similarity Rating: 0.18/1.00
Movie: The Martian (2015), Similarity Rating: 0.18/1.00
Movie: Avengers: Infinity War - Part I (2018), Similarity Rating: 0.18/1.00
Movie: It Follows (2014), Simi

'\nmin_avg_rating=2.5\nuser_min_percentile=0.25\nuser_max_percentile=0.999\nmin_ratings_per_movie=3\nmax_users_per_movie=250\nuser_similarity_threshold=0.15\nfit: \nn_components=200,\nsample_size=1,\nmax_iter=800\n'

In [73]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=charlie_ratings,
    my_data=charlie_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

You've rated 81 movies
Found 16210 movies you haven't rated

Top Recommendations:
Movie: Get Out (2017), Similarity Rating: 0.19/1.00
Movie: The Martian (2015), Similarity Rating: 0.19/1.00
Movie: Children of Men (2006), Similarity Rating: 0.18/1.00
Movie: Reservoir Dogs (1992), Similarity Rating: 0.17/1.00
Movie: Her (2013), Similarity Rating: 0.17/1.00
Movie: Eternal Sunshine of the Spotless Mind (2004), Similarity Rating: 0.16/1.00
Movie: No Country for Old Men (2007), Similarity Rating: 0.16/1.00
Movie: Moon (2009), Similarity Rating: 0.16/1.00
Movie: Avatar (2009), Similarity Rating: 0.16/1.00
Movie: Pan's Labyrinth (Laberinto del fauno, El) (2006), Similarity Rating: 0.16/1.00
Movie: Baby Driver (2017), Similarity Rating: 0.16/1.00
Movie: Big Short, The (2015), Similarity Rating: 0.15/1.00
Movie: Godfather, The (1972), Similarity Rating: 0.15/1.00
Movie: Lord of the Rings: The Return of the King, The (2003), Similarity Rating: 0.15/1.00
Movie: Big Lebowski, The (1998), Similarity

In [74]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=voided_ratings,
    my_data=voided_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)
# params:    
"""
min_avg_rating=2.5
user_min_percentile=0.25
user_max_percentile=0.999
min_ratings_per_movie=3
max_users_per_movie=250
user_similarity_threshold=0.15
fit: 
n_components=200,
sample_size=1,
max_iter=800
"""
"""jo men det gör dom, har typ alla förrutom guardians of the galaxy på min watchlist haha
och det stämmer att de jag redan hade sett som totoro hade jag inte rateat"""

You've rated 271 movies
Found 16026 movies you haven't rated

Top Recommendations:
Movie: Godfather, The (1972), Similarity Rating: 0.39/1.00
Movie: Interstellar (2014), Similarity Rating: 0.38/1.00
Movie: Moana (2016), Similarity Rating: 0.33/1.00
Movie: WALL·E (2008), Similarity Rating: 0.33/1.00
Movie: Prestige, The (2006), Similarity Rating: 0.32/1.00
Movie: Mulholland Drive (2001), Similarity Rating: 0.32/1.00
Movie: The Martian (2015), Similarity Rating: 0.32/1.00
Movie: Coco (2017), Similarity Rating: 0.31/1.00
Movie: Star Wars: Episode V - The Empire Strikes Back (1980), Similarity Rating: 0.30/1.00
Movie: Inception (2010), Similarity Rating: 0.30/1.00
Movie: Reservoir Dogs (1992), Similarity Rating: 0.30/1.00
Movie: Black Swan (2010), Similarity Rating: 0.30/1.00
Movie: Godfather: Part II, The (1974), Similarity Rating: 0.29/1.00
Movie: Departed, The (2006), Similarity Rating: 0.29/1.00
Movie: 2001: A Space Odyssey (1968), Similarity Rating: 0.29/1.00
Movie: Seven Samurai (Shi

'jo men det gör dom, har typ alla förrutom guardians of the galaxy på min watchlist haha\noch det stämmer att de jag redan hade sett som totoro hade jag inte rateat'

In [75]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=zorrodor_ratings,
    my_data=zorrodor_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

# params:    
"""
min_avg_rating=2.5
user_min_percentile=0.25
user_max_percentile=0.999
min_ratings_per_movie=3
max_users_per_movie=250
user_similarity_threshold=0.15
fit: 
n_components=200,
sample_size=1,
max_iter=800
"""
# zorro:
# "Dom första 20 är alla bra filmer"
# "Eller de e nästan bara grejer jag gillar"
# "Några luriga dock men de mesta e gött"
# "Jag har rateat ganska lite också"
# "Så den har inte sååå mkt att gå på"

You've rated 45 movies
Found 16255 movies you haven't rated

Top Recommendations:
Movie: In the Mood For Love (Fa yeung nin wa) (2000), Similarity Rating: 0.22/1.00
Movie: Three Colors: White (Trzy kolory: Bialy) (1994), Similarity Rating: 0.16/1.00
Movie: Fallen Angels (Duo luo tian shi) (1995), Similarity Rating: 0.12/1.00
Movie: Three Colors: Blue (Trois couleurs: Bleu) (1993), Similarity Rating: 0.10/1.00
Movie: Stalker (1979), Similarity Rating: 0.10/1.00
Movie: Taxi Driver (1976), Similarity Rating: 0.09/1.00
Movie: Yi Yi (2000), Similarity Rating: 0.08/1.00
Movie: In Bruges (2008), Similarity Rating: 0.08/1.00
Movie: Get Out (2017), Similarity Rating: 0.07/1.00
Movie: 2046 (2004), Similarity Rating: 0.07/1.00
Movie: Mulholland Drive (2001), Similarity Rating: 0.07/1.00
Movie: Grand Budapest Hotel, The (2014), Similarity Rating: 0.07/1.00
Movie: Breaking the Waves (1996), Similarity Rating: 0.07/1.00
Movie: Great Beauty, The (Grande Bellezza, La) (2013), Similarity Rating: 0.07/1

'\nmin_avg_rating=2.5\nuser_min_percentile=0.25\nuser_max_percentile=0.999\nmin_ratings_per_movie=3\nmax_users_per_movie=250\nuser_similarity_threshold=0.15\nfit: \nn_components=200,\nsample_size=1,\nmax_iter=800\n'

In [76]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=liv_ratings,
    my_data=liv_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

You've rated 106 movies
Found 16192 movies you haven't rated

Top Recommendations:
Movie: Her (2013), Similarity Rating: 0.21/1.00
Movie: Crazy, Stupid, Love. (2011), Similarity Rating: 0.17/1.00
Movie: Grand Budapest Hotel, The (2014), Similarity Rating: 0.17/1.00
Movie: Spirited Away (Sen to Chihiro no kamikakushi) (2001), Similarity Rating: 0.17/1.00
Movie: Parasite (2019), Similarity Rating: 0.16/1.00
Movie: Love, Simon (2018), Similarity Rating: 0.16/1.00
Movie: Room (2015), Similarity Rating: 0.16/1.00
Movie: Silver Linings Playbook (2012), Similarity Rating: 0.16/1.00
Movie: Brokeback Mountain (2005), Similarity Rating: 0.15/1.00
Movie: Eighth Grade (2018), Similarity Rating: 0.14/1.00
Movie: Mean Girls (2004), Similarity Rating: 0.14/1.00
Movie: The Lobster (2015), Similarity Rating: 0.14/1.00
Movie: Moonrise Kingdom (2012), Similarity Rating: 0.14/1.00
Movie: Big Lebowski, The (1998), Similarity Rating: 0.14/1.00
Movie: Donnie Darko (2001), Similarity Rating: 0.13/1.00
Movie: 

In [77]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=skellic_ratings,
    my_data=skellic_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

You've rated 338 movies
Found 15963 movies you haven't rated

Top Recommendations:
Movie: The Martian (2015), Similarity Rating: 0.43/1.00
Movie: Interstellar (2014), Similarity Rating: 0.42/1.00
Movie: Reservoir Dogs (1992), Similarity Rating: 0.40/1.00
Movie: Your Name. (2016), Similarity Rating: 0.38/1.00
Movie: Moana (2016), Similarity Rating: 0.37/1.00
Movie: Ratatouille (2007), Similarity Rating: 0.37/1.00
Movie: Black Swan (2010), Similarity Rating: 0.37/1.00
Movie: Prestige, The (2006), Similarity Rating: 0.36/1.00
Movie: Mulholland Drive (2001), Similarity Rating: 0.35/1.00
Movie: Godfather, The (1972), Similarity Rating: 0.35/1.00
Movie: Fargo (1996), Similarity Rating: 0.35/1.00
Movie: Clockwork Orange, A (1971), Similarity Rating: 0.35/1.00
Movie: Coco (2017), Similarity Rating: 0.35/1.00
Movie: Inception (2010), Similarity Rating: 0.35/1.00
Movie: Wolf of Wall Street, The (2013), Similarity Rating: 0.34/1.00
Movie: Die Hard (1988), Similarity Rating: 0.34/1.00
Movie: Aveng