In [1]:
import pandas as pd
from sklearn.decomposition import NMF
import numpy as np
import time
from sklearn.preprocessing import MinMaxScaler


In [2]:
ratings= pd.read_csv('./data/ratings.csv')
movies = pd.read_csv('./data/movies.csv')
movie_id_to_title = dict(zip(movies['movieId'], movies['title']))



In [3]:
def filter_by_average_rating(movies_df, ratings_df, min_rating=0):
    print(f"Filtering movies with average rating >= {min_rating}...")
    
    movies_with_ratings = pd.merge(movies_df, ratings_df, on='movieId', how='inner')
    
    average_ratings = movies_with_ratings.groupby('movieId')['rating'].mean()
    
    popular_movies = average_ratings[average_ratings >= min_rating].index
    
    filtered_movies_with_ratings = movies_with_ratings[movies_with_ratings['movieId'].isin(popular_movies)]
    
    print(f"Kept {len(popular_movies)} movies with average rating >= {min_rating}")
    
    return filtered_movies_with_ratings

In [4]:

def filter_users_by_activity(ratings_df, min_percentile, max_percentile):

    print(f"Filtering users with activity between {min_percentile*100:.0f}th and {max_percentile*100:.0f}th percentiles...")
    
    # Count ratings per user
    user_rating_counts = ratings_df.groupby('userId')['rating'].count()
    print(f"Before filtering: {len(user_rating_counts)} users with rating statistics:")
    print(user_rating_counts.describe())
    
    # Get percentile thresholds
    min_threshold = user_rating_counts.quantile(min_percentile)
    max_threshold = user_rating_counts.quantile(max_percentile)
    
    # Filter users
    filtered_users = user_rating_counts[(user_rating_counts >= min_threshold) & 
                                       (user_rating_counts <= max_threshold)].index
    
    filtered_ratings = ratings_df[ratings_df['userId'].isin(filtered_users)]
    
    # Get statistics after filtering
    user_rating_counts_after = filtered_ratings.groupby('userId')['rating'].count()
    print(f"After filtering: {len(user_rating_counts_after)} users with rating statistics:")
    print(user_rating_counts_after.describe())
    
    return filtered_ratings

In [5]:

def filter_movies_by_popularity(ratings_df, min_ratings):

    print(f"Filtering movies with at least {min_ratings} ratings...")
    
    # Count ratings per movie
    movie_rating_counts = ratings_df.groupby('movieId').size()
    
    print(f"Before filtering: {len(movie_rating_counts)} movies")
    print(f"Movies with <{min_ratings} ratings: {(movie_rating_counts < min_ratings).sum()}")
    
    # Filter movies with sufficient ratings
    popular_movies = movie_rating_counts[movie_rating_counts >= min_ratings].index
    
    filtered_ratings = ratings_df[ratings_df['movieId'].isin(popular_movies)]
    
    print(f"After filtering: {len(popular_movies)} movies kept")
    
    return filtered_ratings

In [6]:
def select_diverse_users(ratings_df, max_users_per_movie=1000, similarity_threshold=0.3):

    print(f"Selecting diverse users with max {max_users_per_movie} users per movie...")
    

    # Get initial statistics
    total_users = ratings_df['userId'].nunique()
    total_movies = ratings_df['movieId'].nunique()
    total_ratings = len(ratings_df)
    
    print(f"Initial dataset: {total_ratings} ratings from {total_users} users on {total_movies} movies")
    
    # Create a movie-to-users dictionary to track how many users rated each movie
    movie_to_users = {}
    for movie_id, group in ratings_df.groupby('movieId'):
        movie_to_users[movie_id] = set(group['userId'])
    
    # Create a user-to-movies dictionary
    user_to_movies = {}
    for user_id, group in ratings_df.groupby('userId'):
        user_to_movies[user_id] = set(group['movieId'])
    
    # Sort users by the number of ratings (to prioritize users with fewer ratings)
    user_rating_counts = ratings_df.groupby('userId').size()
    sorted_users = user_rating_counts.sort_values().index.tolist()
    
    # Shuffle the users to avoid bias toward specific user IDs
    np.random.shuffle(sorted_users)
    
    selected_users = set()
    movie_user_counts = {movie_id: 0 for movie_id in movie_to_users.keys()}
    
    # Process users in the sorted order
    for user_id in sorted_users:
        user_movies = user_to_movies[user_id]
        
        # Check if this user would exceed the max users for any movie
        exceeds_limit = False
        for movie_id in user_movies:
            if movie_user_counts[movie_id] >= max_users_per_movie:
                exceeds_limit = True
                break
        
        # Check similarity with already selected users
        too_similar = False
        if not exceeds_limit:
            for selected_user in selected_users:
                selected_user_movies = user_to_movies[selected_user]
                
                # Calculate Jaccard similarity between users
                intersection = len(user_movies.intersection(selected_user_movies))
                union = len(user_movies.union(selected_user_movies))
                
                if union > 0:
                    similarity = intersection / union
                    if similarity > similarity_threshold:
                        too_similar = True
                        break
        
        # Add user if they don't exceed limits and aren't too similar
        if not exceeds_limit and not too_similar:
            selected_users.add(user_id)
            
            # Update movie user counts
            for movie_id in user_movies:
                movie_user_counts[movie_id] += 1
    
    # Filter the ratings DataFrame to only include selected users
    filtered_ratings = ratings_df[ratings_df['userId'].isin(selected_users)]
    
    print(f"After filtering: {len(filtered_ratings)} ratings from {len(selected_users)} users")
    print(f"Retained {len(filtered_ratings)/total_ratings:.1%} of original ratings")
    
    return filtered_ratings

In [7]:
def remove_columns(filtered_df):
    if 'timestamp' in filtered_df.columns:
        filtered_df.drop(columns=["timestamp"], inplace=True)
    if 'genres' in filtered_df.columns:
        filtered_df.drop(columns=["genres"], inplace=True)
    if 'title' in filtered_df.columns:
        filtered_df.drop(columns=["title"], inplace=True)
    
    print(f"Final dataset: {len(filtered_df)} ratings across {filtered_df['movieId'].nunique()} movies from {filtered_df['userId'].nunique()} users")

    return filtered_df



In [8]:
ratings = remove_columns(ratings)

Final dataset: 33832162 ratings across 83239 movies from 330975 users


In [9]:

min_avg_rating=0
user_min_percentile=0.25
user_max_percentile=0.999
min_ratings_per_movie=2
max_users_per_movie=110 # 250
user_similarity_threshold=0.15 # 0.15



In [10]:
# Step 3: Filter users by activity level
filtered_df = filter_users_by_activity(
    ratings,
    min_percentile=user_min_percentile,
    max_percentile=user_max_percentile
)

Filtering users with activity between 25th and 100th percentiles...
Before filtering: 330975 users with rating statistics:
count    330975.00000
mean        102.21969
std         232.15453
min           1.00000
25%          15.00000
50%          31.00000
75%          98.00000
max       33332.00000
Name: rating, dtype: float64
After filtering: 253970 users with rating statistics:
count    253970.000000
mean        126.353069
std         212.502895
min          15.000000
25%          23.000000
50%          51.000000
75%         132.000000
max        2546.000000
Name: rating, dtype: float64


In [11]:
ratings_df = select_diverse_users(
    filtered_df, 
    max_users_per_movie=max_users_per_movie,
    similarity_threshold=user_similarity_threshold,
)

Selecting diverse users with max 110 users per movie...
Initial dataset: 32089889 ratings from 253970 users on 73029 movies
After filtering: 172050 ratings from 6930 users
Retained 0.5% of original ratings


In [12]:
ratings_df["userId"].nunique(),ratings_df["movieId"].nunique()

(6930, 15449)

In [13]:
filtered_df = remove_columns(ratings_df)


Final dataset: 172050 ratings across 15449 movies from 6930 users


In [14]:
filtered_df["userId"].nunique(),filtered_df["movieId"].nunique()

(6930, 15449)

In [15]:
# Step 2: Filter by average rating
filtered_df = filter_by_average_rating(
    movies, 
    ratings_df, 
    min_rating=min_avg_rating
)

Filtering movies with average rating >= 0...
Kept 15449 movies with average rating >= 0


In [16]:
filtered_df["userId"].nunique(),filtered_df["movieId"].nunique()

(6930, 15449)

In [17]:
filtered_df["userId"].nunique(),filtered_df["movieId"].nunique()

(6930, 15449)

In [18]:
filtered_df = filter_movies_by_popularity(
    filtered_df,
    min_ratings=min_ratings_per_movie
)

Filtering movies with at least 2 ratings...
Before filtering: 15449 movies
Movies with <2 ratings: 6389
After filtering: 9060 movies kept


In [19]:
filtered_df["userId"].nunique(),filtered_df["movieId"].nunique()

(6929, 9060)

In [20]:
def scale_ratings(matrix):

    data = matrix.copy().values
    rows = data.shape[0]
    
    # Scale each user's ratings (each row)
    for i in range(rows):
        # Find rated movies for this user
        nonzero_mask = data[i, :] > 0
        rated_indices = np.where(nonzero_mask)[0]
        
        # Get user's ratings and reshape for scaler
        user_ratings = data[i, rated_indices].reshape(-1, 1)
        
        # Apply MinMaxScaler
        scaler = MinMaxScaler()
        scaled_ratings = scaler.fit_transform(user_ratings).flatten()
        
        # Update the matrix with scaled ratings
        data[i, rated_indices] = scaled_ratings
    
    # Convert back to DataFrame
    scaled_matrix = pd.DataFrame(data, index=matrix.index, columns=matrix.columns)
    return scaled_matrix

In [21]:
def prepare_user_ratings(ratings_file_path, user_movie_matrix_columns):

    user_data = pd.read_csv(ratings_file_path)
    user_data = user_data.dropna(subset=['movieId'])
    
    # Create user-movie ratings matrix with zero values - using float dtype
    user_ratings = pd.DataFrame(0.0, index=[0], columns=user_movie_matrix_columns, dtype=np.float64)
    
    # Fill with original ratings
    for _, row in user_data.iterrows():
        movie_id = row['movieId']
        if movie_id in user_ratings.columns:
            user_ratings.loc[0, movie_id] = float(row['Rating'])
    
    # Get indices of rated movies
    nonzero_mask = user_ratings.values > 0
    rated_indices = np.where(nonzero_mask[0])[0]
    
    # Get this user's ratings
    user_rating_values = user_ratings.iloc[0, rated_indices].values.reshape(-1, 1)
    
    # Apply MinMaxScaler
    scaler = MinMaxScaler()
    scaled_ratings = scaler.fit_transform(user_rating_values).flatten()
    
    # Update the user ratings with scaled values
    user_ratings.iloc[0, rated_indices] = scaled_ratings
    
    return user_data, user_ratings

In [22]:
def get_recommendations1(user_ratings, nmf_model, movie_data, ratings_data, n=10, min_ratings=5):
    """
    Get movie recommendations for a user based on their ratings.
    
    Parameters:
    -----------
    user_ratings : pandas.DataFrame
        DataFrame with user ratings in the same format as the training matrix
    nmf_model : sklearn.decomposition.NMF
        Trained NMF model
    movie_data : pandas.DataFrame
        DataFrame with movie information (title, genres)
    ratings_data : pandas.DataFrame
        DataFrame with all ratings (userId, movieId, rating)
    n : int
        Number of recommendations to return
    min_ratings : int
        Minimum number of ratings a movie must have to be recommended
        
    Returns:
    --------
    pandas.DataFrame
        DataFrame with recommended movies
    """
    # Get user latent factors for the input user
    user_factors = nmf_model.transform(user_ratings)
    
    # Get item latent factors from the model
    item_factors = nmf_model.components_
    
    # Calculate predicted ratings for all movies
    predicted_ratings = np.dot(user_factors, item_factors).flatten()
    
    # Create a DataFrame with predicted ratings
    recommendations = pd.DataFrame({
        'movieId': user_ratings.columns,
        'predicted_rating': predicted_ratings
    })
    
    # Calculate average rating for each movie from the ratings dataframe
    movie_avg_ratings = ratings_data.groupby('movieId')['rating'].agg(['mean', 'count']).reset_index()
    movie_avg_ratings.columns = ['movieId', 'avg_rating', 'rating_count']
    
    # Create mappings
    movie_to_avg_rating = dict(zip(movie_avg_ratings['movieId'], movie_avg_ratings['avg_rating']))
    movie_to_rating_count = dict(zip(movie_avg_ratings['movieId'], movie_avg_ratings['rating_count']))
    
    # Add average ratings and rating counts
    recommendations['avg_rating'] = recommendations['movieId'].map(movie_to_avg_rating).fillna(0)
    recommendations['rating_count'] = recommendations['movieId'].map(movie_to_rating_count).fillna(0)
    
    # Normalize both scores for fair combination (handle edge cases)
    if recommendations['predicted_rating'].max() != recommendations['predicted_rating'].min():
        recommendations['predicted_norm'] = (recommendations['predicted_rating'] - recommendations['predicted_rating'].min()) / \
                                         (recommendations['predicted_rating'].max() - recommendations['predicted_rating'].min())
    else:
        recommendations['predicted_norm'] = 1.0
        
    if recommendations['avg_rating'].max() != recommendations['avg_rating'].min():
        recommendations['avg_norm'] = (recommendations['avg_rating'] - recommendations['avg_rating'].min()) / \
                                   (recommendations['avg_rating'].max() - recommendations['avg_rating'].min())
    else:
        recommendations['avg_norm'] = 1.0
    
    # Create composite score: 90% predicted rating + 10% average rating
    recommendations['composite_score'] = 0.9 * recommendations['predicted_norm'] + 0.1 * recommendations['avg_norm']
    
    # Filter out movies the user has already rated
    rated_movies = user_ratings.columns[user_ratings.values[0] > 0]
    unrated_movies = recommendations[~recommendations['movieId'].isin(rated_movies)]
    
    # Filter out movies with too few ratings
    unrated_movies = unrated_movies[unrated_movies['rating_count'] >= min_ratings]
    
    # Sort by composite score and get top N
    top_recommendations = unrated_movies.sort_values('composite_score', ascending=False).head(n)
    
    # Merge with movie information
    top_recommendations = pd.merge(top_recommendations, movie_data[['movieId', 'title', 'genres']], on='movieId')
    
    # Format the output
    result = top_recommendations[['movieId', 'title', 'genres', 'predicted_rating', 'avg_rating', 'rating_count', 'composite_score']]
    result = result.sort_values('composite_score', ascending=False)
    
    return result

In [23]:
def fit_model(
    user_movie_matrix_scaled,
    n_components=50,
    max_iter=200,
    init='random',
    solver='cd',
    tol=0.0001,

    sample_size=1.0
):

    print(f"\n--- Fitting model with {sample_size*100:.0f}% of users ---")
    
    # Sample users 
    if sample_size < 1.0:
        n_users = int(user_movie_matrix.shape[0] * sample_size)
        sampled_users = np.random.choice(user_movie_matrix.index, size=n_users, replace=False)
        training_matrix = user_movie_matrix.loc[sampled_users, :]
    else:
        training_matrix = user_movie_matrix
    
    print(f"Training matrix shape: {training_matrix.shape}")
    
    # Train the model
    start_time = time.time()
    nmf = NMF(
        n_components=n_components,
        max_iter=max_iter,
        verbose=0,
        init="nndsvd",
        #solver=solver,
        #tol=tol,
        #l1_ratio=l1_ratio
    )
    
    user_factors = nmf.fit_transform(training_matrix)
    item_factors = nmf.components_
    training_time = time.time() - start_time
    
    print(f"Training time: {training_time:.2f} seconds")
    print(f"Iterations completed: {nmf.n_iter_}")
    print(f"Final error: {nmf.reconstruction_err_}")
    
    return nmf, training_matrix, training_time



In [24]:
def get_recommendations(
    nmf_model,
    training_matrix,
    my_ratings,
    my_data,
    n_recommendations=20,
    movie_id_to_title_map=None
):
    # Transform personal ratings into factor space
    my_user_factors = nmf_model.transform(my_ratings)
    
    # Generate predictions
    predicted_ratings = np.dot(my_user_factors, nmf_model.components_)
    predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=training_matrix.columns)
    
    # Get movies already rated
    rated_movies = set(int(movie_id) for movie_id in my_data['movieId'].values)
    print(f"You've rated {len(rated_movies)} movies")
    
    # Filter for unrated movies
    unrated_movies = [m for m in training_matrix.columns if int(m) not in rated_movies]
    print(f"Found {len(unrated_movies)} movies you haven't rated")
    
    # Get recommendations
    recommendations = []
    for movie_id in unrated_movies:
        pred_rating = predicted_ratings_df.loc[0, movie_id]
        recommendations.append((movie_id, pred_rating))
    
    recommendations.sort(key=lambda x: x[1], reverse=True)
    
    top_recommendations = recommendations[:n_recommendations]
    
    if movie_id_to_title_map:
        print("\nTop Recommendations:")
        for movie_id, predicted_rating in top_recommendations:
            # Convert from 0-1 scale back to 1-5 scale
            original_scale_rating = predicted_rating * 4 + 1 # this didnt work that well tbh
            
            movie_title = movie_id_to_title_map.get(movie_id, f"Unknown Movie (ID: {movie_id})")
            print(f"Movie: {movie_title}, Similarity Rating: {predicted_rating:.2f}/1.00")
    
    return top_recommendations 

In [25]:
user_movie_matrix = filtered_df.pivot(index='userId', columns='movieId', values='rating')
user_movie_matrix = user_movie_matrix.fillna(0)
user_movie_matrix_scaled = scale_ratings(user_movie_matrix)


In [None]:
nmf_model, training_matrix, _ = fit_model(
    user_movie_matrix_scaled,
    n_components=600,
    sample_size=1,
    max_iter=800,
)




--- Fitting model with 100% of users ---
Training matrix shape: (6929, 9060)


In [None]:
# save model
import joblib
joblib.dump(nmf_model, "nmf_model.pkl") 

['nmf_model800.pkl']

In [None]:
elliott_data, elliott_ratings = prepare_user_ratings(
    "data/loelliot_ratings_with_ids.csv", 
    user_movie_matrix_scaled.columns
)
ludde_data, ludde_ratings = prepare_user_ratings(
    "data/ludde_ratings_with_ids.csv", 
    user_movie_matrix_scaled.columns
)
charlie_data, charlie_ratings = prepare_user_ratings(
    "data/chaarll_ratings_with_ids.csv", 
    user_movie_matrix_scaled.columns
)
tilda_data, tilda_ratings = prepare_user_ratings(
    "data/tilda_h_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
zorrodor_data, zorrodor_ratings = prepare_user_ratings(
    "data/zorrodor_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
lukas_data, lukas_ratings = prepare_user_ratings(
    "data/lukas_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
clara_data, clara_ratings = prepare_user_ratings(
    "data/clar_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
voided_data, voided_ratings = prepare_user_ratings(
    "data/voided_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)
skellic_data, skellic_ratings = prepare_user_ratings(
    "data/skellic_ratings_with_ids.csv",
    user_movie_matrix_scaled.columns
)

In [None]:
recommendations1 = get_recommendations1(
    user_ratings=ludde_ratings,
    nmf_model=nmf_model,
    movie_data=movies,
    ratings_data=ratings,  # Add this parameter
    n=20,
    min_ratings=5
)
print("\nRecommendations for Ludde:")
print(recommendations1)


Recommendations for Ludde:
    movieId                                              title  \
0    152081                                    Zootopia (2016)   
1      1089                              Reservoir Dogs (1992)   
2     81564                                    Megamind (2010)   
3    203375                             Ford v. Ferrari (2019)   
4      1136             Monty Python and the Holy Grail (1975)   
5     71535                                  Zombieland (2009)   
6     56782                         There Will Be Blood (2007)   
7      4878                                Donnie Darko (2001)   
8    177765                                        Coco (2017)   
9       293  Léon: The Professional (a.k.a. The Professiona...   
10   183869                                  Hereditary (2018)   
11      541                                Blade Runner (1982)   
12     1265                               Groundhog Day (1993)   
13    72226                           Fantastic 

In [None]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=tilda_ratings,
    my_data=tilda_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

You've rated 285 movies
Found 9323 movies you haven't rated

Top Recommendations:
Movie: Pulp Fiction (1994), Similarity Rating: 0.52/1.00
Movie: Fantastic Mr. Fox (2009), Similarity Rating: 0.49/1.00
Movie: Matrix, The (1999), Similarity Rating: 0.42/1.00
Movie: Hereditary (2018), Similarity Rating: 0.40/1.00
Movie: Little Women (2019), Similarity Rating: 0.38/1.00
Movie: Kingsman: The Secret Service (2015), Similarity Rating: 0.36/1.00
Movie: Ex Machina (2015), Similarity Rating: 0.35/1.00
Movie: Inside Out (2015), Similarity Rating: 0.35/1.00
Movie: Donnie Darko (2001), Similarity Rating: 0.33/1.00
Movie: Meet the Parents (2000), Similarity Rating: 0.33/1.00
Movie: Big Short, The (2015), Similarity Rating: 0.32/1.00
Movie: Zootopia (2016), Similarity Rating: 0.31/1.00
Movie: Call Me by Your Name (2017), Similarity Rating: 0.31/1.00
Movie: Ferris Bueller's Day Off (1986), Similarity Rating: 0.31/1.00
Movie: Bridget Jones's Diary (2001), Similarity Rating: 0.31/1.00
Movie: A Quiet Pla

In [None]:

recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=elliott_ratings,
    my_data=elliott_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)
# params:    
"""
min_avg_rating=2.5
user_min_percentile=0.25
user_max_percentile=0.999
min_ratings_per_movie=3
max_users_per_movie=250
user_similarity_threshold=0.15
fit: 
n_components=200,
sample_size=1,
max_iter=800
"""
# response "Jag har sett typ en tredjedel av dessa filmer kanske, men inte sen jag började med letterboxd"

You've rated 177 movies
Found 9458 movies you haven't rated

Top Recommendations:
Movie: Spider-Man: Into the Spider-Verse (2018), Similarity Rating: 0.41/1.00
Movie: Moonlight, Similarity Rating: 0.28/1.00
Movie: Portrait of a Lady on Fire (2019), Similarity Rating: 0.27/1.00
Movie: The Handmaiden (2016), Similarity Rating: 0.26/1.00
Movie: Ford v. Ferrari (2019), Similarity Rating: 0.26/1.00
Movie: Once Upon a Time in Hollywood (2019), Similarity Rating: 0.26/1.00
Movie: Uncut Gems (2019), Similarity Rating: 0.25/1.00
Movie: Fantastic Mr. Fox (2009), Similarity Rating: 0.24/1.00
Movie: Royal Tenenbaums, The (2001), Similarity Rating: 0.24/1.00
Movie: Clueless (1995), Similarity Rating: 0.23/1.00
Movie: Marriage Story (2019), Similarity Rating: 0.23/1.00
Movie: Moonrise Kingdom (2012), Similarity Rating: 0.23/1.00
Movie: Pitch Perfect (2012), Similarity Rating: 0.20/1.00
Movie: The Witch (2015), Similarity Rating: 0.20/1.00
Movie: The Lighthouse (2019), Similarity Rating: 0.20/1.00
Mo

'\nmin_avg_rating=2.5\nuser_min_percentile=0.25\nuser_max_percentile=0.999\nmin_ratings_per_movie=3\nmax_users_per_movie=250\nuser_similarity_threshold=0.15\nfit: \nn_components=200,\nsample_size=1,\nmax_iter=800\n'

In [None]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=charlie_ratings,
    my_data=charlie_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

You've rated 81 movies
Found 9223 movies you haven't rated

Top Recommendations:
Movie: The Martian (2015), Similarity Rating: 0.64/1.00
Movie: Get Out (2017), Similarity Rating: 0.40/1.00
Movie: Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981), Similarity Rating: 0.38/1.00
Movie: Aliens (1986), Similarity Rating: 0.37/1.00
Movie: Forrest Gump (1994), Similarity Rating: 0.34/1.00
Movie: Silence of the Lambs, The (1991), Similarity Rating: 0.33/1.00
Movie: Wolf of Wall Street, The (2013), Similarity Rating: 0.29/1.00
Movie: Gone Girl (2014), Similarity Rating: 0.29/1.00
Movie: Lord of the Rings: The Return of the King, The (2003), Similarity Rating: 0.28/1.00
Movie: Sixth Sense, The (1999), Similarity Rating: 0.27/1.00
Movie: Dead Poets Society (1989), Similarity Rating: 0.27/1.00
Movie: Lord of the Rings: The Two Towers, The (2002), Similarity Rating: 0.27/1.00
Movie: Midsommar (2019), Similarity Rating: 0.27/1.00
Movie: American Beauty (1999), Similarity 

In [None]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=voided_ratings,
    my_data=voided_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)
# params:    
"""
min_avg_rating=2.5
user_min_percentile=0.25
user_max_percentile=0.999
min_ratings_per_movie=3
max_users_per_movie=250
user_similarity_threshold=0.15
fit: 
n_components=200,
sample_size=1,
max_iter=800
"""
"""jo men det gör dom, har typ alla förrutom guardians of the galaxy på min watchlist haha
och det stämmer att de jag redan hade sett som totoro hade jag inte rateat"""

You've rated 271 movies
Found 9050 movies you haven't rated

Top Recommendations:
Movie: Fargo (1996), Similarity Rating: 0.58/1.00
Movie: Braveheart (1995), Similarity Rating: 0.53/1.00
Movie: Matrix, The (1999), Similarity Rating: 0.49/1.00
Movie: Reservoir Dogs (1992), Similarity Rating: 0.47/1.00
Movie: The Imitation Game (2014), Similarity Rating: 0.46/1.00
Movie: Black Swan (2010), Similarity Rating: 0.46/1.00
Movie: Lost in Translation (2003), Similarity Rating: 0.44/1.00
Movie: Gone Girl (2014), Similarity Rating: 0.43/1.00
Movie: My Neighbor Totoro (Tonari no Totoro) (1988), Similarity Rating: 0.42/1.00
Movie: Shawshank Redemption, The (1994), Similarity Rating: 0.38/1.00
Movie: Good Will Hunting (1997), Similarity Rating: 0.37/1.00
Movie: Jojo Rabbit (2019), Similarity Rating: 0.37/1.00
Movie: City of God (Cidade de Deus) (2002), Similarity Rating: 0.36/1.00
Movie: Zodiac (2007), Similarity Rating: 0.35/1.00
Movie: Dr. Strangelove or: How I Learned to Stop Worrying and Love t

'jo men det gör dom, har typ alla förrutom guardians of the galaxy på min watchlist haha\noch det stämmer att de jag redan hade sett som totoro hade jag inte rateat'

In [None]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=zorrodor_ratings,
    my_data=zorrodor_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

# params:    
"""
min_avg_rating=2.5
user_min_percentile=0.25
user_max_percentile=0.999
min_ratings_per_movie=3
max_users_per_movie=250
user_similarity_threshold=0.15
fit: 
n_components=200,
sample_size=1,
max_iter=800
"""
# zorro:
# "Dom första 20 är alla bra filmer"
# "Eller de e nästan bara grejer jag gillar"
# "Några luriga dock men de mesta e gött"
# "Jag har rateat ganska lite också"
# "Så den har inte sååå mkt att gå på"

You've rated 45 movies
Found 9270 movies you haven't rated

Top Recommendations:
Movie: Welcome to the Dollhouse (1995), Similarity Rating: 0.35/1.00
Movie: Aliens (1986), Similarity Rating: 0.27/1.00
Movie: Rushmore (1998), Similarity Rating: 0.25/1.00
Movie: Royal Tenenbaums, The (2001), Similarity Rating: 0.23/1.00
Movie: Blade Runner (1982), Similarity Rating: 0.22/1.00
Movie: Three Colors: Blue (Trois couleurs: Bleu) (1993), Similarity Rating: 0.21/1.00
Movie: Three Colors: White (Trzy kolory: Bialy) (1994), Similarity Rating: 0.16/1.00
Movie: Big Lebowski, The (1998), Similarity Rating: 0.15/1.00
Movie: Gosford Park (2001), Similarity Rating: 0.14/1.00
Movie: Shallow Grave (1994), Similarity Rating: 0.14/1.00
Movie: Waiting for Guffman (1996), Similarity Rating: 0.14/1.00
Movie: Parasite (2019), Similarity Rating: 0.12/1.00
Movie: Life Aquatic with Steve Zissou, The (2004), Similarity Rating: 0.11/1.00
Movie: 10 Things I Hate About You (1999), Similarity Rating: 0.10/1.00
Movie: 

'\nmin_avg_rating=2.5\nuser_min_percentile=0.25\nuser_max_percentile=0.999\nmin_ratings_per_movie=3\nmax_users_per_movie=250\nuser_similarity_threshold=0.15\nfit: \nn_components=200,\nsample_size=1,\nmax_iter=800\n'

In [None]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=lukas_ratings,
    my_data=lukas_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

You've rated 151 movies
Found 9173 movies you haven't rated

Top Recommendations:
Movie: Star Wars: Episode VI - Return of the Jedi (1983), Similarity Rating: 0.81/1.00
Movie: Avengers: Infinity War - Part II (2019), Similarity Rating: 0.49/1.00
Movie: Spider-Man: Into the Spider-Verse (2018), Similarity Rating: 0.45/1.00
Movie: Thor: Ragnarok (2017), Similarity Rating: 0.38/1.00
Movie: Parasite (2019), Similarity Rating: 0.36/1.00
Movie: Untitled Spider-Man Reboot (2017), Similarity Rating: 0.32/1.00
Movie: Finding Nemo (2003), Similarity Rating: 0.30/1.00
Movie: Matrix, The (1999), Similarity Rating: 0.29/1.00
Movie: Monsters, Inc. (2001), Similarity Rating: 0.29/1.00
Movie: Iron Man (2008), Similarity Rating: 0.27/1.00
Movie: Spider-Man: Far from Home (2019), Similarity Rating: 0.25/1.00
Movie: Avengers, The (2012), Similarity Rating: 0.23/1.00
Movie: Deadpool (2016), Similarity Rating: 0.22/1.00
Movie: Shrek (2001), Similarity Rating: 0.22/1.00
Movie: Doctor Strange (2016), Similar

In [None]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=clara_ratings,
    my_data=clara_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

You've rated 83 movies
Found 9226 movies you haven't rated

Top Recommendations:
Movie: Fight Club (1999), Similarity Rating: 0.32/1.00
Movie: Forrest Gump (1994), Similarity Rating: 0.31/1.00
Movie: Matrix, The (1999), Similarity Rating: 0.31/1.00
Movie: X-Men: First Class (2011), Similarity Rating: 0.28/1.00
Movie: Guardians of the Galaxy (2014), Similarity Rating: 0.25/1.00
Movie: American Beauty (1999), Similarity Rating: 0.24/1.00
Movie: Spider-Man: Into the Spider-Verse (2018), Similarity Rating: 0.24/1.00
Movie: Bourne Ultimatum, The (2007), Similarity Rating: 0.21/1.00
Movie: Untitled Spider-Man Reboot (2017), Similarity Rating: 0.19/1.00
Movie: Amazing Spider-Man, The (2012), Similarity Rating: 0.19/1.00
Movie: Lord of the Rings: The Return of the King, The (2003), Similarity Rating: 0.16/1.00
Movie: Guardians of the Galaxy 2 (2017), Similarity Rating: 0.16/1.00
Movie: Lord of the Rings: The Two Towers, The (2002), Similarity Rating: 0.15/1.00
Movie: Ratatouille (2007), Simila

In [None]:
recommendations = get_recommendations(
    nmf_model=nmf_model,
    training_matrix=training_matrix,
    my_ratings=skellic_ratings,
    my_data=skellic_data,
    n_recommendations=200,
    movie_id_to_title_map=movie_id_to_title,
)

You've rated 338 movies
Found 8990 movies you haven't rated

Top Recommendations:
Movie: Wolf of Wall Street, The (2013), Similarity Rating: 0.64/1.00
Movie: Blade Runner (1982), Similarity Rating: 0.53/1.00
Movie: Finding Nemo (2003), Similarity Rating: 0.52/1.00
Movie: Black Swan (2010), Similarity Rating: 0.52/1.00
Movie: Fargo (1996), Similarity Rating: 0.50/1.00
Movie: Inception (2010), Similarity Rating: 0.46/1.00
Movie: Scott Pilgrim vs. the World (2010), Similarity Rating: 0.45/1.00
Movie: Fight Club (1999), Similarity Rating: 0.45/1.00
Movie: Moonrise Kingdom (2012), Similarity Rating: 0.42/1.00
Movie: Zodiac (2007), Similarity Rating: 0.41/1.00
Movie: Reservoir Dogs (1992), Similarity Rating: 0.40/1.00
Movie: Terminator, The (1984), Similarity Rating: 0.40/1.00
Movie: Gladiator (2000), Similarity Rating: 0.39/1.00
Movie: The Killing of a Sacred Deer (2017), Similarity Rating: 0.39/1.00
Movie: Pirates of the Caribbean: At World's End (2007), Similarity Rating: 0.37/1.00
Movie: