In [None]:
import pickle 
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF

In [None]:
import os
print(os.getcwd())

In [None]:
ratings = pd.read_csv(r'C:\Users\shabn\neuefische\ds-capstone-recommendation\data\ml-latest-small\ratings_modified.csv')
movies = pd.read_csv(r'C:\Users\shabn\neuefische\ds-capstone-recommendation\data\ml-latest-small\movies_modified.csv')

In [None]:
ratings.info()

In [None]:
movies.info()

### Initialize a sparse user-item rating matrix

In [None]:
R = csr_matrix((ratings["rating"], (ratings["user_id"], ratings["movie_id"])))
R

In [None]:
df_r = pd.DataFrame(R.todense())
df_r

### Create a model and set the hyperparameters

In [None]:
import os
# Create the directory if it doesn't exist
os.makedirs('./data', exist_ok=True)

In [None]:

model = NMF(n_components=1000, max_iter=1000)

model.fit(R)

# Save model 
with open('./data/model_nmf.pkl', 'wb') as file:
    pickle.dump(model, file)

# Load model
with open('./data/model_nmf.pkl', 'rb') as file:
    model = pickle.load(file)

In [None]:
model.reconstruction_err_

## Recommender function

### Skip over the following code, still needs refinement

In [None]:

def recommended_movies(query, model, ratings, movies, k=10):

    # Create user vector
    df_new_user = pd.DataFrame(query, columns=movies["movie_id"], index=["new_user"])
    df_new_user_modified = df_new_user.fillna(0)

    # Create user-feature matrix P for new user
    P_new_user_matrix = model.transform(df_new_user_modified)

    # New dataframe 
    P_new_user = pd.DataFrame(
        P_new_user_matrix,
        columns=model.get_feature_names_out(),
        index=["new_user"],
    )

    # Reconstruct user-movie matrix/dataframe for new user
    Q_matrix = model.components_
    Q = pd.DataFrame(Q_matrix)
    R_hat_new_user_matrix = np.dot(P_new_user, Q)
    R_hat_new_user = pd.DataFrame(R_hat_new_user_matrix, index=["new_user"])

    ranked = R_hat_new_user.T.sort_values("new_user", ascending=False)
    recommended = ranked[~ranked.index.isin(query)].reset_index()
    recommended.columns = ["movie_id", "score"]

    # Get movie ids and corresponding titles the same order
    movie_ids = recommended.iloc[:k]["movie_id"]
    titles = [movies.loc[id]["title"] for id in movie_ids]
    return movie_ids, titles

    

In [None]:
# shabnams ratings based on movies with genre action
user_query = {
    2: 4.8, 
    1101: 5,
    1370: 4.5,
    1515: 4.9,
    1580: 5, 
    1722: 4.5,
    1831: 4.7, 
    1858: 4.2,
    62374: 4.5, 
    67923: 4.6, 
}

recommended_movies(user_query, model, ratings, movies, k=10)

### Run the codes below

In [None]:
def recommended_movies(query, model, ratings, movies, k=5):
    # Ensure movie_ids from the model match the ratings DataFrame columns
    movie_ids = ratings.columns
    
    # Create a user vector with 0s for all movies the user hasn't rated
    new_user_row = pd.Series(0, index=movie_ids)
    
    # Fill in the ratings from the query
    for movie_id, rating in query.items():
        if movie_id in movie_ids:
            new_user_row[movie_id] = rating

    # Reshape the row to create a single-row matrix for the model
    new_user_matrix = new_user_row.values.reshape(1, -1)

    # Transform the user vector using the NMF model to get the user-feature matrix P
    P_new_user_matrix = model.transform(new_user_matrix)

    # Reconstruct the user-movie matrix for the new user
    Q_matrix = model.components_
    R_hat_new_user_matrix = np.dot(P_new_user_matrix, Q_matrix)

    # Create a DataFrame for predicted scores
    predicted_scores = pd.DataFrame(R_hat_new_user_matrix, columns=movie_ids, index=["new_user"])

    # Rank movies by predicted scores
    ranked = predicted_scores.T.sort_values("new_user", ascending=False)

    # Remove movies that the user has already rated
    ranked = ranked[~ranked.index.isin(query.keys())]

    # Get top-k recommendations
    recommendations = ranked.head(k).reset_index()
    recommendations.columns = ["movie_id", "score"]

    # Merge with movie titles
    recommendations = recommendations.merge(movies, on="movie_id")

    return recommendations[["movie_id", "title", "score"]]

In [None]:
# shabnams ratings based on movies with genre action
user_query = {
    2: 4.8, 
    1101: 5,
    1370: 4.5,
    1515: 4.9,
    1580: 5,
}

recommended_movies(user_query, model, ratings, movies, k=5)

In [None]:
# shabnams ratings based on movies with genre action
user_query = {
    2: 4.8, 
    1101: 5,
    1370: 4.5,
    1515: 4.9,
    1580: 5, 
    1722: 4.5,
    1831: 4.7, 
    1858: 4.2,
    62374: 4.5, 
    67923: 4.6, 
}

movie_ids, titles = recommended_movies(user_query, model, ratings, movies, k=10)

print("Recommended movies:\n")
for i, title in enumerate(titles):
    print(f"{i+1}. {title}")
    

### Build a simple recommender 

In [49]:
def recommend_popular(query, ratings, k=10):
    """
    Function that returns a list of k unseen, most pupular movies.
    """
    # Create a list of movies by popularity
    df_popularity = (
        ratings.groupby("movie_id")
        .agg(mean=("rating", "mean"))
        .sort_values("mean", ascending=False)
        .reset_index()
        .copy()
    )

    # Filter out movie_ids the user has seen (rated)
    df_popularity = df_popularity[df_popularity["movie_id"].isin(query)]

    return df_popularity["movie_id"].head(k).to_list()

In [52]:
query = ratings[ratings["user_id"] == 4][["movie_id", "rating"]]
query = query.set_index("movie_id").to_dict()["rating"]

recommened_ids = recommend_popular(query, ratings, k=10)

recommended_movies = movies[movies["movie_id"].isin(recommened_ids)]
recommended_movies[["movie_id", "title"]]

Unnamed: 0,movie_id,title
3,4,Waiting to Exhale (1995)
6,7,Sabrina (1995)
14,15,Cutthroat Island (1995)
22,23,Assassins (1995)
25,26,Othello (1995)
32,34,Babe (1995)
152,180,Mallrats (1995)
196,230,Dolores Claiborne (1995)
281,322,Swimming with Sharks (1995)
