In [14]:
import contextlib
import io

f = io.StringIO()
with contextlib.redirect_stdout(f):
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import concurrent.futures
    import cvxpy as cp

    import warnings
    warnings.filterwarnings("ignore", category=UserWarning)

In [15]:
# Read the long-format ratings table; the code below uses its `u_id`, `a_id` and `score` columns.
df = pd.read_csv("data/100x100.csv")
# Reshape to a u_id x a_id grid of scores. Pairs absent from the table come out of the
# pivot as NaN and are replaced by 0 here; nuclear_norm_model_matrix skips entries equal to 0.
data_df = df.pivot(index='u_id', columns='a_id', values='score').fillna(0)
# NumPy array of the grid, used as input to the matrix-based solver.
data_matrix = data_df.values
# Report the grid size as (rows, columns).
print(data_df.shape)

(144, 131)


In [16]:
#This takes in dataframe input
def nuclear_norm_model_df(df):
    """Complete a user x anime score matrix by nuclear-norm minimization.

    Each row of ``df`` becomes an equality constraint that pins one entry of
    the matrix variable to the observed score. The remaining entries are
    chosen by minimizing the nuclear norm of the matrix with the SCS solver.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format ratings with columns ``u_id``, ``a_id`` and ``score``.
        The frame's index is ignored, so filtered or re-indexed frames are
        handled the same way as freshly loaded ones.

    Returns
    -------
    The value of the matrix variable, shaped ``(n_users, n_anime)`` with
    rows/columns ordered by the sorted unique ``u_id``/``a_id`` values, or
    ``None`` when the solver status is anything other than ``cp.OPTIMAL``.
    """
    user_list = sorted(df['u_id'].unique())
    anime_list = sorted(df['a_id'].unique())

    # Map raw ids to contiguous row/column positions of the matrix variable.
    user_id_to_index = {u_id: idx for idx, u_id in enumerate(user_list)}
    anime_id_to_index = {a_id: idx for idx, a_id in enumerate(anime_list)}

    R = cp.Variable((len(user_list), len(anime_list)))

    # Walk the rows by value, not by index label: label-based lookups with
    # range(len(df)) only work when the frame carries a default RangeIndex.
    # Ids are looked up unchanged (no int() cast) so they always match the
    # dictionary keys built from the same columns above.
    observed = df[['u_id', 'a_id', 'score']].itertuples(index=False, name=None)
    constraints = [
        R[user_id_to_index[u_id], anime_id_to_index[a_id]] == score
        for u_id, a_id, score in observed
    ]

    obj = cp.Minimize(cp.normNuc(R))

    prob = cp.Problem(obj, constraints)
    prob.solve(solver=cp.SCS)

    if prob.status == cp.OPTIMAL:
        print("Optimization succeeded.")
        return R.value
    else:
        print(f"Optimization failed with status: {prob.status}")
        return None

#This takes in matrix input
def nuclear_norm_model_matrix(data_matrix):
    """Complete a dense user x anime score matrix by nuclear-norm minimization.

    Entries equal to 0 or NaN are treated as unobserved. Every other entry
    becomes an equality constraint on the matrix variable, and the nuclear
    norm of the variable is minimized with the SCS solver.

    Parameters
    ----------
    data_matrix : 2-D array-like of numbers
        Observed scores, with 0 (or NaN) marking missing entries.

    Returns
    -------
    The value of the matrix variable (same shape as ``data_matrix``), or
    ``None`` when the solver status is anything other than ``cp.OPTIMAL``.
    """
    values = np.asarray(data_matrix, dtype=float)
    num_users, num_anime = values.shape

    R = cp.Variable((num_users, num_anime))

    # NaN compares unequal to 0, so it must be excluded explicitly; otherwise
    # it would be turned into an unsatisfiable `R[i, j] == nan` constraint.
    observed = (values != 0) & ~np.isnan(values)

    # np.argwhere yields positions in row-major order, i.e. the same order a
    # nested row/column loop would visit them.
    constraints = [
        R[int(i), int(j)] == float(values[i, j])
        for i, j in np.argwhere(observed)
    ]

    objective = cp.Minimize(cp.normNuc(R))
    problem = cp.Problem(objective, constraints)
    problem.solve(solver=cp.SCS)

    if problem.status == cp.OPTIMAL:
        print("Optimization succeeded.")
        return R.value
    else:
        print(f"Optimization failed. Status: {problem.status}")
        return None


In [17]:
# Complete the score matrix from the long-format frame (one equality constraint per row of `df`).
# NOTE: the following cell rebinds `completed_matrix`, so this result is overwritten there.
completed_matrix = nuclear_norm_model_df(df)

Optimization succeeded.


In [18]:
# Complete the score matrix from the dense grid built by the pivot (0 entries are skipped as unobserved).
completed_matrix = nuclear_norm_model_matrix(data_matrix)
# Bare expression so the completed matrix is rendered as the cell output.
completed_matrix

Optimization succeeded.


array([[9.33062197, 7.78111385, 8.00000001, ..., 9.3124474 , 7.22256218,
        7.25704598],
       [8.3255952 , 6.79427541, 7.41250897, ..., 6.80217792, 6.12976039,
        7.00722474],
       [9.08114318, 8.67308476, 9.84972913, ..., 7.92731419, 7.83234011,
        8.86697239],
       ...,
       [8.64576755, 6.        , 8.17817278, ..., 6.99999999, 8.02591882,
        7.11699098],
       [6.35939123, 5.86884223, 7.51340344, ..., 5.39243337, 8.39126469,
        8.06472485],
       [6.75863935, 7.40919727, 7.8145405 , ..., 8.        , 8.00000001,
        6.99999999]])

In [19]:
def top_k_largest(matrix, k=5):
    """Return the ``k`` largest entries of ``matrix`` with their positions.

    Parameters
    ----------
    matrix : array-like
        Values to search; converted with ``np.asarray``.
    k : int, default 5
        Number of entries to return. Values larger than the number of
        elements are clamped to it; ``k <= 0`` yields an empty list.

    Returns
    -------
    list of (value, index_tuple)
        Entries sorted from largest to smallest, where ``index_tuple`` is the
        position produced by ``np.unravel_index`` for ``matrix.shape``.
    """
    matrix = np.asarray(matrix)
    flat = matrix.ravel()

    # Clamp k: argpartition raises when k exceeds the element count, and a
    # slice of `[-0:]` would select every element instead of none.
    k = min(int(k), flat.size)
    if k <= 0:
        return []

    # Partial selection of the k largest, then a full sort of just those k.
    candidates = np.argpartition(flat, -k)[-k:]
    ordered = candidates[np.argsort(flat[candidates])[::-1]]
    return [(flat[idx], np.unravel_index(idx, matrix.shape)) for idx in ordered]


# Display the five largest completed entries (default k=5) together with their (row, column) positions.
top_k_largest(completed_matrix)

[(11.600763258244847, (117, 23)),
 (11.236385309011409, (117, 55)),
 (11.215948079452929, (117, 21)),
 (11.181682146068688, (132, 18)),
 (11.181492271974369, (108, 0))]