In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import concurrent.futures
import cvxpy as cp
import import_ipynb
import als

In [8]:
# Read the long-format ratings (one row per user/anime/score) and reshape them
# into a user x anime table; pairs with no rating become 0 after fillna.
df = pd.read_csv("data/100x100.csv")
data_df = (
    df
    .pivot(index='u_id', columns='a_id', values='score')
    .fillna(0)
)
# Plain NumPy view of the table, used by the matrix-based solver.
data_matrix = data_df.values
print(data_df.shape)

(98, 97)


In [9]:
# Keep only the three columns that describe an observed rating.
rating_columns = ['u_id', 'a_id', 'score']
delta = df.loc[:, rating_columns]
delta

Unnamed: 0,u_id,a_id,score
0,0,31240,9
1,16,31240,8
2,31,31240,9
3,50,31240,9
4,77,31240,4
...,...,...,...
1786,1098,28623,8
1787,1349,28623,8
1788,1484,28623,4
1789,3413,28623,6


In [10]:
# Position of every user id (row label) and anime id (column label) in data_df.
user_id_index_map = dict(zip(data_df.index, range(len(data_df.index))))
anime_id_index_map = dict(zip(data_df.columns, range(len(data_df.columns))))

def user_id_to_index(uid):
    """Return the row position of ``uid`` in ``user_id_index_map``, or -1 if absent."""
    if uid in user_id_index_map:
        return user_id_index_map[uid]
    return -1

def anime_id_to_index(aid):
    """Return the column position of ``aid`` in ``anime_id_index_map``, or -1 if absent."""
    if aid in anime_id_index_map:
        return anime_id_index_map[aid]
    return -1

In [11]:
# Look up one anime id; the helper returns -1 when the id is not a column of data_df.
anime_id_to_index(35760)

-1

In [12]:
#This takes in dataframe input
def nuclear_norm_model_df(df):
    """Complete a user x anime rating matrix by nuclear-norm minimization.

    A dense variable with one row per distinct ``u_id`` and one column per
    distinct ``a_id`` (both in sorted order) is created, every observed rating
    is pinned with an equality constraint, and the nuclear norm of the
    variable is minimized with the SCS solver.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'u_id'``, ``'a_id'`` and ``'score'``; each
        row is one observed rating. Rows are read positionally, so the frame
        may carry any index (for example after filtering or sampling).

    Returns
    -------
    numpy.ndarray or None
        The solved matrix (``R.value``) when the solver status is
        ``cp.OPTIMAL``; ``None`` for any other status.
    """
    user_list = sorted(df['u_id'].unique())
    anime_list = sorted(df['a_id'].unique())

    num_users = len(user_list)
    num_anime = len(anime_list)

    # Local lookup tables; named differently from the notebook-level helper
    # functions user_id_to_index / anime_id_to_index to avoid shadowing them.
    row_of_user = {u_id: idx for idx, u_id in enumerate(user_list)}
    col_of_anime = {a_id: idx for idx, a_id in enumerate(anime_list)}

    R = cp.Variable((num_users, num_anime))

    # Iterate rows by position rather than by label: label-based access with
    # range(len(df)) only works when the frame still has its default 0..n-1
    # index and fails (or reads the wrong rows) after any filtering.
    observed = df[['u_id', 'a_id', 'score']].itertuples(index=False, name=None)
    constraints = [
        R[row_of_user[u_id], col_of_anime[a_id]] == score
        for u_id, a_id, score in observed
    ]

    obj = cp.Minimize(cp.normNuc(R))

    prob = cp.Problem(obj, constraints)
    prob.solve(solver=cp.SCS)

    if prob.status == cp.OPTIMAL:
        print("Optimization succeeded.")
        return R.value
    else:
        print(f"Optimization failed with status: {prob.status}")
        return None

#This takes in matrix input
def nuclear_norm_model_matrix(data_matrix):
    """Complete a rating matrix by nuclear-norm minimization.

    Every non-zero entry of ``data_matrix`` is treated as an observed rating
    and fixed with an equality constraint; zero entries are left free. The
    nuclear norm of the full matrix is then minimized with the SCS solver.

    Parameters
    ----------
    data_matrix : 2-D array
        Object exposing ``.shape`` and ``[i, j]`` indexing.

    Returns
    -------
    numpy.ndarray or None
        The solved matrix (``R.value``) when the solver status is
        ``cp.OPTIMAL``; ``None`` for any other status.
    """
    num_users, num_anime = data_matrix.shape
    R = cp.Variable((num_users, num_anime))

    # Row-major sweep over every cell; only non-zero cells become constraints.
    cell_positions = (
        (row, col) for row in range(num_users) for col in range(num_anime)
    )
    constraints = [
        R[row, col] == data_matrix[row, col]
        for row, col in cell_positions
        if data_matrix[row, col] != 0
    ]

    problem = cp.Problem(cp.Minimize(cp.normNuc(R)), constraints)
    problem.solve(solver=cp.SCS)

    # Guard clause: anything other than an exact OPTIMAL status is a failure.
    if problem.status != cp.OPTIMAL:
        print(f"Optimization failed. Status: {problem.status}")
        return None

    print("Optimization succeeded.")
    return R.value


In [19]:
# Complete the zero-filled rating matrix; nuclear_norm_model_matrix treats zeros
# as unobserved and returns None if the solver does not report OPTIMAL.
completed_matrix = nuclear_norm_model_matrix(data_matrix)
completed_matrix

Optimization succeeded.


array([[ 9.49241143,  6.341012  ,  4.82859751, ...,  7.0628775 ,
         8.3159194 ,  7.42401477],
       [ 7.53858754,  7.50951606,  6.86393561, ...,  5.32323032,
         6.22940897,  7.28093332],
       [10.07172448,  7.74447624,  6.23664432, ...,  5.69805971,
         8.44565121,  8.66805702],
       ...,
       [ 9.75696905,  9.        ,  8.99999999, ...,  5.86471333,
         9.07618339,  8.68743185],
       [ 8.99374963,  8.61693734,  8.08403321, ...,  6.71098589,
         8.74149551,  9.28175677],
       [ 8.58730079,  6.94003213,  6.65879105, ...,  6.67842245,
         6.60914645,  7.83763827]])

In [22]:
def top_k_largest(matrix, k=5):
    """Return the ``k`` largest entries of ``matrix`` with their positions.

    Parameters
    ----------
    matrix : array-like
        Values to search; converted with ``np.asarray``.
    k : int, default 5
        Number of entries wanted. Values larger than the number of elements
        are clamped to that number; ``k <= 0`` yields an empty list.

    Returns
    -------
    list of (value, index_tuple)
        Entries in descending order of value, where ``index_tuple`` is the
        result of ``np.unravel_index`` for that entry.
    """
    values = np.asarray(matrix)
    flat = values.ravel()

    # Clamp k: argpartition raises when k exceeds the element count, and a
    # slice of [-0:] would select every element instead of none.
    k = min(k, flat.size)
    if k <= 0:
        return []

    # argpartition finds the k largest without a full sort; only those k
    # candidates are then ordered from largest to smallest.
    candidate_idx = np.argpartition(flat, -k)[-k:]
    ordered_idx = candidate_idx[np.argsort(flat[candidate_idx])[::-1]]
    return [(flat[idx], np.unravel_index(idx, values.shape)) for idx in ordered_idx]


# Show the five largest completed entries (k defaults to 5) as (value, (row, col)) pairs.
top_k_largest(completed_matrix)

[(np.float64(11.57970503263862), (np.int64(23), np.int64(0))),
 (np.float64(11.242189073121445), (np.int64(14), np.int64(0))),
 (np.float64(11.047223453797033), (np.int64(41), np.int64(0))),
 (np.float64(10.834692074230773), (np.int64(61), np.int64(0))),
 (np.float64(10.831137085404324), (np.int64(47), np.int64(11)))]