In [1]:
import contextlib
import io

# Scratch buffer: everything written to stdout while the imports below run is
# redirected here instead of the cell output. The captured text is never read.
# NOTE(review): presumably this hides import-time chatter from import_ipynb and
# the notebook-backed model modules — confirm.
f = io.StringIO()
with contextlib.redirect_stdout(f):
    import numpy as np
    import pandas as pd
    # import_ipynb is imported before the project modules below.
    # NOTE(review): presumably it is what makes those .ipynb-based modules
    # importable, so the order matters — confirm.
    import import_ipynb
    import baseline_model
    import als
    import spectral_regularization_model
    import nuclear_norm_model
    import warnings
    # Suppress UserWarning for the rest of the session; the warnings filter is
    # process-wide, so it stays active after this `with` block exits.
    warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
def recommend_anime(R, u_id, df, x=5):
    """Return the top-``x`` unrated anime for one user, ranked by predicted rating.

    Parameters
    ----------
    R : array-like
        Predicted ratings with one row per user and one column per anime, in
        the same row/column order as the pivot table built from ``df``
        (``df.pivot(index='u_id', columns='a_id', values='score')``).
    u_id : hashable
        Id of the user to recommend for; must occur in ``df['u_id']``.
    df : pandas.DataFrame
        Long-format ratings with the columns ``'u_id'``, ``'a_id'``,
        ``'score'`` and ``'title'``. Each (``u_id``, ``a_id``) pair must be
        unique, as required by ``DataFrame.pivot``.
    x : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``'title'`` and ``'predicted_rating'``, best prediction first.
        The title and rating in each row refer to the same anime.

    Raises
    ------
    KeyError
        If ``u_id`` does not occur in ``df``.
    """
    # Users x anime table of observed scores; a missing rating becomes 0, and a
    # score of exactly 0 is therefore also treated as "not watched".
    ratings = df.pivot(index='u_id', columns='a_id', values='score').fillna(0)
    if u_id not in ratings.index:
        raise KeyError(f"user id {u_id!r} not found in df['u_id']")

    # Label the prediction matrix with the same user / anime ids as the pivot.
    predicted = pd.DataFrame(R, index=ratings.index, columns=ratings.columns)

    # Only anime the user has not rated are candidates for recommendation.
    user_row = ratings.loc[u_id]
    unrated_ids = user_row[user_row == 0].index

    # Highest predicted ratings first, truncated to the x best.
    top_x = predicted.loc[u_id, unrated_ids].sort_values(ascending=False).head(x)

    # Look titles up per anime id and reindex by the ranked ids so every title
    # lines up with its own rating. (Filtering df with isin() would return the
    # titles in df's row order and collapse ids that share a title.)
    title_by_id = df.drop_duplicates(subset='a_id').set_index('a_id')['title']

    return pd.DataFrame({
        'title': title_by_id.reindex(top_x.index).to_numpy(),
        'predicted_rating': top_x.to_numpy(),
    })

In [3]:
# Load the two rating datasets. Paths are relative to the working directory.
# recommend_anime reads the columns 'u_id', 'a_id', 'score' and 'title' from
# these frames.
df_1 = pd.read_csv("data/100x100.csv")
df_2 = pd.read_csv("data/100x100_2.csv")

# Alternative (disabled): pick a random user id from each dataset.
# chosen_user_1 = np.random.choice(df_1['u_id'].unique(), 1)[0]
# chosen_user_2 = np.random.choice(df_2['u_id'].unique(), 1)[0]

# Use one fixed user id for both datasets.
# NOTE(review): assumes user 966 exists in both CSVs; recommend_anime looks the
# id up with .loc and fails with KeyError otherwise.
chosen_user_1 = 966
chosen_user_2 = 966

print(f"Chosen user 1: {chosen_user_1}")
print(f"Chosen user 2: {chosen_user_2}")

Chosen user 1: 966
Chosen user 2: 966


In [4]:
# Baseline model (lambda = 5) on dataset 1, then the top picks for the chosen user.
baseline_matrix_1 = baseline_model.baseline_model(5, df_1)
recommend_anime(R=baseline_matrix_1, u_id=chosen_user_1, df=df_1)

status: optimal


Unnamed: 0,title,predicted_rating
0,Monster,8.435279
1,Steins;Gate,8.411371
2,Fullmetal Alchemist: Brotherhood,8.118812
3,Yojouhan Shinwa Taikei,8.06879
4,Ginga Eiyuu Densetsu,8.061904


In [5]:
# Baseline model (lambda = 5) on dataset 2, then the top picks for the chosen user.
baseline_matrix_2 = baseline_model.baseline_model(5, df_2)
recommend_anime(R=baseline_matrix_2, u_id=chosen_user_2, df=df_2)

status: optimal


Unnamed: 0,title,predicted_rating
0,Gekkan Shoujo Nozaki-kun,8.816964
1,Tenkuu no Shiro Laputa,8.562032
2,Ping Pong the Animation,8.413421
3,Perfect Blue,8.363466
4,Sennen Joyuu,8.352636


In [6]:
# ALS (rank 20, 1000 iterations) on dataset 1.
# NOTE(review): no random seed is set in this notebook; if als initialises its
# factors randomly the table below may change between runs — confirm.
X, Y = als.als(df_1, 20, 1000)
# Multiply the two returned factors into a full prediction matrix.
# NOTE(review): presumably X is users x rank and Y is anime x rank — confirm in als.
als_matrix_1 = X @ Y.T
recommend_anime(R=als_matrix_1, u_id=chosen_user_1, df=df_1)

Unnamed: 0,title,predicted_rating
0,Clannad: After Story,9.49542
1,Toradora!,9.288772
2,Azumanga Daioh,9.114554
3,Psycho-Pass,8.812352
4,Nichijou,8.744406


In [7]:
# ALS (rank 20, 1000 iterations) on dataset 2. X and Y are rebound here.
# NOTE(review): no random seed is set in this notebook; if als initialises its
# factors randomly the table below may change between runs — confirm.
X, Y = als.als(df_2, 20, 1000)
# Multiply the two returned factors into a full prediction matrix.
# NOTE(review): presumably X is users x rank and Y is anime x rank — confirm in als.
als_matrix_2 = X @ Y.T
recommend_anime(R=als_matrix_2, u_id=chosen_user_2, df=df_2)

Unnamed: 0,title,predicted_rating
0,Gekkan Shoujo Nozaki-kun,8.760389
1,Fate/stay night: Unlimited Blade Works 2nd Season,8.6651
2,Sakura Quest,8.527271
3,Mob Psycho 100 II,8.479505
4,Asobi Asobase,8.259562


In [8]:
# Spectral regularization model (lambda = 5) on dataset 1, then the top picks
# for the chosen user.
spectral_matrix_1 = spectral_regularization_model.spectral_regularization_model(5, df_1)
recommend_anime(R=spectral_matrix_1, u_id=chosen_user_1, df=df_1)

status: optimal


Unnamed: 0,title,predicted_rating
0,Monster,8.005141
1,Ouran Koukou Host Club,7.994429
2,Azumanga Daioh,7.977646
3,Psycho-Pass,7.96545
4,Higurashi no Naku Koro ni,7.94288


In [9]:
# Spectral regularization model (lambda = 5) on dataset 2, then the top picks
# for the chosen user.
spectral_matrix_2 = spectral_regularization_model.spectral_regularization_model(5, df_2)
recommend_anime(R=spectral_matrix_2, u_id=chosen_user_2, df=df_2)

status: optimal


Unnamed: 0,title,predicted_rating
0,Gekkan Shoujo Nozaki-kun,8.259591
1,Tenkuu no Shiro Laputa,8.178825
2,Perfect Blue,8.11358
3,Seto no Hanayome,8.047835
4,Asobi Asobase,8.030796


In [10]:
# Nuclear norm model on dataset 1, then the top picks for the chosen user.
nuclear_matrix_1 = nuclear_norm_model.nuclear_norm_model_df(df_1)
recommend_anime(R=nuclear_matrix_1, u_id=chosen_user_1, df=df_1)

Optimization succeeded.


Unnamed: 0,title,predicted_rating
0,Clannad: After Story,9.208856
1,Ookami Kodomo no Ame to Yuki,9.038716
2,Azumanga Daioh,8.990107
3,Psycho-Pass,8.780612
4,Nichijou,8.421423


In [11]:
# Nuclear norm model on dataset 2, then the top picks for the chosen user.
nuclear_matrix_2 = nuclear_norm_model.nuclear_norm_model_df(df_2)
recommend_anime(R=nuclear_matrix_2, u_id=chosen_user_2, df=df_2)

Optimization succeeded.


Unnamed: 0,title,predicted_rating
0,Gekkan Shoujo Nozaki-kun,8.969728
1,Yuri!!! on Ice,8.940827
2,Sakura Quest,8.543998
3,Mob Psycho 100 II,8.51721
4,Asobi Asobase,8.290307
