In [1]:
# Shell escape: list the GPUs on this machine with their current memory use and load.
!nvidia-smi

Tue Jul 20 12:28:50 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  On   | 00000000:03:00.0 Off |                  N/A |
| 23%   26C    P8     8W / 250W |      1MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  On   | 00000000:04:00.0 Off |                  N/A |
| 23%   26C    P8     8W / 250W |      1MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce GTX 108...  On   | 00000000:07:00.0 Off |                  N/A |
| 23%   

In [2]:
import os
# Both variables are set before the cornac imports below.
# NOTE(review): PCI_BUS_ID ordering presumably makes CUDA device indices line up
# with the `nvidia-smi` listing — confirm against the CUDA documentation.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# Restrict this kernel to the device with index 2.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import cornac
from cornac.eval_methods import RatioSplit
from cornac.models import BPR, VBPR
from cornac.data import ImageModality

from collections import defaultdict

# Allow up to 100 characters per cell when pandas renders a column.
pd.set_option('display.max_colwidth',100)
# Disable Jedi for IPython tab completion.
%config Completer.use_jedi = False

## Image on VBPR

In [13]:
# Build the train / validation / test split used by the image-feature experiment.
SEED = 42     # fixed so the random split (and every metric derived from it) is repeatable
EVAL_K = 15   # cut-off shared by all ranking metrics

data = pd.read_csv('rating_without_0.csv')
# One (user_id, anime_id, rating) tuple per distinct row.
ratings = list(data[['user_id', 'anime_id', 'rating']].drop_duplicates().itertuples(index=False))

with open('anime_image_features.npy', 'rb') as f:
    features = np.load(f)

with open('anime_image_ids.npy', 'rb') as f:
    ids = np.load(f)

# presumably row i of `features` describes item ids[i] — verify against the feature-extraction step
item_image_modality = ImageModality(features=features, ids=ids, normalized=True)

ratio_split = RatioSplit(
    data=ratings,
    val_size=0.2,
    test_size=0.1,
    # Ratings run from 1 to 10 (see the split summary printed below this cell);
    # 4.0 is the cut-off handed to cornac for ranking evaluation.
    rating_threshold=4.0,
    exclude_unknowns=True,
    verbose=True,
    seed=SEED,
    item_image=item_image_modality
)

# All three metrics use the same integer cut-off.
eval_metrics = [cornac.metrics.NCRR(k=EVAL_K),
                cornac.metrics.NDCG(k=EVAL_K),
                cornac.metrics.Recall(k=EVAL_K)]

rating_threshold = 4.0
exclude_unknowns = True
---
Training data:
Number of users = 71569
Number of items = 15915
Number of ratings = 10799677
Max rating = 10.0
Min rating = 1.0
Global mean = 7.4
---
Test data:
Number of users = 67253
Number of items = 14326
Number of ratings = 1542627
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 69097
Number of items = 15330
Number of ratings = 3085269
---
Total users = 71569
Total items = 15915


In [4]:
# Train VBPR (with item features) and plain BPR on the same split and compare them.
K = 70
SEED = 42  # fixed so model initialisation is repeatable between runs

vbpr = VBPR(
    k=K, k2=K, n_epochs=5, batch_size=1024, learning_rate=0.001,
    lambda_w=0.1, lambda_b=0.1, lambda_e=0.1, use_gpu=True, seed=SEED,
)
# BPR is given 2*K factors; presumably this matches VBPR's k + k2 so both models
# have the same total latent size — confirm this is the intended comparison.
bpr = BPR(k=(K * 2), max_iter=5, learning_rate=0.001, lambda_reg=0.1, verbose=True, seed=SEED)

cornac.Experiment(eval_method=ratio_split, models=[vbpr, bpr], metrics=eval_metrics).run()

Epoch 1/5:   0%|          | 0/10547 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/10547 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/10547 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/10547 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/10547 [00:00<?, ?it/s]

Optimization finished!


  0%|          | 0/5 [00:00<?, ?it/s]

Optimization finished!

VALIDATION:
...
     | NCRR@15 | NDCG@15 | Recall@15 | Time (s)
---- + ------- + ------- + --------- + --------
VBPR |  0.1344 |  0.1360 |    0.0785 | 262.9246
BPR  |  0.1235 |  0.1213 |    0.0668 | 211.4490

TEST:
...
     | NCRR@15 | NDCG@15 | Recall@15 | Train (s) | Test (s)
---- + ------- + ------- + --------- + --------- + --------
VBPR |  0.0739 |  0.0797 |    0.0751 | 1915.3894 | 235.5127
BPR  |  0.0672 |  0.0706 |    0.0638 |    7.3039 | 209.0069



## Run on full dataset

In [3]:
# Load every distinct (user_id, anime_id, rating) row into a cornac Dataset
# (no train/test split) and attach the image features to it.
data = pd.read_csv('rating_without_0.csv')
uir_rows = data[['user_id', 'anime_id', 'rating']].drop_duplicates().itertuples(index=False)
ratings = cornac.data.Dataset.from_uir(uir_rows)

features = np.load('anime_image_features.npy')
ids = np.load('anime_image_ids.npy')

item_image_modality = ImageModality(features=features, ids=ids, normalized=True)
ratings.add_modalities(item_image=item_image_modality)

In [4]:
# Fit VBPR on the complete dataset using the same hyper-parameters as the experiment run.
K = 70
vbpr_params = dict(
    k=K,
    k2=K,
    n_epochs=5,
    batch_size=1024,
    learning_rate=0.001,
    lambda_w=0.1,
    lambda_b=0.1,
    lambda_e=0.1,
    use_gpu=True,
)
model = VBPR(**vbpr_params)

model.fit(ratings)

Epoch 1/5:   0%|          | 0/15067 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/15067 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/15067 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/15067 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/15067 [00:00<?, ?it/s]

Optimization finished!


<cornac.models.vbpr.recom_vbpr.VBPR at 0x7f72fc3399b0>

In [5]:
anime = pd.read_csv('anime_info.csv')


def _first_aired_year(aired):
    """Return the year at the end of the start date in an `Aired` string.

    `Aired` looks like "Apr 3, 1998 to Apr 24, 1999"; the last four characters
    of the part before " to " are parsed as the year. The literal 'Unknown'
    maps to 2021, which yields an airtime of 0.
    """
    if aired == 'Unknown':
        return 2021
    start_date = aired.split(' to ')[0]
    return int(start_date[-4:])


# Years elapsed between the first airing and 2021.
anime['airtime'] = 2021 - anime['Aired'].apply(_first_aired_year)

Unnamed: 0,MAL_ID,Name,Score,Genders,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1,sypnopsis,pic_url,anime_url
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0,"In the year 2071, humanity has colonized several of the planets and moons of the solar system le...",https://cdn.myanimelist.net/images/anime/4/19644.jpg,https://myanimelist.net/anime/1


In [36]:
# Show the top recommendations for one user and split each VBPR score into its
# collaborative-filtering part and its visual part.
userid = 6039
# NOTE(review): this rebinds `model` to `vbpr`, replacing whatever earlier cells
# bound to `model` — confirm this is the model you mean to inspect.
model = vbpr
UIDX = model.train_set.uid_map[userid]

# Items this user has already rated; these are never recommended.
watched = data[data['user_id'] == userid]['anime_id'].unique()

item_id2idx = model.train_set.iid_map
item_idx2id = list(model.train_set.item_ids)


pd.set_option('display.max_colwidth',1000)
TOPK = 5
MAX_AIRTIME = 3  # keep only titles whose `airtime` (2021 minus first-aired year) is below this

# `recommendations` holds item indices in ranked order; `scores` is looked up by item index.
recommendations, scores = model.rank(UIDX)

print(f"\nTOP {TOPK} RECOMMENDATIONS:")
rec_df = defaultdict(list)
total_rec = 0
# Walk the ranking once and stop at TOPK hits. Iterating over the ranking itself
# (instead of an unbounded counter) means running out of candidates simply ends
# the loop rather than raising IndexError.
for i in recommendations:
    if total_rec >= TOPK:
        break
    mal_id = item_idx2id[i]
    if mal_id in watched:
        continue
    # Single metadata lookup per candidate, reused for every field below.
    info = anime[anime['MAL_ID'] == mal_id]
    if not info['airtime'].item() < MAX_AIRTIME:
        continue
    # Every term is read from `model`, so the breakdown always describes the same
    # model that produced `scores`.
    cf_contrib = model.beta_item[i] + np.dot(model.gamma_item[i], model.gamma_user[UIDX])
    vis_contrib = model.visual_bias[i] + np.dot(model.theta_item[i], model.theta_user[UIDX])
    rec_df["MAL_ID"].append(mal_id)
    rec_df["Score"].append(scores[i])
    rec_df["CF Contribution"].append(cf_contrib)
    rec_df["Visual Contribution"].append(vis_contrib)
    rec_df["anime_name"].append(info['English name'].item())
    rec_df["pic_url"].append(info['pic_url'].item())
    total_rec += 1

pd.DataFrame(rec_df)


TOP 5 RECOMMENDATIONS:


Unnamed: 0,MAL_ID,Score,CF Contribution,Visual Contribution,anime_name,pic_url
0,38000,0.893973,3.298187,-2.404215,Demon Slayer:Kimetsu no Yaiba,https://cdn.myanimelist.net/images/anime/1286/99889.jpg
1,38524,0.492789,2.684249,-2.19146,Attack on Titan Season 3 Part 2,https://cdn.myanimelist.net/images/anime/1517/100633.jpg
2,40748,-0.231155,2.141249,-2.372404,Unknown,https://cdn.myanimelist.net/images/anime/1171/109222.jpg
3,37520,-0.251377,1.295066,-1.546443,Dororo,https://cdn.myanimelist.net/images/anime/1879/100467.jpg
4,38691,-0.321643,2.265505,-2.587148,Dr. Stone,https://cdn.myanimelist.net/images/anime/1613/102576.jpg


## Text on VBPR

In [38]:
# Build the split for the text-feature experiment. The text features are wrapped
# in ImageModality and passed as `item_image` so that VBPR can consume them.
SEED = 42     # fixed so the random split is repeatable
EVAL_K = 15   # cut-off shared by all ranking metrics

with open('anime_text_features.npy', 'rb') as f:
    features = np.load(f)

with open('anime_text_ids.npy', 'rb') as f:
    ids = np.load(f)

ids_set = set(ids)
data = pd.read_csv('rating_without_0.csv')[['user_id', 'anime_id', 'rating']].drop_duplicates()
# NOTE: despite its name, `exclude == 1` marks the rows that are KEPT, i.e. ratings
# whose anime has text features. Computed with a vectorised membership test
# instead of a Python call per row.
data['exclude'] = data['anime_id'].isin(ids_set).astype('int64')
ratings = list(data[data['exclude'] == 1][['user_id', 'anime_id', 'rating']].itertuples(index=False))
print('ratings processing done')

item_text_modality = ImageModality(features=features, ids=ids, normalized=True)

ratio_split = RatioSplit(
    data=ratings,
    test_size=0.2,
    rating_threshold=4.0,
    exclude_unknowns=True,
    seed=SEED,
    item_image=item_text_modality
)
print('data spliting done')

# All three metrics use the same integer cut-off.
eval_metrics = [cornac.metrics.NCRR(k=EVAL_K),
                cornac.metrics.NDCG(k=EVAL_K),
                cornac.metrics.Recall(k=EVAL_K)]

ratings processing done
data spliting done


In [39]:
# Train VBPR (fed with the text features) and plain BPR on the text split and compare them.
K = 70
SEED = 42  # fixed so model initialisation is repeatable between runs

vbpr = VBPR(
    k=K, k2=K, n_epochs=5, batch_size=1024, learning_rate=0.001,
    lambda_w=0.1, lambda_b=0.1, lambda_e=0.1, use_gpu=True, seed=SEED,
)
# BPR is given 2*K factors; presumably this matches VBPR's k + k2 so both models
# have the same total latent size — confirm this is the intended comparison.
bpr = BPR(k=(K * 2), max_iter=5, learning_rate=0.001, lambda_reg=0.1, verbose=True, seed=SEED)

cornac.Experiment(eval_method=ratio_split, models=[vbpr, bpr], metrics=eval_metrics).run()

Epoch 1/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Optimization finished!


  0%|          | 0/5 [00:00<?, ?it/s]

Optimization finished!

TEST:
...
     | NCRR@15 | NDCG@15 | Recall@15 | Train (s) | Test (s)
---- + ------- + ------- + --------- + --------- + --------
VBPR |  0.1344 |  0.1359 |    0.0803 | 2104.5959 | 293.4130
BPR  |  0.1233 |  0.1217 |    0.0683 |    8.3686 | 202.3798



In [40]:
# Run the experiment a second time with the same `vbpr` and `bpr` objects.
# NOTE(review): the intent is five more epochs on top of the previous run; that
# only holds if fitting an already-trained model resumes from its current
# parameters instead of re-initialising them — confirm for the installed cornac version.
cornac.Experiment(eval_method=ratio_split, models=[vbpr, bpr], metrics=eval_metrics).run()

Epoch 1/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/11988 [00:00<?, ?it/s]

Optimization finished!


  0%|          | 0/5 [00:00<?, ?it/s]

Optimization finished!

TEST:
...
     | NCRR@15 | NDCG@15 | Recall@15 | Train (s) |  Test (s)
---- + ------- + ------- + --------- + --------- + ---------
VBPR |  0.1323 |  0.1355 |    0.0832 | 2146.2517 | 1055.3039
BPR  |  0.1236 |  0.1221 |    0.0681 |    8.4568 |  542.4332



In [44]:
# Persist the trained model. The call returns the path of the .pkl it wrote, which
# sits in a sub-directory named after the model inside 'VBPR/' (see the printed path).
vbpr.save('VBPR/')

VBPR model is saved to VBPR/VBPR/2021-07-18_11-06-16-924108.pkl


'VBPR/VBPR/2021-07-18_11-06-16-924108.pkl'