In [2]:
import pandas as pd
from tensorflow.keras import layers, models
import pickle
import numpy as np
import tensorflow as tf

In [4]:
def custom_loss(y_true, y_pred):
    weights = tf.where(tf.equal(y_true, 0), 0.1, 1.0)
    loss = tf.reduce_sum(tf.square(y_true - y_pred) * weights) / tf.reduce_sum(weights)
    
    return loss

In [6]:
model = tf.keras.models.load_model("7hours.keras", custom_objects={'custom_loss': custom_loss})




In [7]:
reconstruction_matrix = np.load("reconstruction.npy")
reconstruction_matrix

array([[ 0.399072  , -0.0188192 ,  0.27993286, ..., -0.02818522,
        -0.08856124, -0.09705086],
       [ 0.4181926 , -0.08891582,  0.1894778 , ..., -0.02764039,
        -0.07899314, -0.09730448],
       [ 0.36923045, -0.13931899,  0.22625524, ..., -0.02788803,
        -0.08536188, -0.09776939],
       ...,
       [ 0.42173898,  0.02625011,  0.37132847, ..., -0.02776409,
        -0.08975312, -0.09645234],
       [ 0.43414447, -0.07446012,  0.2843209 , ..., -0.02824889,
        -0.08614028, -0.09772252],
       [ 0.38871402, -0.03411348,  0.26016498, ..., -0.02830484,
        -0.08880506, -0.09723662]], dtype=float32)

In [8]:
user_item_matrix = pd.read_pickle("scaled_item-user.pkl").to_numpy()
user_item_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [9]:
mse = np.mean(np.square(user_item_matrix - reconstruction_matrix))
print(f"Mean Squared Error (MSE): {mse}")

Mean Squared Error (MSE): 0.04552352196961212


In [13]:
x = user_item_matrix[0].reshape(1,4697)

In [15]:
reconstructed_user = model.predict(x)
reconstructed_user



array([[ 0.39907244, -0.01881914,  0.27993238, ..., -0.02818505,
        -0.08856127, -0.09705094]], dtype=float32)

In [16]:
top_10_indices_values = [(index, value) for index, value in enumerate(reconstructed_user[0])]
top_10_indices_values.sort(key=lambda x: x[1], reverse=True)
top_10_indices_values = top_10_indices_values[:10]

print("Top 10 Indices and Values:")
for index, value in top_10_indices_values:
    print(f"Index: {index}, Value: {value}")

Top 10 Indices and Values:
Index: 11, Value: 0.8056979179382324
Index: 961, Value: 0.7226447463035583
Index: 10, Value: 0.7134723663330078
Index: 216, Value: 0.6840084195137024
Index: 2469, Value: 0.6478605270385742
Index: 161, Value: 0.6397867202758789
Index: 2374, Value: 0.5863435864448547
Index: 133, Value: 0.5836129188537598
Index: 1460, Value: 0.5807211399078369
Index: 1882, Value: 0.5669229030609131


In [17]:
df_ids = pd.read_csv("./data/10k.csv")

In [18]:
top_10_indices = [index for index, _ in top_10_indices_values]
corresponding_ids = df_ids.iloc[top_10_indices]

print("Corresponding IDs for Top 10 Indices:")
print(corresponding_ids)

Corresponding IDs for Top 10 Indices:
         Id
11       21
961    1735
10       20
216     269
2469  16498
161     199
2374  15051
133     165
1460   4898
1882   9253


In [39]:
user_ratings = pd.read_csv("../public/users/Pognark-kun.csv")
user_ratings.head(1)

Unnamed: 0,user_id,anime_id,rating
0,786,21,10


In [40]:
filtered_ratings = user_ratings[user_ratings['anime_id'].isin(df_ids['Id'])]

In [41]:
merged_df = pd.merge(df_ids, filtered_ratings, how='left', left_on='Id', right_on='anime_id')
final_array = merged_df['rating'].fillna(0).values.reshape(-1, 1)
final_array

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]])

In [28]:
from sklearn.preprocessing import MinMaxScaler

In [42]:
for idx in range(final_array.shape[0]):
    value = final_array[idx, 0]
    if value != 0:
        min_value = final_array[:, 0].min()
        max_value = final_array[:, 0].max()
        final_array[idx, 0] = 0 if value == 0 else (value - min_value) / (max_value - min_value) * 2 - 1

final_array

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]])

In [43]:
mowzok = final_array.reshape(1,4697)
mowzok

array([[0., 0., 0., ..., 0., 0., 0.]])

In [44]:
reconstructed_user = model.predict(mowzok)
reconstructed_user



array([[ 0.40278044, -0.0900328 ,  0.17116472, ..., -0.02794231,
        -0.08109729, -0.09755352]], dtype=float32)

In [47]:
top_10_indices_values = [(index, value) for index, value in enumerate(reconstructed_user[0])]
top_10_indices_values.sort(key=lambda x: x[1], reverse=True)
top_10_indices_values = top_10_indices_values

print("Top 10 Indices and Values:")
for index, value in top_10_indices_values:
    print(f"Index: {index}, Value: {value}")

Top 10 Indices and Values:
Index: 4256, Value: 0.9453617930412292
Index: 3036, Value: 0.891140341758728
Index: 2114, Value: 0.8673862218856812
Index: 3835, Value: 0.8609218001365662
Index: 11, Value: 0.8587948083877563
Index: 4185, Value: 0.8557373285293579
Index: 4130, Value: 0.8152465224266052
Index: 3040, Value: 0.8035708069801331
Index: 2666, Value: 0.8034327030181885
Index: 3098, Value: 0.8012992739677429
Index: 961, Value: 0.7989451289176941
Index: 1882, Value: 0.7970167398452759
Index: 2469, Value: 0.7837269902229309
Index: 4276, Value: 0.7625125050544739
Index: 10, Value: 0.7621586918830872
Index: 3347, Value: 0.7596656680107117
Index: 3001, Value: 0.7569962739944458
Index: 2490, Value: 0.7559574246406555
Index: 3166, Value: 0.7362174987792969
Index: 2997, Value: 0.7274031639099121
Index: 3838, Value: 0.7236072421073914
Index: 2813, Value: 0.7185488343238831
Index: 4088, Value: 0.715123176574707
Index: 3309, Value: 0.7149807214736938
Index: 4093, Value: 0.7088422775268555
Index

In [48]:
top_10_indices = [index for index, _ in top_10_indices_values]
corresponding_ids = df_ids.iloc[top_10_indices]

print("Corresponding IDs for Top 10 Indices:")
print(corresponding_ids)

Corresponding IDs for Top 10 Indices:
         Id
4256  38524
3036  28851
2114  11061
3835  35760
11       21
...     ...
1677   6795
286     379
3079  29949
1533   5507
117     147

[4697 rows x 1 columns]
