In [1]:
import pickle

import numpy as np
import pandas as pd

In [2]:
# Raise pandas' display limits so large tables are shown without truncation.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 50

In [6]:
# Load the raw ratings table from disk.
user = pd.read_csv(filepath_or_buffer="data/user.csv")

In [7]:
# Number of distinct values in each column of the raw table.
user.nunique()

User ID        35141
Username       35140
Anime ID       14037
Anime Title    14067
Score             10
dtype: int64

In [8]:
# (rows, columns) of the raw table, before duplicates are removed.
user.shape

(3752106, 5)

In [10]:
# Remove fully duplicated rows; rebinding keeps this cell safe to re-run.
user = user.drop_duplicates()

In [376]:
# Keep only ratings whose anime appears in the `data` catalogue (matched on mal_id).
# NOTE(review): `data` is not created in any visible cell - it must be loaded
# before this cell for a fresh-kernel run to succeed.
in_catalogue = user['Anime ID'].isin(data['mal_id'])
filtered_user = user.loc[in_catalogue]

In [378]:
# Ratings submitted per user.
counts = filtered_user['User ID'].value_counts()

# Retain only users with more than 50 ratings.
active_user_ids = counts.loc[counts > 50].index
filtered_user = filtered_user.loc[filtered_user["User ID"].isin(active_user_ids)]

In [379]:
# Distinct values per column after the catalogue and activity filters.
filtered_user.nunique()

User ID        16838
Username       16837
Anime ID        8282
Anime Title     8312
Score             10
dtype: int64

In [380]:
# Filtering left gaps in the row labels; rebuild a contiguous 0..n-1 index
# and discard the old labels.
filtered_user = filtered_user.reset_index(drop=True)

In [381]:
# Keep every column except Username. Selecting by name (rather than by
# position 0, 2, 3, 4) stays correct if the CSV column order changes and
# does not break when the cell is executed a second time.
filtered_user = filtered_user[["User ID", "Anime ID", "Anime Title", "Score"]]

In [382]:
# Switch the two id columns to snake_case names.
filtered_user = filtered_user.rename(
    columns={'User ID': 'user_id', 'Anime ID': 'anime_id'}
)

In [383]:
# Map raw ids to dense integer codes (0..n_categories-1) suitable for
# indexing embedding tables. The Categorical objects are kept so the
# code -> original id mapping stays available through `.categories`.
user_ids = pd.Categorical(filtered_user["user_id"])
anime_ids = pd.Categorical(filtered_user["anime_id"])

filtered_user = filtered_user.assign(
    user_id_encoded=user_ids.codes,
    anime_id_encoded=anime_ids.codes,
)

In [385]:
from sklearn.preprocessing import MinMaxScaler

In [386]:
# Rescale Score to the [0, 1] range; the fitted scaler is kept in `minmax`.
minmax = MinMaxScaler()
scaled_scores = minmax.fit_transform(filtered_user[["Score"]])
# fit_transform returns an (n, 1) array; flatten it into a plain column.
filtered_user["Score_scaled"] = scaled_scores.ravel()

In [387]:
# (rows, columns) of the modelling table after encoding and scaling.
filtered_user.shape

(3316985, 7)

In [388]:
from sklearn.model_selection import train_test_split

# Features are the two encoded ids; the target is the scaled score.
id_features = filtered_user[["user_id_encoded", "anime_id_encoded"]]
scaled_target = filtered_user["Score_scaled"]

# Shuffled 80/20 split with a fixed seed for reproducibility.
X_train, X_val, y_train, y_val = train_test_split(
    id_features,
    scaled_target,
    test_size=0.2,
    random_state=40,
    shuffle=True,
)


In [389]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import regularizers

In [452]:
# An embedding table is indexed directly by the encoded id, so it needs
# (largest code + 1) rows. Size it from the full table rather than from the
# distinct ids in the training split: an id that only landed in the
# validation split would otherwise index past the end of the table.
num_users = int(filtered_user["user_id_encoded"].max()) + 1
num_animes = int(filtered_user["anime_id_encoded"].max()) + 1
embedding_dim = 32  # Width of each user/anime latent vector; tune as needed

In [453]:
# Preview the training targets (scaled scores).
y_train

665344     0.666667
2122242    0.555556
3245820    0.666667
3185269    0.444444
578360     0.555556
             ...   
841010     0.666667
1928184    0.888889
2914311    0.555556
1947867    0.555556
1977670    0.444444
Name: Score_scaled, Length: 2653588, dtype: float64

In [454]:
# One integer id per sample for each of the two model inputs.
user_input = keras.Input(shape=(1,), name='user_encoded')
anime_input = keras.Input(shape=(1,), name='anime_encoded')

In [455]:
# Look up a learned `embedding_dim`-wide vector for each user id and anime id.
user_embedding_layer = keras.layers.Embedding(
    input_dim=num_users, output_dim=embedding_dim, name='user_embedding'
)
anime_embedding_layer = keras.layers.Embedding(
    input_dim=num_animes, output_dim=embedding_dim, name='anime_embedding'
)

user_embeddings = user_embedding_layer(user_input)
anime_embeddings = anime_embedding_layer(anime_input)

In [456]:
# Both embeddings are (batch, 1, embedding_dim); contracting the last axis
# gives one user-anime affinity value per sample, which is then flattened.
dot_layer = keras.layers.Dot(axes=2, name='dot_product')
dot_product = dot_layer([user_embeddings, anime_embeddings])
flattened = keras.layers.Flatten()(dot_product)

In [457]:
# Hidden layer applied on top of the flattened dot product.
hidden_layer = keras.layers.Dense(units=64, activation='relu')
dense = hidden_layer(flattened)

In [458]:
# Sigmoid keeps the prediction in [0, 1], the same range as Score_scaled.
output_layer = keras.layers.Dense(units=1, activation="sigmoid")
output = output_layer(dense)

In [459]:
# Assemble the two-input (user id, anime id) -> predicted scaled score model.
model_inputs = [user_input, anime_input]
model = keras.Model(inputs=model_inputs, outputs=output)

In [460]:
# Regress the scaled score with MSE loss; also report MSE and MAE as metrics.
model.compile(
    optimizer="adam",
    loss="mse",
    metrics=["mse", "mae"],
)

In [461]:
# Print layer shapes and parameter counts as a sanity check of the architecture.
model.summary()

Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_encoded (InputLayer)   [(None, 1)]                  0         []                            
                                                                                                  
 anime_encoded (InputLayer)  [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 32)                538816    ['user_encoded[0][0]']        
                                                                                                  
 anime_embedding (Embedding  (None, 1, 32)                265024    ['anime_encoded[0][0]']       
 )                                                                                          

In [462]:
# The model has two inputs, so user ids and anime ids are passed as a pair.
train_inputs = [X_train['user_id_encoded'], X_train['anime_id_encoded']]
val_inputs = [X_val['user_id_encoded'], X_val['anime_id_encoded']]

# Train for 5 epochs with mini-batches of 64, validating after each epoch.
model.fit(
    train_inputs,
    y_train,
    epochs=5,
    batch_size=64,
    validation_data=(val_inputs, y_val),
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1afe93c23d0>

In [463]:
# Inputs are taken from the validation split (X_val).
X_test_user, X_test_item = X_val['user_id_encoded'], X_val['anime_id_encoded']

# Predicted scaled scores for every (user, anime) pair in that split.
predictions = model.predict([X_test_user, X_test_item])




In [465]:
# Persist the trained model to an HDF5 file.
model.save(filepath='model.h5')

In [468]:
# Persist the encoded ratings table. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way through.
with open('user.pkl', 'wb') as pickle_file:
    pickle.dump(filtered_user, pickle_file)

In [470]:
# Resolve the title to the id space the model was trained on. The row label
# of `data` is unrelated to the categorical code used as the embedding row,
# so go title -> mal_id -> anime_id_encoded.
# Raises IndexError if the title is missing from `data` or was filtered out
# of `filtered_user`.
target_mal_id = data.loc[data.title == "Naruto", "mal_id"].iloc[0]
anime_id = filtered_user.loc[
    filtered_user["anime_id"] == target_mal_id, "anime_id_encoded"
].iloc[0]

In [467]:
# Number of distinct encoded anime ids. Computed from the table itself so the
# result does not depend on which object `anime_ids` currently refers to
# (it is rebound from a Categorical to an ndarray elsewhere in the notebook).
anime_size = filtered_user["anime_id_encoded"].nunique()
anime_size

8282

In [466]:
# Every encoded anime id, in ascending order. Sorting explicitly (instead of
# relying on set iteration order) guarantees anime_ids[i] == i whenever the
# codes are contiguous, which position-based lookups downstream depend on.
anime_ids = np.sort(filtered_user["anime_id_encoded"].unique())

In [473]:
# Assume 'user_id' is 203
user_id = 909

# Create an array of the user ID repeated for each anime
user_ids = np.array([user_id]*anime_size)

# Make predictions for all animes for this user
predictions = model.predict([user_ids, anime_ids])
top_anime_index = predictions.flatten().argsort()[-20:][::-1]




In [476]:
# Translate the selected encoded ids back to catalogue ids, then to titles.
# The displayed titles follow the row order of `data`, not the ranking.
matched_rows = filtered_user.loc[
    filtered_user["anime_id_encoded"].isin(top_anime_index), ["anime_id"]
]
rec_anime = matched_rows["anime_id"].unique()
data.loc[data["mal_id"].isin(rec_anime), "title"]

25      Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...
406                                              Mushishi
1414                                Byousoku 5 Centimeter
1821    Galaxy Angel Music Collection: Shouen to Shien...
2073                Kara no Kyoukai Movie 1: Fukan Fuukei
2652     Kara no Kyoukai Movie 2: Satsujin Kousatsu (Zen)
2653            Kara no Kyoukai Movie 3: Tsuukaku Zanryuu
2751            Clannad: Mou Hitotsu no Sekai, Tomoyo-hen
2825                Kara no Kyoukai Movie 4: Garan no Dou
2826                 Kara no Kyoukai Movie 5: Mujun Rasen
3046      Kara no Kyoukai Movie 7: Satsujin Kousatsu (Go)
3441                           Gintama: Shiroyasha Koutan
5242                   Mushishi Zoku Shou: Odoro no Michi
5260    Aoki Hagane no Arpeggio: Ars Nova Movie 2 - Ca...
5333                         Koukaku Kidoutai: Shin Movie
5457                  Mushishi Zoku Shou: Suzu no Shizuku
5458                                             Gintama°
5833          

In [475]:
from sklearn.metrics.pairwise import cosine_similarity

# Encoded id (row of the anime embedding matrix) of the query anime.
# NOTE(review): hard-coded; 10 is assumed to be the code of the intended
# title - confirm against `filtered_user.anime_id_encoded`.
anime_id = 10

# Learned embedding matrix: one row per encoded anime id.
anime_embedding = model.get_layer('anime_embedding').get_weights()[0]
target_anime_embedding = anime_embedding[anime_id]

# Cosine similarity between the query anime and every anime (shape (1, n)).
similarities = cosine_similarity([target_anime_embedding], anime_embedding)

# Rows of the 10 most similar animes, best first. The query itself is
# included, since its similarity to itself is 1.
top_10_indices = similarities[0].argsort()[-10:][::-1]

# A row index of the embedding matrix IS the encoded anime id, so no lookup
# through `anime_ids` is needed (that was only right when the array happened
# to be the identity mapping).
top_10_anime_ids = top_10_indices

# Translate encoded ids -> catalogue ids -> titles. The displayed titles
# follow the row order of `data`, not the similarity ranking.
a = filtered_user[filtered_user.anime_id_encoded.isin(top_10_anime_ids)][['anime_id']]
rec_anime = a.anime_id.unique()
data[data.mal_id.isin(rec_anime)]['title']

10                                                 Naruto
231                                                Bleach
393     Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...
796     Naruto Movie 2: Dai Gekitotsu! Maboroshi no Ch...
1451                                   Naruto: Shippuuden
1780    Naruto Movie 3: Dai Koufun! Mikazuki Jima no A...
1983                           Naruto: Shippuuden Movie 1
2855                  Naruto: Shippuuden Movie 2 - Kizuna
4856    One Piece: Episode of Merry - Mou Hitori no Na...
6060        Koutetsujou no Kabaneri Movie 2: Moeru Inochi
Name: title, dtype: object