# Anime Recommendation Using MAL Database 2020 Dataset and Graph Machine Learning
Author: Zach Kangas

Date: 2/19/2023

Class: Graph Machine Learning

Professor: Dr. Urbain

# Importing Relevant Packages

In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from google.colab import drive

# Importing and Processing the Data

In [2]:
# Mount Google Drive and derive the dataset base path from the same mount
# point. Using an absolute path keeps later reads working regardless of the
# notebook's current working directory (a relative "drive/..." path only
# resolves while the working directory is /content).
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)
base_path = DRIVE_MOUNT_POINT + "/My Drive/Colab Notebooks/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Read the anime metadata table (one row per MAL_ID) and preview it.
anime_dataset_path = f"{base_path}dataset/anime.csv"
anime_df = pd.read_csv(filepath_or_buffer=anime_dataset_path)
anime_df.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


In [4]:
ratings_dataset_path = base_path + "dataset/rating_complete.csv"
ratings_df = pd.read_csv(ratings_dataset_path)

# Keep only the first tenth of the rows (about 5.76M ratings for this file,
# see the shape printed in the next cell). `.copy()` detaches the sample from
# the full frame so the column assignment below does not raise pandas'
# SettingWithCopyWarning and the full frame can be garbage-collected.
ratings_df = ratings_df.head(len(ratings_df) // 10).copy()

# Rescale ratings from the 1-10 scale to the 0-1 range: 1 -> 0.0, 10 -> 1.0.
ratings_df["rating"] = (ratings_df["rating"] - 1) / 9
ratings_df.head()

Unnamed: 0,user_id,anime_id,rating
0,0,430,0.888889
1,0,1004,0.444444
2,0,3010,0.666667
3,0,570,0.666667
4,0,2762,0.888889


In [5]:
# Report (rows, columns) of the sampled ratings table.
row_count, column_count = ratings_df.shape
print((row_count, column_count))

(5763327, 3)


In [6]:
# Number of distinct anime IDs and user IDs present in the sampled ratings.
# NOTE: the next cell reassigns both names to (largest ID + 1).
anime_count = len(ratings_df["anime_id"].unique())
users_count = len(ratings_df["user_id"].unique())
print(anime_count, users_count)

15259 30954


In [7]:
# The raw IDs are used directly as embedding indices in the model below, so
# each table needs (largest ID + 1) rows. This is larger than the number of
# distinct IDs counted in the previous cell because the IDs are not contiguous.
largest_anime_id = ratings_df["anime_id"].max()
largest_user_id = ratings_df["user_id"].max()
anime_count, users_count = largest_anime_id + 1, largest_user_id + 1
print(anime_count, users_count)

48457 35327


# Modeling
## Creating the Model

In [8]:
# Two-tower embedding model: a user ID and an anime ID are each looked up in
# their own embedding table, the two vectors are concatenated, and a small
# MLP regresses a single rating value from the pair.
EMBEDDING_DIM = 32   # width of each user / anime embedding vector
HIDDEN_UNITS = 40    # width of each hidden Dense layer

# One integer ID per example for each input.
users = keras.layers.Input(shape=(1,))
animes = keras.layers.Input(shape=(1,))

# Lookup tables indexed directly by raw ID (sizes computed in the cell above).
users_embedding = keras.layers.Embedding(
    input_dim=users_count, output_dim=EMBEDDING_DIM
)(users)
animes_embedding = keras.layers.Embedding(
    input_dim=anime_count, output_dim=EMBEDDING_DIM
)(animes)

# (batch, 1, EMBEDDING_DIM) -> (batch, EMBEDDING_DIM)
users_flatten = keras.layers.Flatten()(users_embedding)
animes_flatten = keras.layers.Flatten()(animes_embedding)

x = keras.layers.Concatenate()([users_flatten, animes_flatten])
for hidden_width in (HIDDEN_UNITS, HIDDEN_UNITS):
    x = keras.layers.Dense(hidden_width, activation='relu')(x)
# Single linear output unit: the predicted (normalised) rating.
x = keras.layers.Dense(1)(x)

model = keras.models.Model(inputs=[users, animes], outputs=x)

In [9]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 1, 32)        1130464     ['input_1[0][0]']                
                                                                                                  
 embedding_1 (Embedding)        (None, 1, 32)        1550624     ['input_2[0][0]']                
                                                                                              

## Training the Model

In [10]:
# Train the rating regressor with Adam on mean squared error.
# `metrics` is passed as a list, which is the form Keras documents; newer
# Keras versions reject a bare Metric instance.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=[keras.metrics.MeanSquaredError()],
)

# Inputs are the raw user / anime ID columns; the target is the rating column
# of ratings_df (rescaled to 0-1 when the ratings were loaded).
model.fit(
    x=[ratings_df.user_id.values, ratings_df.anime_id.values],
    y=ratings_df.rating.values,
    epochs=2,
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fb35dc9ef10>