In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import HeNormal, GlorotNormal
import tensorflow as tf

In [2]:
# 무비렌즈 데이터 세트 호출
data_path = './ml-latest-small/'
df = pd.read_csv(data_path+'ratings.csv')

In [3]:
# binary implicit feedback
# rating >= 3.0일 경우 positive로 간주
df['rating'] = df['rating'].apply(lambda x: 1 if x >= 3.0 else 0)

# 전체 유저 수와 영화 수
n_users = df.userId.nunique()
n_movies = df.movieId.nunique()

# 데이터를 train/valid로 나눔
train, validation = train_test_split(df, test_size=0.2, random_state=42)

# 유저 아이디 indexing
unique_user_ids = df["userId"].unique()
userId_dict = {id: i for i, id in enumerate(unique_user_ids)}


# 영화 아이디 indexing
unique_movie_ids = df["movieId"].unique()
movieId_dict = {id: i for i, id in enumerate(unique_movie_ids)}

# 인덱싱 된 딕셔너리를 사용하여 ID 맵핑
train["userId"] = train["userId"].map(userId_dict)
train["movieId"] = train["movieId"].map(movieId_dict)

validation["userId"] = validation["userId"].map(userId_dict)
validation["movieId"] = validation["movieId"].map(movieId_dict)

# 전체 유니크 유저 수 
num_unique_users = df['userId'].nunique() + 1
# 전체 유니크 영화 수
num_unique_movies = df['movieId'].nunique() + 1

In [4]:
# 유저/아이템 입력 레이어
user_input = Input(shape=(1,))
movie_input = Input(shape=(1,))

# 임베딩 차원
n_latent_factors = 20

# 임베딩 레이어 정의
user_embedding = Embedding(num_unique_users, n_latent_factors, name='user_embedding')(user_input)
movie_embedding = Embedding(num_unique_movies, n_latent_factors, name='movie_embedding')(movie_input)

# 임베딩 flatten
user_vector = Flatten()(user_embedding)
movie_vector = Flatten()(movie_embedding)

# 유저 및 영화 임베딩 벡터 컨캣
concat = Concatenate()([user_vector, movie_vector])

Metal device set to: Apple M1 Pro

systemMemory: 32.00 GB
maxCacheSize: 10.67 GB



2025-04-09 20:35:43.488479: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-04-09 20:35:43.488651: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [17]:
# 결과 저장 테이블
results_df = pd.DataFrame(columns=['initializer', 'activation', 'last_loss', 'last_val_loss'])

# Initialization, activation 옵션
initializers = [HeNormal(), HeNormal(), GlorotNormal(), GlorotNormal()]
activations = ['relu', 'sigmoid', 'relu', 'sigmoid']

for init, act in zip(initializers, activations):
    
    print("\n")
    print("initializer: {}".format(init.__class__.__name__))
    print("activation: {}".format(act))
    print("\n")

    def create_mlp(hidden_layers, output):
        for i in range(len(hidden_layers)):
            if i == 0:
                hidden = Dense(hidden_layers[i], activation=act, kernel_initializer=init)(output)
            else:
                hidden = Dense(hidden_layers[i], activation=act, kernel_initializer=init)(hidden)
        output = Dense(1, activation='sigmoid', kernel_initializer=init)(hidden)
        return output

    output = create_mlp([64, 64], concat)

    model = Model(inputs=[user_input, movie_input], outputs=output)

    auc = tf.keras.metrics.AUC()
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', auc])

    history = model.fit([train.userId, train.movieId], train.rating, epochs=3, verbose=2, validation_data=([validation.userId, validation.movieId], validation.rating))

    # DataFrame에 결과 append 
    
    results_df = pd.concat([results_df,
                            pd.DataFrame.from_records([{'initializer': init.__class__.__name__,
                                        'activation': act,
                                        'last_loss': history.history['loss'][-1],
                                        'last_val_loss': history.history['val_loss'][-1]}])])



initializer: HeNormal
activation: relu


Epoch 1/3


2025-04-09 20:47:50.672644: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2025-04-09 20:48:16.536902: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


2521/2521 - 30s - loss: 0.2789 - accuracy: 0.8846 - auc_2: 0.9046 - val_loss: 0.4549 - val_accuracy: 0.8166 - val_auc_2: 0.7770 - 30s/epoch - 12ms/step
Epoch 2/3
2521/2521 - 28s - loss: 0.2428 - accuracy: 0.9006 - auc_2: 0.9288 - val_loss: 0.4822 - val_accuracy: 0.8144 - val_auc_2: 0.7758 - 28s/epoch - 11ms/step
Epoch 3/3
2521/2521 - 27s - loss: 0.2187 - accuracy: 0.9110 - auc_2: 0.9425 - val_loss: 0.5211 - val_accuracy: 0.8057 - val_auc_2: 0.7668 - 27s/epoch - 11ms/step


initializer: HeNormal
activation: sigmoid


Epoch 1/3


  results_df = pd.concat([results_df,
2025-04-09 20:49:16.020250: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2025-04-09 20:49:40.813902: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


2521/2521 - 29s - loss: 0.3387 - accuracy: 0.8565 - auc_3: 0.8552 - val_loss: 0.4063 - val_accuracy: 0.8306 - val_auc_3: 0.7934 - 29s/epoch - 12ms/step
Epoch 2/3
2521/2521 - 27s - loss: 0.3121 - accuracy: 0.8663 - auc_3: 0.8803 - val_loss: 0.4124 - val_accuracy: 0.8307 - val_auc_3: 0.7908 - 27s/epoch - 11ms/step
Epoch 3/3
2521/2521 - 30s - loss: 0.2934 - accuracy: 0.8741 - auc_3: 0.8947 - val_loss: 0.4260 - val_accuracy: 0.8295 - val_auc_3: 0.7873 - 30s/epoch - 12ms/step


initializer: GlorotNormal
activation: relu


Epoch 1/3


2025-04-09 20:50:43.144021: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2025-04-09 20:51:09.543626: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


2521/2521 - 32s - loss: 0.2617 - accuracy: 0.8910 - auc_4: 0.9174 - val_loss: 0.4967 - val_accuracy: 0.8159 - val_auc_4: 0.7757 - 32s/epoch - 13ms/step
Epoch 2/3
2521/2521 - 28s - loss: 0.2207 - accuracy: 0.9094 - auc_4: 0.9419 - val_loss: 0.5189 - val_accuracy: 0.8012 - val_auc_4: 0.7660 - 28s/epoch - 11ms/step
Epoch 3/3
2521/2521 - 27s - loss: 0.1971 - accuracy: 0.9203 - auc_4: 0.9539 - val_loss: 0.5592 - val_accuracy: 0.8009 - val_auc_4: 0.7557 - 27s/epoch - 11ms/step


initializer: GlorotNormal
activation: sigmoid


Epoch 1/3


2025-04-09 20:52:08.899979: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2025-04-09 20:52:33.897635: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


2521/2521 - 29s - loss: 0.3308 - accuracy: 0.8620 - auc_5: 0.8620 - val_loss: 0.4136 - val_accuracy: 0.8247 - val_auc_5: 0.7891 - 29s/epoch - 12ms/step
Epoch 2/3
2521/2521 - 27s - loss: 0.3043 - accuracy: 0.8699 - auc_5: 0.8871 - val_loss: 0.4237 - val_accuracy: 0.8294 - val_auc_5: 0.7915 - 27s/epoch - 11ms/step
Epoch 3/3
2521/2521 - 27s - loss: 0.2849 - accuracy: 0.8769 - auc_5: 0.9014 - val_loss: 0.4502 - val_accuracy: 0.8265 - val_auc_5: 0.7862 - 27s/epoch - 11ms/step


In [18]:
history

<keras.callbacks.History at 0x364f0a920>

In [20]:
# Print the DataFrame
display(results_df.sort_values(by='last_val_loss'))

Unnamed: 0,initializer,activation,last_loss,last_val_loss
0,HeNormal,sigmoid,0.29342,0.426024
0,GlorotNormal,sigmoid,0.284942,0.450174
0,HeNormal,relu,0.218692,0.521057
0,GlorotNormal,relu,0.197096,0.559213
