In [1]:
import pandas as pd 
import numpy as np, random as python_random
import ast
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle

# Model
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Concatenate, Dense, Input, Embedding, Flatten, Dropout, Multiply
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [2]:
def reset_random_seeds():
    tf.random.set_seed(46)
    np.random.seed(46)
    python_random.seed(46)
reset_random_seeds()

In [3]:
df = pd.read_csv("./lyrics_embeds/emb_all_.csv", index_col= 0 )

In [4]:
df.head()

Unnamed: 0,track_uri,track_name,artist,user_id,duration_ms,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,lyrics,like,bert_emb
0,0qCQg5TkfBfkTsQP3IhAmC,Southside Of Heaven,Ryan Bingham,95797a27c7ec61a04f0c9437448eee7233e8a456,379160,0.387,0.499,7,-10.207,1,0.0259,0.0029,0.00373,0.0695,0.333,111.032,"When I die, Lord, oh, won't you put my soul up...",0,"[-0.3453507125377655, 0.40673190355300903, -0...."
1,5B3UjDMiCwWFgnXqt0isd5,Martin Sheen Or JFK,Yellowcard,2e91726a1f802c680a8499544edc3561ce50b965,226746,0.256,0.756,2,-5.395,1,0.039,0.000128,0.0,0.116,0.268,197.512,Here it is\n One more glass for these broken h...,0,"[-0.3730098307132721, 0.11017447710037231, 0.0..."
2,23qnota5Iyg4f9LNdNIC1D,Televators,The Mars Volta,aafd6caf4b6dad692dd93623cc5c4e5b5a919600,378786,0.306,0.541,11,-8.195,0,0.0383,0.457,6e-06,0.122,0.183,122.041,Just as he hit\n The ground\n They lowered a t...,0,"[-0.684442400932312, 0.43042245507240295, -0.5..."
3,2sy0icOIskeP2lCqgZiTyE,Talk Show Host,Radiohead,28b78e314a00f86e24ae7ab1e40392861fbdc5a1,281000,0.535,0.479,5,-14.112,0,0.0311,0.277,0.0285,0.139,0.504,88.841,I want to\n I want to be someone else or I'll ...,0,"[-0.24317079782485962, -0.031122945249080658, ..."
4,19olraJp56OeGTQSZwLZc1,Dying Inside,Saint Vitus,8c5d4fe93b5eebbbbe5b80fdac548920c08c395c,445126,0.313,0.406,1,-14.754,1,0.0347,0.0278,0.000437,0.125,0.218,112.856,I have got to change my ways\n 'Cause I'm losi...,0,"[-0.2310483604669571, 0.29943153262138367, -0...."


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 104098 entries, 0 to 104097
Data columns (total 19 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   track_uri         104098 non-null  object 
 1   track_name        104098 non-null  object 
 2   artist            104098 non-null  object 
 3   user_id           104098 non-null  object 
 4   duration_ms       104098 non-null  int64  
 5   danceability      104098 non-null  float64
 6   energy            104098 non-null  float64
 7   key               104098 non-null  int64  
 8   loudness          104098 non-null  float64
 9   mode              104098 non-null  int64  
 10  speechiness       104098 non-null  float64
 11  acousticness      104098 non-null  float64
 12  instrumentalness  104098 non-null  float64
 13  liveness          104098 non-null  float64
 14  valence           104098 non-null  float64
 15  tempo             104098 non-null  float64
 16  lyrics            104098 

In [6]:
df = df.rename(columns= {'track_uri':'track_id'})

In [7]:
# csv 형식이라.. str -> 리스트로 바꾸는 작업
# 새로 불러올때마다 실행 
df['bert_emb'] = df['bert_emb'].apply(ast.literal_eval) 

In [8]:
len(df['bert_emb'].iloc[0])

768

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 104098 entries, 0 to 104097
Data columns (total 19 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   track_id          104098 non-null  object 
 1   track_name        104098 non-null  object 
 2   artist            104098 non-null  object 
 3   user_id           104098 non-null  object 
 4   duration_ms       104098 non-null  int64  
 5   danceability      104098 non-null  float64
 6   energy            104098 non-null  float64
 7   key               104098 non-null  int64  
 8   loudness          104098 non-null  float64
 9   mode              104098 non-null  int64  
 10  speechiness       104098 non-null  float64
 11  acousticness      104098 non-null  float64
 12  instrumentalness  104098 non-null  float64
 13  liveness          104098 non-null  float64
 14  valence           104098 non-null  float64
 15  tempo             104098 non-null  float64
 16  lyrics            104098 

In [10]:
df_copy = df.copy()

In [11]:
def Load_data(df_copy, user, song, duration, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, like, lyrics, bert): # extract items
    df_copy = df_copy[[user, song, duration, danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo,  like, lyrics, bert]] 
    df_copy.rename(columns = {user: "user",
                        song: "song",
                        duration: "duration",
                        tempo : "tempo",
                        bert : "Bert"},
             inplace = True)


    le = LabelEncoder()
    df_copy["user"] = le.fit_transform(df_copy["user"].values)
    df_copy["song"] = le.fit_transform(df_copy["song"].values)

    USER_LEN = df_copy["user"].max() + 1 # number of users
    ITEM_LEN = df_copy["song"].max() + 1 # number of items
    return df_copy, USER_LEN, ITEM_LEN

In [12]:
df_copy, USER_LEN, ITEM_LEN = Load_data(df_copy, "user_id", "track_id", "duration_ms", "danceability", "energy", "key",
                                         "loudness", "mode", "speechiness", "acousticness", "instrumentalness", "liveness",
                                           "valence", "tempo", "like", "lyrics", "bert_emb")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_copy.rename(columns = {user: "user",
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_copy["user"] = le.fit_transform(df_copy["user"].values)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_copy["song"] = le.fit_transform(df_copy["song"].values)


In [13]:
df_copy.head()

Unnamed: 0,user,song,duration,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,like,lyrics,Bert
0,50831,949,379160,0.387,0.499,7,-10.207,1,0.0259,0.0029,0.00373,0.0695,0.333,111.032,0,"When I die, Lord, oh, won't you put my soul up...","[-0.3453507125377655, 0.40673190355300903, -0...."
1,15875,2663,226746,0.256,0.756,2,-5.395,1,0.039,0.000128,0.0,0.116,0.268,197.512,0,Here it is\n One more glass for these broken h...,"[-0.3730098307132721, 0.11017447710037231, 0.0..."
2,58212,1734,378786,0.306,0.541,11,-8.195,0,0.0383,0.457,6e-06,0.122,0.183,122.041,0,Just as he hit\n The ground\n They lowered a t...,"[-0.684442400932312, 0.43042245507240295, -0.5..."
3,13808,2064,281000,0.535,0.479,5,-14.112,0,0.0311,0.277,0.0285,0.139,0.504,88.841,0,I want to\n I want to be someone else or I'll ...,"[-0.24317079782485962, -0.031122945249080658, ..."
4,47751,1192,445126,0.313,0.406,1,-14.754,1,0.0347,0.0278,0.000437,0.125,0.218,112.856,0,I have got to change my ways\n 'Cause I'm losi...,"[-0.2310483604669571, 0.29943153262138367, -0...."


In [14]:
def preprocessing_data(df): # 8/2로 데이터 나눔
    TRAIN_SIZE = 0.8
    likes = shuffle(df, random_state=1)
    cutoff = int(TRAIN_SIZE * len(likes))
    train_df = likes.iloc[:cutoff]
    test_df = likes.iloc[cutoff:]

    return train_df, test_df

In [15]:
train_df, test_df = preprocessing_data(df_copy)

In [16]:
# 스케일링

audio_features = [
    "duration","danceability", "energy", "loudness", "speechiness",
    "acousticness", "instrumentalness", "liveness", "valence", "tempo"
]


scaler = StandardScaler()
scaler.fit(train_df[audio_features])

train_audio_scaled = scaler.transform(train_df[audio_features])
test_audio_scaled = scaler.transform(test_df[audio_features])

In [17]:
train_user = train_df["user"].values
train_item = train_df["song"].values
train_duration = train_df["duration"].values
train_key = train_df["key"].values
train_mode = train_df["mode"].values
train_bert = np.array(train_df["Bert"].tolist())
train_y = train_df["like"].values

train_duration = train_audio_scaled[:, audio_features.index("duration")] 
train_danceability = train_audio_scaled[:, audio_features.index("danceability")]
train_energy  = train_audio_scaled[:, audio_features.index("energy")]
train_loudness = train_audio_scaled[:, audio_features.index("loudness")]
train_speechiness = train_audio_scaled[:, audio_features.index("speechiness")]
train_acousticness = train_audio_scaled[:, audio_features.index("acousticness")]
train_instrumentalness = train_audio_scaled[:, audio_features.index("instrumentalness")]
train_liveness = train_audio_scaled[:, audio_features.index("liveness")]
train_valence = train_audio_scaled[:, audio_features.index("valence")]
train_tempo = train_audio_scaled[:, audio_features.index("tempo")]


In [18]:

test_user = test_df["user"].values
test_item = test_df["song"].values
test_key = test_df["key"].values           
test_mode = test_df["mode"].values          
test_bert = np.array(test_df["Bert"].tolist())
test_y = test_df["like"].values

test_duration = test_audio_scaled[:, audio_features.index("duration")] 
test_danceability = test_audio_scaled[:, audio_features.index("danceability")]
test_energy = test_audio_scaled[:, audio_features.index("energy")]
test_loudness = test_audio_scaled[:, audio_features.index("loudness")]
test_speechiness = test_audio_scaled[:, audio_features.index("speechiness")]
test_acousticness = test_audio_scaled[:, audio_features.index("acousticness")]
test_instrumentalness = test_audio_scaled[:, audio_features.index("instrumentalness")]
test_liveness = test_audio_scaled[:, audio_features.index("liveness")]
test_valence = test_audio_scaled[:, audio_features.index("valence")]
test_tempo = test_audio_scaled[:, audio_features.index("tempo")]


In [19]:
print(f"train_user shape: {train_user.shape}")
print(f"train_y shape: {train_y.shape}")

print(f"test_user shape: {test_user.shape}")
print(f"test_user_bert shape: {test_bert.shape}")
print(f"test_tempo shape: {test_tempo.shape}")
print(f"test_valence shape: {test_valence.shape}")

train_user shape: (83278,)
train_y shape: (83278,)
test_user shape: (20820,)
test_user_bert shape: (20820, 768)
test_tempo shape: (20820,)
test_valence shape: (20820,)


In [20]:
'''Parameter setting'''
learning_rate = 0.0001 # [0.0001, 0.0002, 0.001, 0.002] 
batch_size = 128 # [32, 64, 128]  
dropout = 0.4 # [ 0.2, 0.3, 0.4 ] 

In [21]:
'''전체 모델 (BERT포함)'''

def ModelBuild_Full(user_num, item_num, id_dims):

    # user
    user_input = Input(shape=(1,), dtype='int32', name='UserInput')
    user_embedding = Embedding(user_num, id_dims, input_length=user_input.shape[1], name='UserIDEmb')(user_input)
    user_embedding = Flatten(name='UserFlatten')(user_embedding)

    # item
    item_input = Input(shape=(1,), dtype='int32', name='ItemInput')
    item_embedding = Embedding(item_num, id_dims, input_length=user_input.shape[1], name='ItemIDEmb')(item_input)
    item_embedding = Flatten(name='itemFlatten')(item_embedding)

    # GMF Layer
    GMF = Multiply()([user_embedding, item_embedding])

    # Bert
    bert_input = Input(shape=(768,), name='Bert') 
    bert_1 = Dense(512, activation='relu')(bert_input)
    bert_1_dropout = Dropout(dropout)(bert_1)
    bert_2 = Dense(128, activation='relu')(bert_1_dropout)
    bert_2_dropout = Dropout(dropout)(bert_2)
    bert_3 = Dense(64, activation='relu')(bert_2_dropout)
    bert_3_dropout = Dropout(dropout)(bert_3)

    
    #### audio features
    duration_input = Input(shape=(1,), dtype = "float32", name="duration")
    danceability_input = Input(shape=(1,), dtype = "float32", name="danceability")
    energy_input = Input(shape=(1,), dtype = "float32", name="energy")
    key_input = Input(shape=(1,), dtype = "float32", name="key")
    loudness_input = Input(shape=(1,), dtype = "float32", name="loudness")
    mode_input = Input(shape=(1,), dtype = "float32", name="mode")
    speechiness_input = Input(shape=(1,), dtype = "float32", name="speechiness")
    acousticness_input = Input(shape=(1,), dtype = "float32", name="acousticness")
    instrumentalness_input = Input(shape=(1,), dtype = "float32", name="instrumentalness")
    liveness_input = Input(shape=(1,), dtype = "float32", name="liveness")
    valence_input = Input(shape=(1,), dtype = "float32", name="valence")
    tempo_input = Input(shape=(1,), dtype = "float32", name="tempo")

    audio_inputs = [
        duration_input, danceability_input, energy_input, key_input, loudness_input,
        mode_input, speechiness_input, acousticness_input, instrumentalness_input,
        liveness_input, valence_input, tempo_input
    ]


     # MLP Layer
    MLP_input = Concatenate(name="MLP_input")([user_embedding, item_embedding] + audio_inputs + [bert_3_dropout])

    MLP_1 = Dense(64, activation = "relu")(MLP_input)
    MLP_1_dropout = Dropout(dropout)(MLP_1)
    MLP_2 = Dense(32, activation = "relu")(MLP_1_dropout)
    MLP_2_dropout = Dropout(dropout)(MLP_2)

    # NeuMF Layer

    NeuMF_input = Concatenate(name = "NeuMF_input")([GMF, MLP_2_dropout])

    NeuMF_1 = Dense(64, activation = "relu")(NeuMF_input)
    NeuMF_1_dropout = Dropout(dropout)(NeuMF_1)
    NeuMF_2 = Dense(32, activation = "relu")(NeuMF_1_dropout)
    NeuMF_2_dropout = Dropout(dropout)(NeuMF_2)
    outputs = Dense(1, activation='sigmoid', name='outputs')(NeuMF_2_dropout)

    model = Model(inputs=[user_input, item_input, duration_input, danceability_input, energy_input,
                            key_input, loudness_input, mode_input, speechiness_input, acousticness_input,
                            instrumentalness_input, liveness_input, valence_input, tempo_input, bert_input], outputs=outputs)
    

    return model

In [22]:
#model = ModelBuild_Full(user_num = USER_LEN, item_num = ITEM_LEN, id_dims = 128)
model = ModelBuild_Full(user_num = USER_LEN, item_num = ITEM_LEN, id_dims = 32)



In [23]:
adam = Adam(learning_rate=learning_rate)

model.compile(optimizer=adam, loss='binary_crossentropy')
es = EarlyStopping(monitor='val_loss', mode = 'min', verbose = 1, patience = 3, restore_best_weights = True)

In [24]:
'''모델 학습'''

model.fit([train_user, train_item, train_duration, train_danceability, train_energy,
            train_key, train_loudness, train_mode, train_speechiness, train_acousticness,
            train_instrumentalness, train_liveness,train_valence, train_tempo, train_bert],
              train_y,
              batch_size = batch_size,
              epochs = 50,
              callbacks=[es],
              validation_split = 0.1)

Epoch 1/50
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - loss: 0.7267 - val_loss: 0.6929
Epoch 2/50
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 20ms/step - loss: 0.6990 - val_loss: 0.6930
Epoch 3/50
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - loss: 0.6959 - val_loss: 0.6930
Epoch 4/50
[1m586/586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - loss: 0.6949 - val_loss: 0.6930
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 1.


<keras.src.callbacks.history.History at 0x24bfe832fb0>

In [25]:
prediction = model.predict([test_user, test_item, test_duration, test_danceability, test_energy,
                                                 test_key, test_loudness, test_mode, test_speechiness, test_acousticness,
                                                  test_instrumentalness, test_liveness, test_valence, test_tempo, test_bert])

[1m651/651[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step


In [26]:
#with bert

binary_pred = (prediction > 0.5).astype(int)

accuracy = accuracy_score(test_y, binary_pred)
f1 = f1_score(test_y, binary_pred)
precision = precision_score(test_y, binary_pred)
recall = recall_score(test_y, binary_pred)

print("with BERT")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")

with BERT
Accuracy: 0.5012
F1 Score: 0.0367
Precision: 0.4853
Recall: 0.0191
