In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf

from keras._tf_keras.keras.models import Model, load_model
from keras._tf_keras.keras.layers import Input, Embedding, Dense, Flatten, Concatenate, GlobalAveragePooling1D, Normalization, Multiply
from keras._tf_keras.keras.preprocessing.sequence import pad_sequences

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Report the TensorFlow build and the devices it can see
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

# One count line per device kind; the label is passed as a separate print
# argument so the output keeps the double space before the number.
for label, device_kind in (
    ("Num GPUs Available: ", 'GPU'),
    ("Num CPUs Available: ", 'CPU'),
    ("Num TPUs Available: ", 'TPU'),
):
    print(label, len(tf.config.list_physical_devices(device_kind)))

2.16.1
[]
Num GPUs Available:  0
Num CPUs Available:  1
Num TPUs Available:  0


In [2]:
# Data Preprocessing
def preprocess_data(file_path):
    """Load the place CSV and normalise its ``tags`` and ``rating`` columns.

    Steps:
      1. Read the CSV and drop the ``website``, ``place_links``,
         ``description`` and ``territory_id`` columns.
      2. Parse every ``tags`` cell into a list of ints; cells that are
         missing (NaN) or cannot be parsed become an empty list.
      3. Cast ``rating`` to float and fill missing ratings with the mean
         of the available ratings.
      4. Right-pad every tag list to the length of the longest one and
         store one padded array per row in ``tags``.

    Args:
        file_path: Path of the place CSV to read.

    Returns:
        tuple: ``(whole_data, unique_tags)``. ``unique_tags`` is the sorted
        array of distinct values found in the padded tag matrix, so the
        padding value (``pad_sequences`` fills with 0 by default) is part
        of it whenever at least one row had to be padded.

    Raises:
        KeyError: Raised by pandas if one of the columns to drop is absent.
    """
    # The original passed a stray 'orange' argument that was printed verbatim.
    print("Preprocessing data... 🛠️")

    whole_data = pd.read_csv(file_path)
    whole_data = whole_data.drop(
        columns=["website", "place_links", "description", "territory_id"]
    )

    def safe_int_convert(tag_list):
        """Turn one raw ``tags`` cell into a list of ints (empty if unusable)."""
        if isinstance(tag_list, list):
            return tag_list
        if isinstance(tag_list, str):
            try:
                return [int(tag) for tag in tag_list.split(',')]
            except ValueError:
                # Any non-numeric token invalidates the whole cell.
                return []
        if isinstance(tag_list, (float, np.floating)):
            # An empty CSV cell arrives as NaN; int(NaN) would raise ValueError.
            return [int(tag_list)] if np.isfinite(tag_list) else []
        if isinstance(tag_list, (int, np.integer)):
            return [int(tag_list)]
        return []

    whole_data['tags'] = whole_data['tags'].apply(safe_int_convert)

    # Convert first so the mean is computed on numeric values even when the
    # column was read with a non-float dtype.
    ratings = whole_data['rating'].astype(float)
    whole_data['rating'] = ratings.fillna(ratings.mean())

    tags_padded = pad_sequences(whole_data['tags'].tolist(), padding='post')
    whole_data['tags'] = list(tags_padded)

    # np.unique flattens its input, so the padded matrix can be used directly.
    unique_tags = np.unique(tags_padded)
    return whole_data, unique_tags

# Run the preprocessing on the place CSV (relative path, resolved against the current working directory)
whole_data, unique_tags = preprocess_data('../../../data/place_data.csv')


Preprocessing data... 🛠️ orange


In [3]:
# Display the preprocessed place table
whole_data


Unnamed: 0,name,rating,place_id,tags,locationYX
0,Kadikoy Ferry Terminal,4.6,370509,"[1, 9, 14, 0, 0, 0, 0]","40.99269778351916,29.023280555674663"
1,Kadikoy Bull Statue,4.5,761356,"[1, 3, 6, 0, 0, 0, 0]","40.990473264783475,29.029131932189433"
2,Kadikoy-moda Streets,4.6,877687,"[13, 14, 15, 0, 0, 0, 0]","41.0300084184215,28.98441527977153"
3,Moda Beach Park,4.7,292410,"[9, 11, 12, 0, 0, 0, 0]","40.98000940235465,29.026848556608424"
4,IDEA Kadikoy,4.6,827370,"[9, 14, 16, 0, 0, 0, 0]","40.98015752580784,29.02810950338273"
...,...,...,...,...,...
182,Palace of the Porphyrogenitus,4.5,871687,"[2, 3, 4, 7, 0, 0, 0]","41.03788124723142,28.93945445055248"
183,Column of Marcian,4.3,265458,"[1, 2, 3, 7, 0, 0, 0]","41.02630809319681,28.95342213409043"
184,Beyazıt Tower,4.5,505870,"[1, 2, 3, 7, 0, 0, 0]","41.01817417223928,28.96347312671263"
185,Grand Bazaar,4.1,102220,"[1, 2, 3, 7, 9, 17, 0]","41.014800403046365,28.967378072061717"


In [4]:
# Display the sorted distinct tag values returned by preprocess_data
unique_tags


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22], dtype=int32)

In [5]:
# Read the historical user/place interaction log
user_data = pd.read_csv('../../../data/historical/historical_interactions.csv')

# Use the row label as the user id, so every interaction row counts as its own user.
# NOTE(review): this replaces any user_id column the CSV may already carry — confirm that is intended.
user_data['user_id'] = user_data.index

# Distinct ids in order of first appearance
unique_user_ids = user_data['user_id'].unique()
unique_place_ids = user_data['place_id'].unique()

# Lookup tables from raw id to a dense zero-based index
user_id_to_index = dict(zip(unique_user_ids, range(len(unique_user_ids))))
place_id_to_index = dict(zip(unique_place_ids, range(len(unique_place_ids))))

# Attach the dense indices next to the raw ids
user_data['user_index'] = user_data['user_id'].map(user_id_to_index)
user_data['place_index'] = user_data['place_id'].map(place_id_to_index)

user_data

Unnamed: 0,user_id,user_gender,place_id,continent,score,user_age,user_index,place_index
0,0,Male,563837,2,1,54,0,0
1,1,Male,776089,2,0,54,1,1
2,2,Male,207024,2,1,54,2,2
3,3,Male,711495,2,1,54,3,3
4,4,Male,463612,2,1,54,4,4
...,...,...,...,...,...,...,...,...
10005,10005,Recommender,289564,-1,1,-1,10005,22
10006,10006,Recommender,432443,-1,0,-1,10006,63
10007,10007,Recommender,697189,-1,0,-1,10007,59
10008,10008,Recommender,133470,-1,1,-1,10008,93


In [6]:
# Number of distinct user / place indices present in the interaction table
num_users, num_places = (
    user_data[index_column].nunique()
    for index_column in ('user_index', 'place_index')
)

num_users, num_places

(10010, 188)

In [7]:
# Create the NCF model (training happens in the next cell)
def create_ncf_model(num_users, num_places, embedding_dim=50, hidden_layers=(64, 32, 16, 8)):
    """Build and compile a neural collaborative filtering (NCF) model.

    A user id and a place id are each looked up in their own embedding
    table. The two flattened embeddings feed two branches that share them:
    an element-wise product (GMF branch) and a stack of ReLU dense layers
    over their concatenation (MLP branch). The branch outputs are
    concatenated and mapped to a single sigmoid unit.

    Args:
        num_users: ``input_dim`` of the user embedding table.
        num_places: ``input_dim`` of the place embedding table.
        embedding_dim: Output width of both embeddings.
        hidden_layers: Unit counts of the MLP branch's dense layers, in
            order. Any iterable of ints; the default is a tuple so the
            default value cannot be mutated between calls.

    Returns:
        The model with inputs ``[user_input, place_input]``, compiled with
        the Adam optimizer, binary cross-entropy loss and an accuracy metric.
    """
    # Layers are created in a fixed order so Keras' auto-generated layer
    # names stay stable from one build to the next.
    user_input = Input(shape=(1,), name='user_input')
    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim, name='user_embedding')(user_input)
    user_embedding = Flatten()(user_embedding)

    place_input = Input(shape=(1,), name='place_input')
    place_embedding = Embedding(input_dim=num_places, output_dim=embedding_dim, name='place_embedding')(place_input)
    place_embedding = Flatten()(place_embedding)

    # GMF branch: element-wise interaction of the two embeddings.
    gmf_vector = Multiply()([user_embedding, place_embedding])

    # MLP branch: dense stack over the concatenated embeddings.
    mlp_vector = Concatenate()([user_embedding, place_embedding])
    for units in hidden_layers:
        mlp_vector = Dense(units, activation='relu')(mlp_vector)

    combined_vector = Concatenate()([gmf_vector, mlp_vector])
    output = Dense(1, activation='sigmoid')(combined_vector)

    model = Model(inputs=[user_input, place_input], outputs=output)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Build the model with the default embedding size and hidden layers, then print its layer summary
ncf_model = create_ncf_model(num_users, num_places)
ncf_model.summary()

In [8]:
def train_ncf_model(model, user_data, batch_size=32, epochs=5, model_path='../.models/ncf_model.keras'):
    """Fit the NCF model on the interaction table and save it to disk.

    Args:
        model: Compiled model exposing Keras-style ``fit`` and ``save``.
        user_data: DataFrame with ``user_index``, ``place_index`` and
            ``score`` columns; the first two are the model inputs and
            ``score`` is the training target.
        batch_size: Batch size forwarded to ``model.fit``.
        epochs: Number of epochs forwarded to ``model.fit``.
        model_path: File the trained model is saved to. Missing parent
            directories are created first.

    Returns:
        Whatever ``model.fit`` returns (a ``History`` object for Keras models).

    Raises:
        ValueError: If ``user_data`` lacks one of the required columns.
    """
    # Local import so this cell does not depend on an import elsewhere.
    from pathlib import Path

    required_columns = ('user_index', 'place_index', 'score')
    if any(column not in user_data.columns for column in required_columns):
        raise ValueError("user_data must contain 'user_index', 'place_index', and 'score' columns.")

    user_indices = user_data['user_index'].values
    place_indices = user_data['place_index'].values
    interactions = user_data['score'].values

    print("Training model... 🚀")

    # NOTE(review): per the Keras docs, validation_split holds out the *last*
    # 20% of the rows before shuffling; if the table is ordered, the
    # validation set may not be representative — consider shuffling upstream.
    history = model.fit(
        x=[user_indices, place_indices],
        y=interactions,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.2
    )

    print("Training complete! 🎉")

    # Create the target directory up front so a missing folder cannot make
    # the save fail after training has already finished.
    Path(model_path).parent.mkdir(parents=True, exist_ok=True)
    model.save(model_path)

    return history
    
    
# Train for 5 epochs in batches of 32; train_ncf_model also saves the model to disk
train_ncf_model(ncf_model, user_data, batch_size=32, epochs=5)
# Print the layer summary again after training
ncf_model.summary()

Training model... 🚀
Epoch 1/5
