In [33]:
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Embedding, Dense, Flatten, Input, Concatenate, GlobalAveragePooling1D, Normalization
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras import backend as K

# Load data
whole_data = pd.read_excel('data/whole_data_cleaned.xlsx')
whole_data.drop(["website","place_links","description"],axis=1,inplace=True)

# Preprocess whole_data: Convert 'tags' from comma-separated strings to lists of integers
def safe_int_convert(tag_list):
    if isinstance(tag_list, list):
        return tag_list
    elif isinstance(tag_list, (int, float)):
        return [int(tag_list)]
    elif isinstance(tag_list, str):
        try:
            return list(map(int, tag_list.split(',')))
        except ValueError:
            return []
    return []

whole_data['tags'] = whole_data['tags'].apply(safe_int_convert)

# Convert rating to float and handle any missing data
whole_data['rating'] = whole_data['rating'].astype(float).fillna(whole_data['rating'].mean())

# Drop unnecessary columns
whole_data.drop('territory_id', axis=1, inplace=True)

# Pad the sequences for tags
tags_padded = pad_sequences(whole_data['tags'], padding='post')
whole_data['tags'] = list(tags_padded)
# Extract unique tags
tags_flat = [tag for sublist in whole_data['tags'].tolist() for tag in sublist]
unique_tags = np.unique(tags_flat)

# Define the model
def create_model(unique_tags, embedding_dim=64, dense_units=128):
    # Tag embedding
    tag_input = Input(shape=(None,), dtype=tf.int32, name='tags')
    tag_lookup = tf.keras.layers.IntegerLookup(vocabulary=unique_tags, mask_token=None)
    tag_embedding = Embedding(input_dim=len(unique_tags) + 1, output_dim=embedding_dim)
    tag_embeddings = tag_embedding(tag_lookup(tag_input))
    tag_embeddings = GlobalAveragePooling1D()(tag_embeddings)
    
    # Normalize rating
    rating_input = Input(shape=(1,), dtype=tf.float32, name='rating')
    rating_normalization = Normalization(axis=None)
    
    # Manually specify input shape
    rating_normalization.build((None, 1))
    
    rating_normalized = rating_normalization(rating_input)
    
    # Concatenate all features
    combined_embeddings = Concatenate()([
        tag_embeddings,
        rating_normalized
    ])
    
    dense = Dense(dense_units, activation='relu')(combined_embeddings)
    output = Dense(embedding_dim)(dense)
    
    model = Model(inputs=[tag_input, rating_input], outputs=output)
    return model

# Adjusted hyperparameters
model = create_model(unique_tags, embedding_dim=64, dense_units=128)

def triplet_loss(anchor, positive, negative, margin=1.0):
    # Compute the distance between the anchor and the positive
    positive_distance = tf.reduce_sum(tf.square(anchor - positive), axis=-1)
    
    # Compute the distance between the anchor and the negative
    negative_distance = tf.reduce_sum(tf.square(anchor - negative), axis=-1)
    
    # Compute the triplet loss
    loss = tf.maximum(positive_distance - negative_distance + margin, 0.0)
    
    return tf.reduce_mean(loss)
# Create a dataset for triplet loss training
def generate_triplets(dataframe):
    anchor_features = {
        'tags': [],
        'rating': []
    }
    positive_features = {
        'tags': [],
        'rating': []
    }
    negative_features = {
        'tags': [],
        'rating': []
    }
    
    for _, row in dataframe.iterrows():
        anchor_features['tags'].append(row['tags'])
        anchor_features['rating'].append(row['rating'])
        
        positive_idx = np.random.choice(dataframe.index)
        negative_idx = np.random.choice(dataframe.index)
        
        positive_row = dataframe.loc[positive_idx]
        negative_row = dataframe.loc[negative_idx]
        
        positive_features['tags'].append(positive_row['tags'])
        positive_features['rating'].append(positive_row['rating'])
        
        negative_features['tags'].append(negative_row['tags'])
        negative_features['rating'].append(negative_row['rating'])
    
    return (
        {k: pad_sequences(v, padding='post') if k == 'tags' else np.array(v) for k, v in anchor_features.items()},
        {k: pad_sequences(v, padding='post') if k == 'tags' else np.array(v) for k, v in positive_features.items()},
        {k: pad_sequences(v, padding='post') if k == 'tags' else np.array(v) for k, v in negative_features.items()}
    )

anchor_features, positive_features, negative_features = generate_triplets(whole_data)

def triplet_generator(anchor_features, positive_features, negative_features, batch_size=32):
    while True:
        indices = np.arange(len(anchor_features['tags']))
        np.random.shuffle(indices)
        
        for start in range(0, len(anchor_features['tags']), batch_size):
            end = start + batch_size
            batch_indices = indices[start:end]
            
            anchor_batch = {k: v[batch_indices] for k, v in anchor_features.items()}
            positive_batch = {k: v[batch_indices] for k, v in positive_features.items()}
            negative_batch = {k: v[batch_indices] for k, v in negative_features.items()}
            
            yield (anchor_batch, positive_batch, negative_batch)

batch_size = 32
triplet_gen = triplet_generator(anchor_features, positive_features, negative_features, batch_size=batch_size)

# Compile the model with custom training loop
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

@tf.function
def train_step(anchor_batch, positive_batch, negative_batch):
    with tf.GradientTape() as tape:
        anchor_embeddings = model(anchor_batch, training=True)
        positive_embeddings = model(positive_batch, training=True)
        negative_embeddings = model(negative_batch, training=True)
        
        loss = triplet_loss(anchor_embeddings, positive_embeddings, negative_embeddings)
    
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

# Training loop
epochs = 100
steps_per_epoch = len(anchor_features['tags']) // batch_size

for epoch in range(epochs):
    for step in range(steps_per_epoch):
        anchor_batch, positive_batch, negative_batch = next(triplet_gen)
        loss = train_step(anchor_batch, positive_batch, negative_batch)
        
        if step % 100 == 0:
            print(f"Epoch {epoch + 1}, Step {step}, Loss: {loss.numpy()}")
            
# Save the trained model
model.save('triplet_model.h5')

Epoch 1, Step 0, Loss: 1.0083401203155518
Epoch 2, Step 0, Loss: 0.9972688555717468
Epoch 3, Step 0, Loss: 1.0030083656311035
Epoch 4, Step 0, Loss: 0.9957859516143799
Epoch 5, Step 0, Loss: 0.9894741177558899
Epoch 6, Step 0, Loss: 0.9753241539001465
Epoch 7, Step 0, Loss: 0.9937542676925659
Epoch 8, Step 0, Loss: 0.817984938621521
Epoch 9, Step 0, Loss: 0.9159425497055054
Epoch 10, Step 0, Loss: 0.9225462675094604
Epoch 11, Step 0, Loss: 0.922818660736084
Epoch 12, Step 0, Loss: 1.0551756620407104
Epoch 13, Step 0, Loss: 1.0166600942611694
Epoch 14, Step 0, Loss: 0.8570143580436707
Epoch 15, Step 0, Loss: 0.8706648349761963
Epoch 16, Step 0, Loss: 0.7432109117507935
Epoch 17, Step 0, Loss: 0.8500399589538574
Epoch 18, Step 0, Loss: 0.942444920539856
Epoch 19, Step 0, Loss: 0.6736522912979126
Epoch 20, Step 0, Loss: 0.8385230898857117
Epoch 21, Step 0, Loss: 0.8906406164169312
Epoch 22, Step 0, Loss: 1.047188401222229
Epoch 23, Step 0, Loss: 0.9801009893417358
Epoch 24, Step 0, Loss: 



Epoch 89, Step 0, Loss: 0.6569786071777344
Epoch 90, Step 0, Loss: 0.6663241982460022
Epoch 91, Step 0, Loss: 0.5401999950408936
Epoch 92, Step 0, Loss: 0.6070465445518494
Epoch 93, Step 0, Loss: 0.7369554042816162
Epoch 94, Step 0, Loss: 0.725275993347168
Epoch 95, Step 0, Loss: 0.7392619848251343
Epoch 96, Step 0, Loss: 0.5833296775817871
Epoch 97, Step 0, Loss: 0.7352955341339111
Epoch 98, Step 0, Loss: 0.6348308324813843
Epoch 99, Step 0, Loss: 0.7088451385498047
Epoch 100, Step 0, Loss: 0.5784915685653687


In [34]:
model = tf.keras.models.load_model('triplet_model.keras', custom_objects={'triplet_loss': triplet_loss})

# Generate embeddings for all locations
features = {
    'tags': np.array(pad_sequences(whole_data['tags'], padding='post')),
    'rating': np.array(whole_data['rating'])
}
location_embeddings = model.predict(features)

# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(location_embeddings)

# Function to generate recommendations for a specific place
def recommend(input_features, k=20):
    input_dict = {
        'tags': tf.convert_to_tensor([input_features['tags']], dtype=tf.int32),
        'rating': tf.convert_to_tensor([input_features['rating']], dtype=tf.float32)
    }
    
    # Generate the query embedding
    query_embedding = model.predict(input_dict)
    location_embeddings = model.predict(features)
    
    # Compute cosine similarity between the query embedding and all location embeddings
    similarities = cosine_similarity(query_embedding, location_embeddings)
    
    # Get the top-k most similar locations
    top_k_indices = similarities[0].argsort()[-k:][::-1]
    
    return top_k_indices

# Example usage
input_features = {
    'tags': [1,2,3,5],  # Example tags
    'rating': 4.0
}

recommendations = recommend(input_features, k=10)
print("Recommended items:", recommendations)

# Fetching the items from the dataset
recommended_items = whole_data.iloc[recommendations]
recommended_items


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Recommended items: [ 58 159   6 114  96  90  69 166 174  85]


Unnamed: 0,name,rating,tags,territory_id.1,locationYX
58,Historical Plane Tree of Çengelköy,4.3,"[1, 2, 3, 12, 13, 14, 15]",1,"41.05022883927636, 29.05278206286947"
159,Sultanahmet Square,4.6,"[1, 2, 3, 12, 13, 14, 15]",3,"41.00640399569341, 28.976145898680326"
6,Bagdat Avenue,4.6,"[13, 14, 15, 16, 17, 0, 0]",2,"40.9683886821237, 29.065833609170745"
114,French Street Cultural Center,4.2,"[2, 13, 14, 15, 16, 0, 0]",0,"41.03218963326699, 28.979591696647134"
96,Ataturk Arboretum,4.6,"[9, 10, 11, 12, 13, 14, 0]",0,"41.17686871834661, 28.985596773928652"
90,Yıldız Park,4.7,"[9, 10, 11, 12, 13, 14, 0]",0,"41.04950796854129, 29.015295073702305"
69,Kuzguncuk Icadiye Street,4.6,"[12, 13, 14, 15, 17, 0, 0]",1,"41.034480060651845, 29.030833399333115"
166,Turkish and Islamic Arts Museum,4.6,"[2, 3, 4, 5, 6, 0, 0]",3,"41.01085761893604, 28.975403362743883"
174,Istanbul Museum of the History of Science and ...,4.4,"[2, 3, 4, 5, 6, 0, 0]",3,"41.0192882800712, 28.978594949501012"
85,Yıldız Hamidiye Mosque,4.9,"[1, 2, 3, 5, 0, 0, 0]",0,"41.05168636373972, 29.009377384401752"


# Collabrative Filtering

In [85]:
# Load user data
user_data = pd.read_excel('generated_user_data.xlsx')
user_data

Unnamed: 0,user_id,user_gender,place_id,continent,score,user_age
0,1,Male,563837,2,1,54
1,1,Male,776089,2,0,54
2,1,Male,207024,2,1,54
3,1,Male,711495,2,1,54
4,1,Male,463612,2,1,54
...,...,...,...,...,...,...
9995,1000,Male,871687,1,1,51
9996,1000,Male,950867,1,1,51
9997,1000,Male,761356,1,1,51
9998,1000,Male,723599,1,1,51


In [86]:
user_data[user_data['user_id'] == 1]

Unnamed: 0,user_id,user_gender,place_id,continent,score,user_age
0,1,Male,563837,2,1,54
1,1,Male,776089,2,0,54
2,1,Male,207024,2,1,54
3,1,Male,711495,2,1,54
4,1,Male,463612,2,1,54
5,1,Male,733989,2,1,54
6,1,Male,375467,2,1,54
7,1,Male,992964,2,1,54
8,1,Male,342528,2,1,54
9,1,Male,459627,2,1,54


In [87]:
# Preprocess user_data
user_data['user_id'] = user_data.index

# Extract unique user_ids and place_ids
unique_user_ids = user_data['user_id'].unique()
unique_place_ids = user_data['place_id'].unique()

# Create mappings for user_ids and place_ids
user_id_to_index = {user_id: index for index, user_id in enumerate(unique_user_ids)}
place_id_to_index = {place_id: index for index, place_id in enumerate(unique_place_ids)}

# Map user_ids and place_ids to indices
user_data['user_index'] = user_data['user_id'].map(user_id_to_index)
user_data['place_index'] = user_data['place_id'].map(place_id_to_index)

In [88]:
user_data

Unnamed: 0,user_id,user_gender,place_id,continent,score,user_age,user_index,place_index
0,0,Male,563837,2,1,54,0,0
1,1,Male,776089,2,0,54,1,1
2,2,Male,207024,2,1,54,2,2
3,3,Male,711495,2,1,54,3,3
4,4,Male,463612,2,1,54,4,4
...,...,...,...,...,...,...,...,...
9995,9995,Male,871687,1,1,51,9995,72
9996,9996,Male,950867,1,1,51,9996,85
9997,9997,Male,761356,1,1,51,9997,64
9998,9998,Male,723599,1,1,51,9998,101


In [89]:
from keras.src.layers import Multiply
from tensorflow.keras.layers import Input, Embedding, Dot, Add, Flatten, Dense
from tensorflow.keras.models import Model

def create_ncf_model(num_users, num_places, embedding_dim=50, hidden_layers=[64, 32, 16, 8]):
    # User input and embedding
    user_input = Input(shape=(1,), name='user_input')
    user_embedding = Embedding(input_dim=num_users, output_dim=embedding_dim, name='user_embedding')(user_input)
    user_embedding = Flatten()(user_embedding)
    
    # Place input and embedding
    place_input = Input(shape=(1,), name='place_input')
    place_embedding = Embedding(input_dim=num_places, output_dim=embedding_dim, name='place_embedding')(place_input)
    place_embedding = Flatten()(place_embedding)
    
    # GMF part: element-wise product of user and place embeddings
    gmf_vector = Multiply()([user_embedding, place_embedding])
    
    # MLP part: concatenate user and place embeddings
    mlp_vector = Concatenate()([user_embedding, place_embedding])
    
    # Hidden layers for MLP
    for units in hidden_layers:
        mlp_vector = Dense(units, activation='relu')(mlp_vector)
        
    
    # Concatenate GMF and MLP parts
    combined_vector = Concatenate()([gmf_vector, mlp_vector])
    
    # Final prediction layer
    output = Dense(1, activation='sigmoid')(combined_vector)
    
    # Build and compile model
    model = Model(inputs=[user_input, place_input], outputs=output)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

num_users = len(unique_user_ids)
num_places = len(unique_place_ids)
embedding_dim = 50

ncf_model = create_ncf_model(num_users, num_places, embedding_dim)


In [90]:
# Prepare training data
user_indices = user_data['user_index'].values
place_indices = user_data['place_index'].values
interactions = user_data['score'].values  # Assuming binary interaction (0 or 1)

# Train the model
ncf_model.fit(
    x=[user_indices, place_indices],
    y=interactions,
    batch_size=32,
    epochs=15,
    validation_split=0.2
)

# Save the trained NCF model
ncf_model.save('ncf_model.h5')


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  saving_api.save_model(


In [93]:
def ncf_recommend(user_id, model, place_data, user_id_to_index, k=10):
    user_index = user_id_to_index[user_id]
    user_vector = np.full((len(place_data),), user_index)
    place_indices = np.arange(len(place_data))
    
    # Predict scores for all places for the given user
    predictions = model.predict([user_vector, place_indices])
    top_k_indices = predictions.flatten().argsort()[-k:][::-1]
    
    # Get the recommended places
    recommended_places = place_data.iloc[top_k_indices]
    return recommended_places

# Example usage
user_id = 10  # Replace with the actual user_id
recommended_places = ncf_recommend(user_id, ncf_model, whole_data, user_id_to_index, k=10)
print("Recommended places for user:", user_id)
print(recommended_places)


Recommended places for user: 10
                                             name  rating  \
17                                  Moda Sea Club     4.8   
28                               Hasanpasa Mosque     4.0   
29                                      Dorock XL     3.5   
89                                 Çırağan Palace     4.7   
72                   Filizler Meatball Restaurant     4.0   
94                                     Bebek Park     4.5   
181  Venerable Patriarchal Church of Saint George     4.6   
34                             Sekerci Cafer Erol     4.7   
37                      Omer Faruk Toprak Library     4.6   
176                          The Stone of Million     4.6   

                         tags  territory_id.1  \
17   [13, 16, 17, 0, 0, 0, 0]               2   
28      [1, 2, 3, 5, 0, 0, 0]               2   
29     [15, 0, 0, 0, 0, 0, 0]               2   
89      [1, 2, 3, 4, 7, 0, 0]               0   
72     [13, 0, 0, 0, 0, 0, 0]               1   
94

In [94]:
# Example usage
user_id = 10  # Replace with the actual user_id
recommended_places = ncf_recommend(user_id, ncf_model, whole_data, k=10)
print("Recommended places for user:", user_id)
print(recommended_places)

TypeError: ncf_recommend() missing 1 required positional argument: 'user_id_to_index'

In [95]:
user_data[user_data["user_id"] == 1231]


Unnamed: 0,user_id,user_gender,place_id,continent,score,user_age,user_index,place_index
1231,1231,Female,207141,4,1,53,1231,150


In [96]:
input_features

{'tags': [1, 2, 3, 4, 6, 7, 9, 11, 12, 13, 14, 15, 16], 'rating': 4.5}

In [122]:
# Example usage
input_features = {
    'tags': [1, 2, 3, 4, 6, 7, 9, 11, 12, 13, 14, 15, 16],  # Example tags
    'rating': 4.5
}
user_id = 0  # Replace with the actual user_id

recommended_items = weighted_hybrid_recommend(input_features, user_id, model, ncf_model, whole_data, user_id_to_index, cb_weight=0, cf_weight=0.9, k=10)



In [123]:
print("Recommended items for user:", user_id)

Recommended items for user: 0


In [124]:
print(recommended_items)

                                     name  rating                       tags  \
187         Ruins of Philanthropos Church     4.5      [1, 2, 3, 5, 0, 0, 0]   
58     Historical Plane Tree of Çengelköy     4.3  [1, 2, 3, 12, 13, 14, 15]   
67                               KidZania     4.3    [18, 20, 0, 0, 0, 0, 0]   
66      Emaar Aquarium and Underwater Zoo     4.3   [18, 20, 21, 0, 0, 0, 0]   
65                    Small Camlica Grove     4.5   [9, 10, 11, 13, 0, 0, 0]   
64                               Nevmekan     4.3     [13, 0, 0, 0, 0, 0, 0]   
63            Historical Kuzguncuk Houses     4.7      [1, 2, 3, 7, 0, 0, 0]   
62   Mahpeyker Kösem Valide Sultan Mosque     4.6      [1, 2, 3, 5, 0, 0, 0]   
61                 Mihrimah Sultan Mosque     4.8      [1, 2, 3, 5, 0, 0, 0]   
60          Mehmet Naci Akgöz Kite Museum     4.5      [4, 6, 0, 0, 0, 0, 0]   

     territory_id.1                              locationYX  
187               3   41.02291936064259, 28.9844984538675