In [19]:
import pandas as pd
# Prepare TensorFlow dataset
import numpy as np
import tensorflow as tf
import tensorflow_recommenders as tfrs

In [71]:
# Load the data
whole_data = pd.read_excel('data/whole_data_cleaned.xlsx')
whole_data.drop(["website", "place_links", "description", "territory_id.1"], axis=1, inplace=True)

# Convert tags to a comma-separated string, handling non-iterable values
def convert_tags(x):
    if isinstance(x, (list, tuple)):
        return ','.join(map(str, x))
    return str(x)

whole_data['tags'] = whole_data['tags'].apply(convert_tags)

# Ensure all columns are in the correct format
whole_data['rating'] = whole_data['rating'].astype(np.float32)
whole_data['territory_id'] = whole_data['territory_id'].astype(np.int64)  # Convert territory_id to float32

# Convert the DataFrame to a TensorFlow dataset
dataset = tf.data.Dataset.from_tensor_slices({
    'name': whole_data['name'].values,
    'rating': whole_data['rating'].values,
    'territory_id': whole_data['territory_id'].values,
    'tags': whole_data['tags'].values
})

# Display a sample of the dataset to verify the structure
for element in dataset.take(1):
    print(element)


{'name': <tf.Tensor: shape=(), dtype=string, numpy=b'Kadikoy Ferry Terminal'>, 'rating': <tf.Tensor: shape=(), dtype=float32, numpy=4.6>, 'territory_id': <tf.Tensor: shape=(), dtype=int64, numpy=2>, 'tags': <tf.Tensor: shape=(), dtype=string, numpy=b'1,9,14'>}


In [72]:
class LocationAwareModel(tfrs.Model):

    def __init__(self):
        super().__init__()

        # Embedding for territory_id
        self.territory_model = tf.keras.Sequential([
            tf.keras.layers.IntegerLookup(vocabulary=np.unique(whole_data['territory_id'])),
            tf.keras.layers.Embedding(len(np.unique(whole_data['territory_id'])) + 1, 32)
        ])

        # Embedding for tags
        self.tags_vectorizer = tf.keras.layers.TextVectorization(output_mode='int')
        self.tags_vectorizer.adapt(whole_data['tags'])
        self.tags_model = tf.keras.Sequential([
            self.tags_vectorizer,
            tf.keras.layers.Embedding(input_dim=self.tags_vectorizer.vocabulary_size(), output_dim=32),
            tf.keras.layers.GlobalAveragePooling1D(),
            tf.keras.layers.Dense(32, activation='relu')
        ])

        # Rating model
        self.rating_model = tf.keras.Sequential([
            tf.keras.layers.Normalization(mean=np.mean(whole_data['rating']), variance=np.var(whole_data['rating'])),
            tf.keras.layers.Dense(32, activation='relu')
        ])

        candidate_dataset = dataset.map(lambda x: x['name'])

        # Modify the lambda function to use tf.gather
        territory_embeddings = self.territory_model.embeddings  # Assuming territory_model contains embeddings
        candidate_embeddings = candidate_dataset.map(
            lambda name: tf.gather(territory_embeddings, name['territory_id'])
        )

        # Metrics and Loss
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=candidate_embeddings
            )
        )

    def compute_loss(self, features, training=False):
        territory_embeddings = self.territory_model(features['territory_id'])
        tags_embeddings = self.tags_model(features['tags'])
        rating_embeddings = self.rating_model(features['rating'])

        # Combine embeddings
        embeddings = tf.concat([territory_embeddings, tags_embeddings, rating_embeddings], axis=1)

        return self.task(embeddings, embeddings)

# Instantiate and compile the model
model = LocationAwareModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

# Shuffle and batch data for training
dataset = dataset.shuffle(10000).batch(128).cache()

# Train the model
model.fit(dataset, epochs=30)

AttributeError: 'Sequential' object has no attribute 'embeddings'