In [29]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
import pandas as pd

### Get the data for the courses

In [30]:
# Locations of the two input tables, relative to the working directory.
COURSES_CSV = "data/courses.csv"
INTERACTIONS_CSV = "data/course_interactions.csv"

# Read the raw course catalogue and the raw interaction log.
courses = pd.read_csv(COURSES_CSV)
interactions = pd.read_csv(INTERACTIONS_CSV)



In [31]:
# Select only the features that we need.
# Each step returns a new frame instead of assigning into a column subset or
# renaming it in place, which avoids pandas' SettingWithCopyWarning.
courses = (
    courses[["id", "name"]]
    .rename(columns={"id": "courseId"})
    .assign(name=lambda frame: frame["name"].astype(str))
)

# Attach the course name to every interaction through the shared "courseId"
# key and keep only the two columns used downstream.
interactions = (
    pd.merge(courses, interactions, on="courseId")[["name", "userId"]]
    # Convert the ids (and names) to strings
    .astype({"name": str, "userId": str})
)

# From here on only the course names are needed.
courses = courses["name"]

In [32]:
# Turn the pandas objects into tf.data.Dataset instances.
# The interactions are passed as a {column name: column} mapping so each
# dataset element can later be indexed by column name.
interactions = tf.data.Dataset.from_tensor_slices(
    {column: interactions[column] for column in interactions.columns}
)
courses = tf.data.Dataset.from_tensor_slices(courses)

In [33]:
# String -> integer index lookups, created without a mask token.
user_ids_vocab = tf.keras.layers.StringLookup(mask_token=None)
course_names_vocab = tf.keras.layers.StringLookup(mask_token=None)

# Fit the user vocabulary on the "userId" field of every interaction...
user_ids_vocab.adapt(interactions.map(lambda interaction: interaction["userId"]))
# ...and the course vocabulary on the course-name dataset.
course_names_vocab.adapt(courses)

In [34]:
# CourseModel
class CourseModel(tfrs.Model):
    """Retrieval model that pairs a user tower with a course tower.

    The two towers turn raw feature values into embeddings; the task object
    turns a batch of (user embedding, course embedding) pairs into a loss.
    """

    def __init__(self,
                 user_model: tf.keras.Model,
                 course_model: tf.keras.Model,
                 task: tfrs.tasks.Retrieval):
        """Keep references to the two towers and the task.

        Args:
            user_model: Model applied to the "userId" feature.
            course_model: Model applied to the "name" feature.
            task: Task called with the two embedding batches to get the loss.
        """
        super().__init__()

        self.user_model = user_model
        self.course_model = course_model
        self.task = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Embed one batch of interactions and return the task loss.

        Args:
            features: Mapping that must contain the keys "userId" and "name".
            training: Whether the call happens during training. Metrics are
                skipped when this is true.

        Returns:
            The value returned by the task for this batch.
        """
        user_embeddings = self.user_model(features["userId"])
        course_embeddings = self.course_model(features["name"])

        # The task's metrics are not needed to optimise the loss, so they are
        # only computed outside of training (e.g. in `evaluate`).
        return self.task(
            user_embeddings,
            course_embeddings,
            compute_metrics=not training,
        )

In [35]:
# Start defining each model
def _embedding_tower(lookup):
    """Return a Sequential of `lookup` followed by a 64-wide Embedding sized to its vocabulary."""
    return tf.keras.Sequential([
        lookup,
        tf.keras.layers.Embedding(lookup.vocabulary_size(), 64),
    ])


user_model = _embedding_tower(user_ids_vocab)
course_model = _embedding_tower(course_names_vocab)

# The top-K metric receives the course names, two per batch, mapped through
# the course tower.
candidate_embeddings = courses.batch(2).map(course_model)
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(candidate_embeddings)
)

In [36]:
# Assemble the full model from the two towers and the task, then compile it
# with an Adagrad optimizer.
model = CourseModel(user_model=user_model, course_model=course_model, task=task)
adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.1)
model.compile(optimizer=adagrad)

In [37]:
# The retrieval task contrasts each (user, course) pair with the other pairs
# in the same batch. With batch(1) there is nothing to contrast against: the
# softmax over a single score is always 1, the loss is 0 and no gradient
# flows. Shuffle the interactions and train on real batches instead.
SHUFFLE_BUFFER_SIZE = 10_000
TRAIN_BATCH_SIZE = 256

train_batches = (
    interactions
    .shuffle(SHUFFLE_BUFFER_SIZE, seed=42, reshuffle_each_iteration=True)
    .batch(TRAIN_BATCH_SIZE)
)
model.fit(train_batches, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x15de6b5abd0>

ValueError: This model has not yet been built. Build the model first by calling `build()` or by calling the model on a batch of data.

In [49]:
# Brute-force index whose queries go through the model's user tower.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)

# Feed the index (course names, course embeddings) pairs, two names per batch.
named_course_embeddings = courses.batch(2).map(
    lambda course_names: (course_names, model.course_model(course_names))
)
index.index_from_dataset(named_course_embeddings)

# Query with the user id "2"; the first returned element is discarded and the
# second one is printed.
_, names = index(np.array(["2"]))
print(f"Recommendations for user 2: {names}")


Recommendations for user 2: [[b'AP United States History Course - AP Central - College Board'
  b'Cloud Computing Certification Courses'
  b'A Systematic Literature Review of Teaching and Learning ...'
  b'Best Data Visualization Courses Online'
  b'2022-2023 History Course Schedule'
  b'English 101: English Literature Course'
  b'Communication Degree Courses'
  b'Best 15 Business Analytics Courses Online'
  b'Languages - Free courses - OpenLearn'
  b'Top 10 reasons to consider a Financial Management Course']]


In [55]:
# Persist the index under the 'recommendations-model' path, replacing any
# previous export at that location.
index.save(
    'recommendations-model',
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None,
)









INFO:tensorflow:Assets written to: recommendations-model\assets


INFO:tensorflow:Assets written to: recommendations-model\assets








