# Imports

In [1]:
import os
import pprint
import tempfile
from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import pandas as pd
import sampling

import tensorflow_recommenders as tfrs

import pickle

import datetime

from tensorflow.keras.layers import Flatten   
from tensorflow.keras.layers import Dense     

import TensorflowRichFeatures as tfrs_rich

# from google.colab import drive
# drive.mount('/content/drive')

# Constants

## Load:

In [None]:
# Shared prefix of every data file referenced below (relative to the working directory).
_DATA_ROOT = "./drive/MyDrive/Colab Notebooks/Recipes_new/Data"

# Base tables, stored as parquet.
RATINGS_BASE = f"{_DATA_ROOT}/base/ratings_base.parquet"
RECIPES_BASE = f"{_DATA_ROOT}/base/recipes_base.parquet"

# Pickled object that is unpickled in the "Load data" cell.
CONCAT_ING_CAT = f"{_DATA_ROOT}/samples/concatenated_ing_cat_df.obj"

# Load data

In [None]:
# Read the two base tables from parquet.
ratings_small = pd.read_parquet(RATINGS_BASE)
recipes_small = pd.read_parquet(RECIPES_BASE)

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only point CONCAT_ING_CAT at files you trust.
with open(CONCAT_ING_CAT, mode="rb") as pickle_file:
    concatenated_ing_cat_df = pickle.load(pickle_file)

## Ratings

In [None]:
# Sample the ratings with the project-local helper; how the [20, 10] thresholds are
# interpreted is defined in `sampling` (not visible in this notebook).
ratings_sample = sampling.get_ratings_with_min_number_list(ratings_small, [20, 10])

# Give the join key the same integer dtype in both frames.
# A vectorised astype replaces the per-row `apply(lambda x: int(x))`, and `.assign`
# builds a new frame instead of writing through an attribute on a frame that may be
# a slice of another one.
ratings_sample = ratings_sample.assign(RecipeId=ratings_sample["RecipeId"].astype("int64"))
recipes_small = recipes_small.assign(RecipeId=recipes_small["RecipeId"].astype("int64"))

# Distinct recipe ids that occur in the sampled ratings.
recipe_ids_in_sample = ratings_sample["RecipeId"].unique().tolist()

## Recipes

In [None]:
# Keep only the recipe key and attach the columns of concatenated_ing_cat_df;
# the inner join keeps RecipeIds that are present in both frames.
recipe_keys = recipes_small[["RecipeId"]]
recipes_subset = pd.merge(recipe_keys, concatenated_ing_cat_df, on="RecipeId", how="inner")

In [None]:
# Join every entry of "Concatenated" into one space-separated string.
recipes_subset = recipes_subset.assign(
    Ingredients_Category=recipes_subset["Concatenated"].map(" ".join)
)

# Keep only recipes that occur in the sampled ratings.
in_sample = recipes_subset["RecipeId"].isin(recipe_ids_in_sample)
recipes_subset = recipes_subset[in_sample]

In [None]:
# Attach the recipe features to every sampled rating, then drop the two columns
# that are not used further down.
merged_dataset = (
    ratings_sample
    .merge(recipes_subset, on="RecipeId", how="inner")
    .drop(columns=["Concatenated", "DateSubmitted"])
)

# Prepare dataset

In [None]:
from sklearn.model_selection import train_test_split


def create_train_test_dataframe(ratings_df, test_size, random_state):
    """Split a ratings frame into a train and a test frame, stratified by author.

    Args:
        ratings_df: DataFrame containing at least the columns "AuthorId",
            "RecipeId", "Ingredients_Category" and "Rating".
        test_size: Passed through to ``train_test_split`` as ``test_size``.
        random_state: Passed through to ``train_test_split`` as ``random_state``.

    Returns:
        Tuple ``(trainset, testset)`` of DataFrames holding the four columns
        above, in that order, with the original index labels.
    """
    columns = ["AuthorId", "RecipeId", "Ingredients_Category", "Rating"]

    # Split the whole frame once. Splitting features and labels separately and then
    # re-joining them on the index produces the same rows when the index is unique,
    # but duplicates rows whenever index labels repeat.
    trainset, testset = train_test_split(
        ratings_df[columns],
        test_size=test_size,
        random_state=random_state,
        stratify=ratings_df["AuthorId"],
    )

    return trainset, testset

In [None]:
# Fixed test fraction and seed so the split is the same on every run.
trainset, testset = create_train_test_dataframe(
    merged_dataset,
    test_size=0.2,
    random_state=13,
)

## Recipes

In [None]:
# Encode RecipeId as UTF-8 bytes. Values that are already bytes are left untouched,
# so re-running this cell does not turn b'44' into b"b'44'".
recipes_subset = recipes_subset.assign(
    RecipeId=recipes_subset["RecipeId"].map(
        lambda v: v if isinstance(v, bytes) else str(v).encode("utf-8")
    )
)

# One NumPy array per feature column; from_tensor_slices then yields one dict per recipe.
recipes_dict = {
    name: np.array(recipes_subset[name])
    for name in ("RecipeId", "Ingredients_Category")
}

# The dict already holds exactly the two wanted keys, so no extra .map() that
# rebuilds the same dict is needed.
recipes = tf.data.Dataset.from_tensor_slices(recipes_dict)

In [None]:
# Show the first element to check its keys and values.
for first_recipe in recipes.take(1).as_numpy_iterator():
    pprint.pprint(first_recipe)

{'Ingredients_Category': b'black pepper butter button mushroom celery chicken f'
                         b'lour milk parsley pepper pimiento worcestershire sau'
                         b'ce chicken',
 'RecipeId': b'44'}


In [None]:
# Dataset that carries only the RecipeId column of recipes_subset.
recipes_dict = {"RecipeId": recipes_subset["RecipeId"].to_numpy()}
recipes_dataset = tf.data.Dataset.from_tensor_slices(recipes_dict).map(
    lambda row: {"RecipeId": row["RecipeId"]}
)

In [None]:
# Show the first element to check its keys and values.
for first_recipe_id in recipes_dataset.take(1).as_numpy_iterator():
    pprint.pprint(first_recipe_id)

{'RecipeId': b'44'}


## Ratings

In [None]:
# Encode both id columns as UTF-8 bytes (updates ratings_sample in place).
for id_column in ("AuthorId", "RecipeId"):
    ratings_sample[id_column] = ratings_sample[id_column].map(
        lambda value: str(value).encode("utf-8")
    )

# One NumPy array per id column, sliced row-wise into a tf.data.Dataset.
ratings_dict = {
    column: ratings_sample[column].to_numpy()
    for column in ("AuthorId", "RecipeId")
}
ratings = tf.data.Dataset.from_tensor_slices(ratings_dict).map(
    lambda row: {"AuthorId": row["AuthorId"], "RecipeId": row["RecipeId"]}
)

## Merged

In [None]:
# Encode both id columns as UTF-8 bytes (updates the merged frame in place).
for id_column in ("AuthorId", "RecipeId"):
    merged_dataset[id_column] = merged_dataset[id_column].map(
        lambda value: str(value).encode("utf-8")
    )

merged_dict = {
    column: merged_dataset[column].to_numpy()
    for column in ("AuthorId", "RecipeId", "Ingredients_Category")
}

# NOTE: from here on `merged_dataset` is a tf.data.Dataset, no longer a DataFrame.
merged_dataset = tf.data.Dataset.from_tensor_slices(merged_dict).map(
    lambda row: {
        "AuthorId": row["AuthorId"],
        "RecipeId": row["RecipeId"],
        "Ingredients_Category": row["Ingredients_Category"],
    }
)

In [None]:
# Show the first element to check its keys and values.
for first_merged in merged_dataset.take(1).as_numpy_iterator():
    pprint.pprint(first_merged)

{'AuthorId': b'2312',
 'Ingredients_Category': b'cayenne pepper chicken breast cumin garlic ginger le'
                         b'mon lemon juice nutmeg paprika turmeric water chicke'
                         b'n breast',
 'RecipeId': b'780'}


In [None]:
# Encode both id columns as UTF-8 bytes (updates trainset in place). Values that are
# already bytes are kept as they are, so re-running this cell does not produce ids
# such as b"b'68727'".
for id_column in ("AuthorId", "RecipeId"):
    trainset[id_column] = trainset[id_column].map(
        lambda v: v if isinstance(v, bytes) else str(v).encode("utf-8")
    )

# Build the dict from the three feature columns only. Previously the column selection
# was ignored because the comprehension iterated over the full `trainset`, so the
# "Rating" column was converted as well and then thrown away by a follow-up .map().
trainset_dict = {
    name: np.array(trainset[name])
    for name in ("AuthorId", "RecipeId", "Ingredients_Category")
}

# Elements are dicts with exactly the keys AuthorId, RecipeId and Ingredients_Category.
trainset_dataset = tf.data.Dataset.from_tensor_slices(trainset_dict)

In [None]:
# Show the first element to check its keys and values.
for first_train_example in trainset_dataset.take(1).as_numpy_iterator():
    pprint.pprint(first_train_example)

{'AuthorId': b'68727',
 'Ingredients_Category': b'carrot celery dry marjoram ham hock onion pepper ',
 'RecipeId': b'112831'}


In [None]:
# Encode both id columns as UTF-8 bytes (updates testset in place). Values that are
# already bytes are kept as they are, so re-running this cell does not re-encode
# ids that were encoded on a previous run.
for id_column in ("AuthorId", "RecipeId"):
    testset[id_column] = testset[id_column].map(
        lambda v: v if isinstance(v, bytes) else str(v).encode("utf-8")
    )

# Build the dict from the three feature columns only. Previously the column selection
# was ignored because the comprehension iterated over the full `testset`, so the
# "Rating" column was converted as well and then thrown away by a follow-up .map().
testset_dict = {
    name: np.array(testset[name])
    for name in ("AuthorId", "RecipeId", "Ingredients_Category")
}

# Elements are dicts with exactly the keys AuthorId, RecipeId and Ingredients_Category.
testset_dataset = tf.data.Dataset.from_tensor_slices(testset_dict)

## Unique values

In [None]:
# Pull the id columns out of the datasets in large batches. The recipes were
# previously batched one element at a time, which creates one tensor per recipe
# for no benefit; the resulting unique ids are the same for any batch size.
ID_BATCH_SIZE = 1_000_000
recipe_ids = recipes.batch(ID_BATCH_SIZE).map(lambda x: x["RecipeId"])
user_ids = ratings.batch(ID_BATCH_SIZE).map(lambda x: x["AuthorId"])

# Sorted arrays of distinct ids; these are passed to the model constructor below.
unique_recipe_ids = np.unique(np.concatenate(list(recipe_ids)))
unique_user_ids = np.unique(np.concatenate(list(user_ids)))

# TRAIN

## Split dataset

In [None]:
# Row counts that drive the shuffle buffers and the validation slice.
size = len(ratings_sample)
train_size = len(trainset)
test_size = len(testset)
val_size = int(0.15 * size)  # 15% of the sampled ratings, not of the training rows

# Seed globally and per shuffle. reshuffle_each_iteration=False keeps the element
# order identical every time the datasets are iterated.
tf.random.set_seed(42)
test_shuffled = testset_dataset.shuffle(
    buffer_size=test_size, seed=42, reshuffle_each_iteration=False
)
train_shuffled = trainset_dataset.shuffle(
    buffer_size=train_size, seed=42, reshuffle_each_iteration=False
)


In [None]:
# Hold out the first `val_size` shuffled training examples for validation and train
# on the remainder. Previously the validation slice was taken from `train_shuffled`
# while the complete `train_shuffled` was still used for fitting, so every validation
# example was also a training example. The split is stable because `train_shuffled`
# is built with reshuffle_each_iteration=False.
val = train_shuffled.take(val_size)
train = train_shuffled.skip(val_size)

# Batch and cache each split.
cached_train = train.batch(8192).cache()
cached_test = test_shuffled.batch(4096).cache()
cached_val = val.batch(4096).cache()

In [None]:
# Display the dataset repr (element_spec) of a single training batch.
cached_train.take(count=1)

<TakeDataset element_spec={'AuthorId': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'RecipeId': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'Ingredients_Category': TensorSpec(shape=(None,), dtype=tf.string, name=None)}>

## One layer

In [None]:
# NOTE(review): num_epochs is not used by the fit call visible in this notebook
# (which passes epochs=5) — confirm which value is intended.
num_epochs = 100

# Model with a single layer size of 32, built from the project-local module.
model = tfrs_rich.CombinedModel(
    layer_sizes=[32],
    unique_recipe_ids=unique_recipe_ids,
    unique_user_ids=unique_user_ids,
    recipes_dataset=recipes,
    verbose=True,
)

adagrad = tf.keras.optimizers.Adagrad(0.1)
model.compile(optimizer=adagrad)


Init combined model
Query model init
USER MODEL INIT
Candidate model init
RECIPE MODEL INIT
Candidate model call
Inputs:  {'RecipeId': <tf.Tensor 'args_1:0' shape=(None,) dtype=string>, 'Ingredients_Category': <tf.Tensor 'args_0:0' shape=(None,) dtype=string>}
Recipe model call
INPUTS:  {'RecipeId': <tf.Tensor 'args_1:0' shape=(None,) dtype=string>, 'Ingredients_Category': <tf.Tensor 'args_0:0' shape=(None,) dtype=string>}


In [None]:
# %%time
# With validation_freq=5 and epochs=5, validation runs once, after the last epoch.
fit_options = {
    "validation_data": cached_val,
    "validation_freq": 5,
    "epochs": 5,
    "verbose": 1,
}
one_layer_history = model.fit(cached_train, **fit_options)

# model.load_weights("./drive/MyDrive/Colab Notebooks/Recipes_new/Data/TFRS/features/retrieval/20_10/model_1/model_1_500_epochs")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f583d6e63d0>