In [55]:
import numpy as np
import pandas as pd

In [56]:
df = pd.read_csv('../data/userDataRecency.csv')

df['time_of_day'] = pd.Categorical(df['time_of_day'], categories=['Morning', 'Afternoon', 'Evening', 'Night'])
df

Unnamed: 0,food_name,time_of_day,recency_days
0,veggie patty,Evening,1.0
1,salad,Evening,1.0
2,chicken with mole,Evening,1.0
3,camomile tea,Afternoon,1.0
4,quesadilla,Afternoon,1.0
5,scrambled eggs,Afternoon,1.0
6,cheese ziti pasta,Afternoon,0.95
7,omelette,Afternoon,0.9
8,cereal,Afternoon,0.8
9,scrambled eggs,Afternoon,0.8


In [57]:
from sklearn.preprocessing import LabelEncoder

data_encoded = pd.get_dummies(df[['time_of_day']]) # time of day is Night, Morning, Afternoon, Evening
data_encoded = pd.concat([df[['food_name']], data_encoded], axis=1) # food_name is a string 

data_encoded = pd.concat([data_encoded, df['recency_days']], axis=1)

y = df['food_name'].values

encoder = LabelEncoder()
encoder.fit(y)
encoded_y = encoder.transform(y)

data_encoded['encoded_y'] = encoded_y

# data_encoded.sort_values('food_name', inplace=True)

data_encoded.head(10)

Unnamed: 0,food_name,time_of_day_Morning,time_of_day_Afternoon,time_of_day_Evening,time_of_day_Night,recency_days,encoded_y
0,veggie patty,False,False,True,False,1.0,27
1,salad,False,False,True,False,1.0,21
2,chicken with mole,False,False,True,False,1.0,5
3,camomile tea,False,True,False,False,1.0,1
4,quesadilla,False,True,False,False,1.0,19
5,scrambled eggs,False,True,False,False,1.0,22
6,cheese ziti pasta,False,True,False,False,0.95,4
7,omelette,False,True,False,False,0.9,17
8,cereal,False,True,False,False,0.8,3
9,scrambled eggs,False,True,False,False,0.8,22


In [58]:
from sklearn.model_selection import train_test_split
import tensorflow.keras.backend as K
import tensorflow as tf


def custom_weighted_loss(y_true, y_pred):
    # Assuming y_true is one-hot encoded labels and y_pred is predicted probabilities
    weights = 1 + y_true[:, -1]  # Assuming the last column of y_true corresponds to recency_days
    # Calculate the weighted cross-entropy loss
    loss = K.categorical_crossentropy(y_true, y_pred) * weights
    return loss

In [59]:
top_n = 5
y = data_encoded['encoded_y'].values
X = data_encoded.drop(['food_name', 'encoded_y'], axis=1).values

print(tf.__version__)

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)
X_train = X
y_train = y

num_classes = len(np.unique(y_train))
num_features = X_train.shape[1]
print(num_classes, len(y_train)) # 139


y_train = tf.keras.utils.to_categorical(y_train, num_classes)

X_train = X_train.astype(np.float32)

X_train


2.16.1
28 33


array([[0.  , 0.  , 1.  , 0.  , 1.  ],
       [0.  , 0.  , 1.  , 0.  , 1.  ],
       [0.  , 0.  , 1.  , 0.  , 1.  ],
       [0.  , 1.  , 0.  , 0.  , 1.  ],
       [0.  , 1.  , 0.  , 0.  , 1.  ],
       [0.  , 1.  , 0.  , 0.  , 1.  ],
       [0.  , 1.  , 0.  , 0.  , 0.95],
       [0.  , 1.  , 0.  , 0.  , 0.9 ],
       [0.  , 1.  , 0.  , 0.  , 0.8 ],
       [0.  , 1.  , 0.  , 0.  , 0.8 ],
       [0.  , 0.  , 1.  , 0.  , 0.75],
       [0.  , 0.  , 1.  , 0.  , 0.75],
       [0.  , 1.  , 0.  , 0.  , 0.75],
       [1.  , 0.  , 0.  , 0.  , 0.7 ],
       [0.  , 0.  , 1.  , 0.  , 0.7 ],
       [0.  , 0.  , 1.  , 0.  , 0.7 ],
       [0.  , 0.  , 1.  , 0.  , 0.7 ],
       [0.  , 0.  , 1.  , 0.  , 0.7 ],
       [1.  , 0.  , 0.  , 0.  , 0.65],
       [1.  , 0.  , 0.  , 0.  , 0.65],
       [1.  , 0.  , 0.  , 0.  , 0.65],
       [0.  , 1.  , 0.  , 0.  , 0.6 ],
       [0.  , 1.  , 0.  , 0.  , 0.6 ],
       [0.  , 0.  , 1.  , 0.  , 0.45],
       [0.  , 0.  , 1.  , 0.  , 0.45],
       [0.  , 0.  , 1.  ,

In [60]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(num_features,), name='input'),
    tf.keras.layers.Dense(64, activation='relu', name='hidden1'),
    tf.keras.layers.Dense(32, activation='relu', name='hidden2'),
    tf.keras.layers.Dense(num_classes, activation='softmax', name='output')
])

model.compile(optimizer='adam', loss=custom_weighted_loss, metrics=[tf.keras.metrics.TopKCategoricalAccuracy(k=top_n)])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [61]:
# model.fit(X_train, y_train, batch_size=32, epochs=40, validation_data=(X_test, y_test))
model.fit(X_train, y_train, batch_size=32, epochs=100)

Epoch 1/100


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 3.4325 - top_k_categorical_accuracy: 0.2449
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3.4217 - top_k_categorical_accuracy: 0.2449 
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 3.4145 - top_k_categorical_accuracy: 0.2449 
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3.4086 - top_k_categorical_accuracy: 0.2449 
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3.3687 - top_k_categorical_accuracy: 0.2756 
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3.3645 - top_k_categorical_accuracy: 0.3368 
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3.3988 - top_k_categorical_accuracy: 0.2958 
Epoch 8/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x165c3acd0>

In [62]:
maxDay = max(data_encoded['recency_days'])
permutations_day = np.array([[1, 0, 0, 0, maxDay], [0, 1, 0, 0, maxDay], [0, 0, 1, 0, maxDay], [0, 0, 0, 1, maxDay]], dtype=np.float32)


predictions = model.predict(permutations_day)



top_n_indices = np.argsort(predictions, axis=1)[:, -top_n:][:, ::-1]
top_n_probabilities = np.sort(predictions, axis=1)[:, -top_n:][:, ::-1]


top_n_indices, top_n_probabilities

flattened =  encoder.inverse_transform(top_n_indices.flatten())

top_n_labels = flattened.reshape(top_n_indices.shape)

top_n_labels, top_n_probabilities

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step


(array([['quesadilla', 'tres leches slice', 'ham and cheese sandwich',
         'eggs', 'subways 6 inch sandwich'],
        ['scrambled eggs', 'quesadilla', 'cheese ziti pasta', 'guacamole',
         'subways 6 inch sandwich'],
        ['veggie patty', 'slices of ripe avocado',
         'cauliflower rice mixed with vegetables', 'chicken with mole',
         'scrambled eggs'],
        ['scrambled eggs', 'quesadilla', 'cheese ziti pasta',
         'subways 6 inch sandwich', 'guacamole']], dtype=object),
 array([[0.1825688 , 0.18048978, 0.16738988, 0.08281657, 0.0753742 ],
        [0.24278055, 0.1687989 , 0.13526115, 0.08976988, 0.08752144],
        [0.08128743, 0.0685839 , 0.06720307, 0.06460667, 0.05470838],
        [0.11289866, 0.0889662 , 0.07536335, 0.06403782, 0.05770645]],
       dtype=float32))

In [63]:
time_of_day_categories = ['Morning', 'Afternoon', 'Evening', 'Night']

with open('../out/recency_based_output_tensorflow_network.txt', 'w') as f:
    # Iterate over time of day categories
    for time_of_day, predictions, probabilities in zip(time_of_day_categories, top_n_labels, top_n_probabilities):
        # Write time of day
        f.write("Time of Day: {}\n".format(time_of_day))
        f.write("Predictions:\n")
        # Iterate over predictions and probabilities
        for i, (prediction, probability) in enumerate(zip(predictions, probabilities), 1):
            # Write each prediction and its probability
            f.write("    - Prediction {}: {}, Probability: {:.2f}%\n".format(i, prediction, probability * 100))
        # Add a newline for better readability between time of day categories
        f.write("\n")
