In [62]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

In [63]:
# Load the raw food log from disk.
user = pd.read_csv('../data/user.csv')

# Build the working frame in one chain (`.assign` returns a new frame, so the
# raw `user` frame is left untouched):
#   * food_name    -> lower-cased, so differently-cased spellings share a label
#   * recency_days -> whole days between "now" and the log timestamp; negative
#                     values (entries dated in the future) are clamped to 0
# NOTE: 'today' is evaluated when the cell runs, so the displayed day counts
# depend on the date the notebook is executed.
userData = user[['food_name', 'log_date', 'time_of_day']].assign(
    food_name=lambda df: df['food_name'].str.lower(),
    recency_days=lambda df: (
        pd.to_datetime('today') - pd.to_datetime(df['log_date'])
    ).dt.days.clip(lower=0),
)

userData



Unnamed: 0,food_name,log_date,time_of_day,recency_days
0,chipotle chicken and queso bowl,2024-05-07 16:48:11,Morning,0
1,kodiak cake waffles,2024-05-07 05:04:42,Night,0
2,chipotle bowl chicken and queso,2024-05-07 02:30:12,Evening,0
3,starbucks caramel macchiato,2024-05-06 16:11:12,Morning,1
4,chicken tacos,2024-05-05 04:22:17,Evening,2
...,...,...,...,...
374,mcdonald’s cheeseburger,2023-06-28 04:24:56,Evening,314
375,small flour tortillas,2023-06-28 01:24:47,Evening,314
376,chicken,2023-06-28 01:24:47,Evening,314
377,2 percent fairlife chocolate milk,2023-06-28 01:23:43,Evening,314


In [64]:
# Multiplier applied to the normalised recency feature.
WEIGHT_RECENCY = 2

scaler = MinMaxScaler()

# Min-max scale the day counts, invert them so the most recent entries get the
# largest value, then apply the weight.
# NOTE: this cell overwrites `recency_days` and removes `log_date`, so it cannot
# be re-run on its own -- re-execute the loading cell first.
scaled_days = scaler.fit_transform(userData[['recency_days']])
userData['recency_days'] = WEIGHT_RECENCY * (1 - scaled_days)

# The raw timestamp is not used as a model input.
userData = userData.drop(columns=['log_date'])

userData

Unnamed: 0,food_name,time_of_day,recency_days
0,chipotle chicken and queso bowl,Morning,2.000000
1,kodiak cake waffles,Night,2.000000
2,chipotle bowl chicken and queso,Evening,2.000000
3,starbucks caramel macchiato,Morning,1.993631
4,chicken tacos,Evening,1.987261
...,...,...,...
374,mcdonald’s cheeseburger,Evening,0.000000
375,small flour tortillas,Evening,0.000000
376,chicken,Evening,0.000000
377,2 percent fairlife chocolate milk,Evening,0.000000


In [65]:
# One-hot encode the time-of-day category; the remaining columns pass through.
df_encoded = pd.get_dummies(userData, columns=['time_of_day'])

# Target: the food label. Features: every other column of the encoded frame.
y = df_encoded['food_name']
X = df_encoded.drop('food_name', axis='columns')

X

Unnamed: 0,recency_days,time_of_day_Afternoon,time_of_day_Evening,time_of_day_Morning,time_of_day_Night
0,2.000000,False,False,True,False
1,2.000000,False,False,False,True
2,2.000000,False,True,False,False
3,1.993631,False,False,True,False
4,1.987261,False,True,False,False
...,...,...,...,...,...
374,0.000000,False,True,False,False
375,0.000000,False,True,False,False
376,0.000000,False,True,False,False
377,0.000000,False,True,False,False


In [66]:
# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43)

# Hand-built query set: one row per time of day.
times_of_day = ['Morning', 'Afternoon', 'Evening', 'Night']
all_permutations = times_of_day  # same list; name kept for any later use

manual_test = pd.DataFrame(all_permutations, columns=['time_of_day'])
manual_encoded = pd.get_dummies(manual_test, columns=['time_of_day'])

# Give every query row the largest recency value present in the data.
# Series.max() skips NaN, unlike the builtin max() over a Series.
manual_encoded['recency_days'] = userData['recency_days'].max()

# Align the query columns (names and order) with the training features.
# reindex instead of plain column selection: a dummy column that exists in the
# training data but not in the query rows is added as all-False rather than
# raising KeyError; query-only columns are dropped.
manual_encoded = manual_encoded.reindex(columns=X_train.columns, fill_value=False)

manual_encoded



Unnamed: 0,recency_days,time_of_day_Afternoon,time_of_day_Evening,time_of_day_Morning,time_of_day_Night
0,2.0,False,False,True,False
1,2.0,True,False,False,False
2,2.0,False,True,False,False
3,2.0,False,False,False,True


In [67]:
# How many ranked suggestions to keep for each query row.
top_n = 5

# MLP with one hidden layer of 100 units; the seed fixes the initialisation.
# fit() returns the estimator itself, so construction and training are chained.
mlp_classifier = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=1000,
    random_state=43,
).fit(X_train, y_train)

# Probability estimates for the manual query rows, one column per entry of
# mlp_classifier.classes_.
y_pred_proba = mlp_classifier.predict_proba(manual_encoded)

# Sorting the negated probabilities ascending ranks the columns from most to
# least probable; keep the first top_n column indices of each row.
ranked_columns = (-y_pred_proba).argsort(axis=1)
top_n_indices = ranked_columns[:, :top_n]

# Translate the column indices back into food labels.
top_n_predictions = mlp_classifier.classes_[top_n_indices]





In [68]:
# Print the top N predictions for each sample along with their parameters and probabilities
result = []

with open("../out/time_based_output_mlp.txt", "w") as f:
    for i in range(len(manual_encoded)):
        time_of_day = manual_test.iloc[i]['time_of_day']
        f.write("Time of Day: {}\n".format(time_of_day))
        f.write("Predictions:\n")
        for j in range(top_n):
            f.write("    - Prediction {}: {}, Probability: {:.2f}%\n".format(j + 1, top_n_predictions[i][j], y_pred_proba[i][top_n_indices[i][j]] * 100))
        f.write("\n")
        result.append({
            'time_of_day': time_of_day,
        })
        for j in range(top_n):
            result[-1][f'prediction{j+1}'] = top_n_predictions[i][j]
            result[-1][f'probability{j+1}'] = y_pred_proba[i][top_n_indices[i][j]] * 100

df_result = pd.DataFrame(result)
df_result

Unnamed: 0,time_of_day,prediction1,probability1,prediction2,probability2,prediction3,probability3,prediction4,probability4,prediction5,probability5
0,Morning,cups of milk,22.574529,granola parfait,16.918915,starbucks caramel macchiato,16.628521,milk,15.823795,chipotle chicken and queso bowl,12.188527
1,Afternoon,kodiak cake waffles,6.157887,chick-fil-a breakfast burrito,5.998619,honey wheat bread,5.274514,optimum nutrition shake with 2 cups of milk,4.738355,glasses of chocolate milk,4.533267
2,Evening,chipotle bowl chicken and queso,5.724682,cups of unsweetened tea,5.216365,glasses of orange juice,4.807049,chick-fil-a granola parfait,4.524261,velvet taco chicken and waffle tacos,4.318089
3,Night,kodiak cake waffles,96.123952,chick-fil-a sandwich,1.139686,sweet tea,0.508952,chick-fil-a granola parfait,0.306351,milk,0.219056
