In [1]:
import pandas as pd
from itertools import product
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

In [2]:
# Load the user food log and drop the `weekday` column in a single chained
# expression, then display the resulting frame as the cell output.
userData = pd.read_csv('../data/userData.csv').drop(columns=['weekday'])
userData

Unnamed: 0,food_name,time_of_day
0,chipotle chicken and queso bowl,Morning
1,kodiak cake waffles,Night
2,chipotle bowl chicken and queso,Evening
3,starbucks caramel macchiato,Morning
4,chicken tacos,Evening
...,...,...
374,mcdonald’s cheeseburger,Evening
375,small flour tortillas,Evening
376,chicken,Evening
377,2 percent fairlife chocolate milk,Evening


In [3]:

# Rank the most likely foods for each time of day with a K-Nearest-Neighbours
# classifier trained on `userData` (loaded in an earlier cell; it must provide
# the `food_name` and `time_of_day` columns). The ranking is written to a text
# report and a CSV file, and displayed as the cell output.

# One-hot encode the categorical time-of-day feature.
df_encoded = pd.get_dummies(userData, columns=['time_of_day'])

# Separate features and target variable.
X = df_encoded.drop('food_name', axis=1)
y = df_encoded['food_name']

times_of_day = ['Morning', 'Afternoon', 'Evening', 'Night']

# One query row per time of day. Time of day is the only query dimension
# here, so the "permutations" are simply the labels themselves.
all_permutations = list(times_of_day)
manual_test = pd.DataFrame(all_permutations, columns=['time_of_day'])

# Encode the query rows, then align them to the training feature columns.
# get_dummies() only creates columns for labels that occur in the frame it is
# given, so if the training data lacks one of the four labels the two frames
# would otherwise disagree and predict_proba() would reject the input.
# Columns missing from the query frame are filled with 0; columns unknown to
# the model are dropped.
manual_encoded = pd.get_dummies(manual_test, columns=['time_of_day'])
manual_encoded = manual_encoded.reindex(columns=X.columns, fill_value=0)

# Split data into training and testing sets. Only the training part is used
# below; the held-out part is kept available for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=43)

k = 5

# Initialize and train the K-Nearest Neighbors (KNN) classifier.
# NOTE(review): if the dummy columns are the only features, every training row
# with the same time of day is at distance 0 from the query, so which k rows
# are picked among those ties (and hence the probabilities) looks arbitrary —
# confirm this is the intended model.
knn_classifier = KNeighborsClassifier(n_neighbors=k)
knn_classifier.fit(X_train, y_train)

# Names of the one-hot columns, in the same order as `times_of_day`.
time_cols = [f'time_of_day_{label}' for label in times_of_day]

y_pred_proba = knn_classifier.predict_proba(manual_encoded)

# Never request more predictions than there are classes; otherwise the
# range(top_n) loops below would index past the end of the arrays.
top_n = min(5, len(knn_classifier.classes_))

# Column indices of the top N classes for each query row, best first.
top_n_indices = (-y_pred_proba).argsort(axis=1)[:, :top_n]

# Class labels and probabilities (in percent) for those indices.
top_n_predictions = knn_classifier.classes_[top_n_indices]
top_n_probabilities = np.take_along_axis(y_pred_proba, top_n_indices, axis=1) * 100

result = []

# Food names may contain non-ASCII characters, so write the report as UTF-8
# explicitly instead of relying on the platform default encoding.
with open("../out/time_based_output_knn.txt", "w", encoding="utf-8") as f:
    # Rows of `manual_encoded` are in the same order as `manual_test`, so the
    # label is read directly rather than parsed back out of the column names.
    for i, time_of_day in enumerate(manual_test['time_of_day']):
        f.write("Time of Day: {}\n".format(time_of_day))
        f.write("Predictions:\n")

        row = {'time_of_day': time_of_day}
        for j in range(top_n):
            prediction = top_n_predictions[i][j]
            probability = top_n_probabilities[i][j]
            f.write("    - Prediction {}: {}, Probability: {:.2f}%\n".format(j + 1, prediction, probability))
            row[f'prediction{j+1}'] = prediction
            row[f'probability{j+1}'] = probability
        f.write("\n")
        result.append(row)

df_result = pd.DataFrame(result)

# NOTE(review): the CSV is written to the working directory while the text
# report goes to ../out/ — confirm whether both should live in ../out/.
df_result.to_csv('time_based_knn_results.csv', index=False)

df_result



Unnamed: 0,time_of_day,prediction1,probability1,prediction2,probability2,prediction3,probability3,prediction4,probability4,prediction5,probability5
0,Morning,protein shake with milk and peanut butter,20.0,pesto,20.0,optimum nutrition shake,20.0,milk,20.0,chick-fil-a sandwich,20.0
1,Afternoon,whataburger sweet tea,20.0,breadsticks,20.0,sausage egg and cheese,20.0,chocolate cookie,20.0,chocolate milk,20.0
2,Evening,sweet tea,40.0,mcdonald's quarter pounder,20.0,chick-fil-a sandwich,20.0,chick-fil-a nuggets,20.0,starbucks sausage biscuit,0.0
3,Night,bowl of oatmeal topped with fresh berries and ...,20.0,kodiak cake waffles,20.0,chobani yogurt,20.0,4oz chicken,20.0,starbucks sausage biscuits,20.0
