In [None]:
import pandas as pd
import numpy as np  
from itertools import product
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, top_k_accuracy_score



In [None]:
# Load the user log, discard the `weekday` column, and store `time_of_day`
# as a categorical whose categories are declared in chronological order.
# pandas turns any value outside the declared categories into NaN.
userData = (
    pd.read_csv('../data/userData.csv')
    .drop(columns=['weekday'])
    .assign(
        time_of_day=lambda frame: pd.Categorical(
            frame['time_of_day'],
            categories=['Morning', 'Afternoon', 'Evening', 'Night'],
        )
    )
)

userData

In [None]:
# Target column: the food name recorded on each row.
food = userData['food_name']

# Learn the name -> integer-id mapping and encode the column in one pass.
# `encoder` stays in scope so ids can be mapped back to names with
# `encoder.inverse_transform`.
encoder = LabelEncoder()
encoded_y = encoder.fit_transform(food)

encoded_y


In [None]:

# ---------------------------------------------------------------------------
# Features and target
# ---------------------------------------------------------------------------
# One-hot encode `time_of_day`; every column of `userData` other than the
# target `food_name` becomes a model input.
df_encoded = pd.get_dummies(userData, columns=['time_of_day'])

X = df_encoded.drop('food_name', axis=1)
y = encoded_y

# ---------------------------------------------------------------------------
# Manual evaluation grid: one row per time of day
# ---------------------------------------------------------------------------
times_of_day = ['Morning', 'Afternoon', 'Evening', 'Night']

# Only the time of day varies, so the "grid" is simply the list itself.
all_permutations = list(times_of_day)

# Build the column as a categorical with the same category order used for the
# training data. With plain strings `get_dummies` would emit the dummy columns
# alphabetically, which does not match the training column order.
manual_test = pd.DataFrame(
    {'time_of_day': pd.Categorical(all_permutations, categories=times_of_day)}
)
manual_encoded = pd.get_dummies(manual_test, columns=['time_of_day'])

# Number of distinct food items (= number of target classes in the full data).
n_outputs = np.unique(y).shape[0]

# ---------------------------------------------------------------------------
# Train / test split and model fit
# ---------------------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=43)

mlp_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=43)

# Fit on the training split ONLY. Fitting on the full (X, y) would let the
# model see the held-out rows and make every test-set metric optimistic.
# NOTE: if a food item occurs only in the test split, `classes_` will be a
# strict subset of the labels in `encoded_y`.
mlp_classifier.fit(X_train, y_train)

# Class probabilities for the held-out rows; columns follow `classes_`.
y_pred_proba = mlp_classifier.predict_proba(X_test)

# ---------------------------------------------------------------------------
# Top-N predictions per test sample
# ---------------------------------------------------------------------------
# Never ask for more predictions than the model has classes.
top_n = min(5, y_pred_proba.shape[1])

# Column indices of the N highest probabilities, best first.
top_n_indices = (-y_pred_proba).argsort(axis=1)[:, :top_n]

# Encoded class ids for those columns.
top_n_predictions = mlp_classifier.classes_[top_n_indices]

# Same array with the ids decoded back to food names. `inverse_transform`
# expects a 1-D input, hence the ravel / reshape round trip.
top_n_predictions_labels = encoder.inverse_transform(
    top_n_predictions.ravel()
).reshape(top_n_predictions.shape)

In [None]:

# Report the model's top predictions for each row of the manual grid
# (`manual_encoded`, one row per time of day), both as a text file and as a
# DataFrame.

# day_cols = ['weekday_Monday', 'weekday_Tuesday', 'weekday_Wednesday', 'weekday_Thursday', 'weekday_Friday', 'weekday_Saturday', 'weekday_Sunday']
time_cols = ['time_of_day_Morning', 'time_of_day_Afternoon', 'time_of_day_Evening', 'time_of_day_Night']
time_prefix = 'time_of_day_'

# Score the manual rows themselves. The test-set arrays (`y_pred_proba`,
# `top_n_predictions_labels`) describe rows of `X_test`, not of the manual
# grid, so indexing them with a manual-row index pairs each time of day with
# an unrelated sample.
# The columns are put in the exact order the model was trained on.
# NOTE(review): any training column absent from the manual grid is filled
# with 0 -- confirm that `X` holds only the time_of_day dummies.
manual_features = manual_encoded.reindex(columns=X.columns, fill_value=0)
manual_proba = mlp_classifier.predict_proba(manual_features)

# Column indices of the highest probabilities per manual row, best first,
# capped at the number of classes the model knows.
manual_top_n = min(top_n, manual_proba.shape[1])
manual_top_indices = (-manual_proba).argsort(axis=1)[:, :manual_top_n]

# Decode the winning class ids back to food names (inverse_transform needs 1-D).
manual_top_labels = encoder.inverse_transform(
    mlp_classifier.classes_[manual_top_indices].ravel()
).reshape(manual_top_indices.shape)

# One record per manual row: the time of day plus prediction/probability pairs.
result = []
for i in range(len(manual_encoded)):
    row = manual_encoded.iloc[i]
    # The active dummy column names the time of day for this row.
    time_of_day = [col[len(time_prefix):] for col in time_cols if row[col] == 1][0]

    record = {
        # 'day': day,
        'time_of_day': time_of_day,
    }
    for rank, (label, col_idx) in enumerate(
        zip(manual_top_labels[i], manual_top_indices[i]), start=1
    ):
        record[f'prediction{rank}'] = label
        record[f'probability{rank}'] = manual_proba[i][col_idx] * 100
    result.append(record)

# Human-readable dump of the same records.
with open("../out/time_based_output_mlp.txt", "w") as f:
    for record in result:
        f.write("Time of Day: {}\n".format(record['time_of_day']))
        f.write("Predictions:\n")
        for rank in range(1, manual_top_n + 1):
            f.write("    - Prediction {}: {}, Probability: {:.2f}%\n".format(
                rank, record[f'prediction{rank}'], record[f'probability{rank}']))
        f.write("\n")

df_result = pd.DataFrame(result)
df_result

In [None]:
# Top-k accuracy on the held-out split: the fraction of test rows whose true
# food is among the `top_n` highest-probability classes.

# Full label set, sorted (np.unique returns sorted values).
all_labels = np.unique(encoded_y)

# The columns of `y_pred_proba` follow `mlp_classifier.classes_`. When the
# model knows every label the scores can be used as they are. Otherwise they
# are spread into a matrix with one column per label, where labels the model
# never saw get probability 0, so the `labels` argument matches the score
# columns one-to-one.
seen_labels = mlp_classifier.classes_
if len(seen_labels) == len(all_labels):
    y_score = y_pred_proba
else:
    y_score = np.zeros((y_pred_proba.shape[0], len(all_labels)))
    y_score[:, np.searchsorted(all_labels, seen_labels)] = y_pred_proba

top_k_accuracy = top_k_accuracy_score(y_true=y_test, y_score=y_score, k=top_n, labels=all_labels)

print(f'Top-{top_n} Accuracy Score: {top_k_accuracy:.2f}')


