In [1]:
import pandas as pd

# Read the 'Sheet2' worksheet of the menu workbook into a DataFrame.
df = pd.read_excel(io='wongnai_menu_info.xlsx', sheet_name='Sheet2')

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,menu_name,ingredients,characteristics
0,Blossom Coconut Roll (8512013066),"แป้งเค้ก, มะพร้าวขูด, น้ำตาล, ไข่, เนย","หวาน, มัน"
1,Lychee Rose Juice Mixed with Pomegranate (8512...,"ลิ้นจี่, น้ำกุหลาบ, น้ำทับทิม, น้ำตาล, น้ำแข็ง","หวาน, เปรี้ยว, มัน"
2,Mango Yogurt Cake (8512003482),"มะม่วง, โยเกิร์ต, แป้งเค้ก, น้ำตาล, ไข่, เนย","หวาน, มัน"
3,Honey Nougat Cream Frappuccino (3PL0013920),"นม, น้ำผึ้ง, นูกัต, ครีม, กาแฟ, น้ำแข็ง","หวาน, มัน"
4,Honey Nougat Oatmilk Macchiato (3PL0013861),"นมข้าวโอ๊ต, น้ำผึ้ง, นูกัต, กาแฟ","หวาน, มัน"


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Work on a copy so the frame loaded from Excel is left untouched.
df_sheet2 = df.copy()

# Combine ingredients and characteristics into a single comma-separated text field.
# Missing values are replaced with "" first: NaN + str stays NaN, and the
# vectorizer refuses NaN documents.
df_sheet2["combined_features"] = (
    df_sheet2["ingredients"].fillna("").astype(str)
    + ", "
    + df_sheet2["characteristics"].fillna("").astype(str)
)


def split_features(text):
    """Split a comma-separated feature string into stripped, non-empty tokens.

    Splitting on "," and stripping each piece makes the result independent of
    how many spaces follow a comma, and empty pieces (e.g. from a missing
    column) are dropped instead of becoming an empty-string feature.
    """
    return [token.strip() for token in text.split(",") if token.strip()]


# Each ingredient / characteristic is one token. token_pattern=None tells
# scikit-learn the default regex is intentionally unused, which avoids the
# "token_pattern will not be used" warning.
vectorizer = TfidfVectorizer(tokenizer=split_features, token_pattern=None)

# Fit the vocabulary and transform every menu into a TF-IDF row vector.
tfidf_matrix = vectorizer.fit_transform(df_sheet2["combined_features"])

# Dense DataFrame view (one column per token) for easier inspection.
feature_names = vectorizer.get_feature_names_out()
df_tfidf = pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)



In [3]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between every pair of TF-IDF rows.
cosine_sim_matrix = cosine_similarity(X=tfidf_matrix, Y=tfidf_matrix)

# Label both axes with the menu names so scores can be looked up by name.
df_cosine_sim = pd.DataFrame(
    data=cosine_sim_matrix,
    index=df_sheet2["menu_name"],
    columns=df_sheet2["menu_name"],
)


In [4]:
# Function to recommend similar menus based on liked menu
def get_recommendations(liked_menu, df_cosine_sim, top_n=5):
    """Return the names of the menus most similar to ``liked_menu``.

    Parameters
    ----------
    liked_menu : str
        Menu name to look up; it must label both a row and a column of
        ``df_cosine_sim``.
    df_cosine_sim : pandas.DataFrame
        Similarity matrix labelled by menu name on both axes.
    top_n : int, default 5
        Maximum number of recommendations to return. Values <= 0 give ``[]``.

    Returns
    -------
    list or str
        Up to ``top_n`` menu names ordered from most to least similar,
        never including ``liked_menu`` itself. If ``liked_menu`` is not in
        the matrix, a "not found" message string is returned instead (kept
        for backward compatibility with existing callers).
    """
    if liked_menu not in df_cosine_sim.index or liked_menu not in df_cosine_sim.columns:
        return f"Menu '{liked_menu}' not found in dataset."

    # Get similarity scores for the liked menu
    sim_scores = df_cosine_sim[liked_menu]
    if sim_scores.ndim == 2:
        # Duplicate menu names make the column lookup return a DataFrame;
        # use the first matching column so the scores are a Series again.
        sim_scores = sim_scores.iloc[:, 0]

    # Exclude the liked menu by label, not by position: with ties at the top
    # score (or float rounding) the liked menu is not guaranteed to sort first,
    # so slicing off position 0 could drop a real match and keep the menu itself.
    sim_scores = sim_scores.drop(labels=liked_menu)

    # Stable sort keeps the original row order among equal scores, so the
    # result is deterministic.
    ranked = sim_scores.sort_values(ascending=False, kind="mergesort")

    return ranked.head(max(top_n, 0)).index.tolist()

# Display function to allow user input
def recommend_based_on_input(liked_menu, top_n=5):
    """Recommend menus similar to ``liked_menu`` using the notebook's similarity matrix.

    Convenience wrapper around ``get_recommendations`` that supplies the
    module-level ``df_cosine_sim``.

    Parameters
    ----------
    liked_menu : str
        Name of the menu the user likes.
    top_n : int, default 5
        Maximum number of recommendations, forwarded to ``get_recommendations``.

    Returns
    -------
    Whatever ``get_recommendations`` returns for these arguments.
    """
    return get_recommendations(liked_menu, df_cosine_sim, top_n=top_n)

# All menu names as a plain Python list, for user reference.
menu_list = df_sheet2["menu_name"].to_list()


In [11]:
# Only the last expression of a cell is displayed automatically, so the
# recommendation list is printed explicitly instead of being discarded.
print(recommend_based_on_input("Blossom Coconut Roll (8512013066)"))

# Show the ingredients and characteristics of the selected menu
df_sheet2.loc[
    df_sheet2["menu_name"] == "Blossom Coconut Roll (8512013066)",
    ["menu_name", "ingredients", "characteristics"],
]

Unnamed: 0,menu_name,ingredients,characteristics
0,Blossom Coconut Roll (8512013066),"แป้งเค้ก, มะพร้าวขูด, น้ำตาล, ไข่, เนย","หวาน, มัน"


In [10]:
# Show the ingredients and characteristics of the recommended menus.
# Rows come back in the frame's own order, not in similarity order.
df_sheet2.loc[
    df_sheet2["menu_name"].isin(recommend_based_on_input("Blossom Coconut Roll (8512013066)")),
    ["menu_name", "ingredients", "characteristics"],
]

Unnamed: 0,menu_name,ingredients,characteristics
2,Mango Yogurt Cake (8512003482),"มะม่วง, โยเกิร์ต, แป้งเค้ก, น้ำตาล, ไข่, เนย","หวาน, มัน"
102,Glazed Donut (8512004610),"แป้ง, น้ำตาล, ไข่, นม","หวาน, มัน"
104,Chocolate Glazed Donut (8512005743),"แป้ง, ช็อกโกแลต, น้ำตาล, ไข่, นม","หวาน, ขม, มัน"
122,Croque Monsieur (8512013938),"ขนมปัง, แฮม, ชีส, ครีม, เนย","เค็ม, มัน, หอม"
184,ข้าวผัดกุ้ง,"ข้าว, กุ้ง, ไข่, น้ำปลา","เค็ม, มัน"
