In [None]:
import pandas as pd
import random
import uuid
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Path to the nutrition CSV; the '/content/' prefix suggests a Colab session,
# so adjust it when running elsewhere.
DATA_PATH = '/content/data_nutrisi_2.csv'
food_data = pd.read_csv(DATA_PATH)

In [None]:
# Remove the 'air' column from the table.
food_data = food_data.drop(columns='air')

In [None]:
# Display the table as it stands after the 'air' column was removed.
food_data

Unnamed: 0,makanan,kalori,protein,lemak,karbohidrat,serat,Kategori,vegan/nonvegan
0,Nasi,180,3.0,0.3,39.8,0.2,Serealia,Vegan
1,Nasi tim,120,2.4,0.4,26.0,0.5,Serealia,Vegan
2,Tapai beras,99,1.7,0.3,22.4,0.0,Serealia,Vegan
3,Nasi beras merah,149,2.8,0.4,32.5,0.3,Serealia,Vegan
4,Bihun goreng instan,381,6.1,3.9,80.3,0.0,Serealia,Vegan
...,...,...,...,...,...,...,...,...
587,"Sukun tua, segar",126,1.6,0.2,24.5,1.5,Buah,Vegan
588,"Vigus, segar",44,2.4,0.1,8.3,0.0,Buah,Vegan
589,"Wani, segar",74,0.8,0.3,17.1,3.1,Buah,Vegan
590,"Mentimun Suri, segar",16,1.3,0.0,2.1,0.9,Buah,Vegan


In [None]:
# Nutrient columns that compete for the "top two" label of each food.
numeric_cols = ['protein', 'lemak', 'karbohidrat', 'serat']


def get_top_2_nutrients(row):
    """Return the names of the two largest nutrient values in ``row``.

    The columns listed in ``numeric_cols`` are coerced to numbers; entries
    that cannot be parsed become NaN and are ignored.

    Parameters
    ----------
    row : pandas.Series
        One record containing at least the columns in ``numeric_cols``.

    Returns
    -------
    str
        The two column names joined by ``', '``, largest value first, or an
        empty string when fewer than two values are numeric.
    """
    amounts = pd.to_numeric(row[numeric_cols], errors='coerce').dropna()
    if len(amounts) >= 2:
        # nlargest orders by value descending, so the dominant nutrient is first.
        return ', '.join(amounts.nlargest(2).index)
    return ''

# Label every food with its two dominant nutrients (computed row by row).
top_2_per_food = food_data.apply(get_top_2_nutrients, axis=1)
food_data['2_nutrisi_paling_banyak'] = top_2_per_food

food_data

Unnamed: 0,makanan,kalori,protein,lemak,karbohidrat,serat,Kategori,vegan/nonvegan,2_nutrisi_paling_banyak
0,Nasi,180,3.0,0.3,39.8,0.2,Serealia,Vegan,"karbohidrat, protein"
1,Nasi tim,120,2.4,0.4,26.0,0.5,Serealia,Vegan,"karbohidrat, protein"
2,Tapai beras,99,1.7,0.3,22.4,0.0,Serealia,Vegan,"karbohidrat, protein"
3,Nasi beras merah,149,2.8,0.4,32.5,0.3,Serealia,Vegan,"karbohidrat, protein"
4,Bihun goreng instan,381,6.1,3.9,80.3,0.0,Serealia,Vegan,"karbohidrat, protein"
...,...,...,...,...,...,...,...,...,...
587,"Sukun tua, segar",126,1.6,0.2,24.5,1.5,Buah,Vegan,"karbohidrat, protein"
588,"Vigus, segar",44,2.4,0.1,8.3,0.0,Buah,Vegan,"karbohidrat, protein"
589,"Wani, segar",74,0.8,0.3,17.1,3.1,Buah,Vegan,"karbohidrat, serat"
590,"Mentimun Suri, segar",16,1.3,0.0,2.1,0.9,Buah,Vegan,"karbohidrat, protein"


In [None]:
# Drop the raw nutrient amounts; only the descriptive columns are kept.
nutrient_columns = ['kalori', 'protein', 'lemak', 'karbohidrat', 'serat']
food_data = food_data.drop(columns=nutrient_columns)

In [None]:
# Show which columns remain after the nutrient columns were dropped.
food_data.columns

Index(['makanan', 'Kategori', 'vegan/nonvegan', '2_nutrisi_paling_banyak'], dtype='object')

In [None]:
# Build one whitespace-separated "tag" string per food from its vegan flag and
# its two dominant nutrients, e.g. "vegan karbohidrat protein".
#
# This is done with vectorized string operations instead of assigning to `row`
# inside `iterrows()`: pandas does not guarantee that writes to the iterated
# row reach the DataFrame (the row may be a copy), in which case 'tag' would
# silently stay empty.

# Commas become spaces, then everything is lower-cased.
vegan = (
    food_data['vegan/nonvegan']
    .str.replace(',', ' ', regex=False)
    .str.lower()
)

# Strip the spaces first so that "a, b" becomes "a b" rather than "a  b".
nutrisi = (
    food_data['2_nutrisi_paling_banyak']
    .str.replace(' ', '', regex=False)
    .str.replace(',', ' ', regex=False)
    .str.lower()
)

food_data['tag'] = vegan + ' ' + nutrisi

recommend = food_data[['makanan','Kategori','tag']]
recommend

Unnamed: 0,makanan,Kategori,tag
0,Nasi,Serealia,vegan karbohidrat protein
1,Nasi tim,Serealia,vegan karbohidrat protein
2,Tapai beras,Serealia,vegan karbohidrat protein
3,Nasi beras merah,Serealia,vegan karbohidrat protein
4,Bihun goreng instan,Serealia,vegan karbohidrat protein
...,...,...,...
587,"Sukun tua, segar",Buah,vegan karbohidrat protein
588,"Vigus, segar",Buah,vegan karbohidrat protein
589,"Wani, segar",Buah,vegan karbohidrat serat
590,"Mentimun Suri, segar",Buah,vegan karbohidrat protein


In [None]:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors


selected_features = ['Kategori', 'tag']

# Work on an explicit copy: `recommend` is itself a column selection of
# `food_data`, and adding a column to a selection of a selection can trigger
# pandas' SettingWithCopyWarning.
features = recommend[selected_features].copy()

# TF-IDF encode the tag strings (sparse matrix, one row per food).
vectorizer = TfidfVectorizer()
deskripsi_embedding = vectorizer.fit_transform(features['tag'])

# Densify the whole matrix once rather than converting it row by row and
# concatenating the pieces; the resulting array is the same.
deskripsi_array = deskripsi_embedding.toarray()

# Keep the per-food embedding column, one (1, n_terms) array per row.
features['deskripsi_embedding'] = [vec.reshape(1, -1) for vec in deskripsi_array]

# Cosine-distance nearest-neighbour index over the tag embeddings.
model = NearestNeighbors(n_neighbors=10, metric='cosine')
model.fit(deskripsi_array)

# Query with the same vocabulary the foods were encoded with.
user_query = "vegan karbohidrat protein"
user_query_embedding = vectorizer.transform([user_query]).toarray()

distances, indices = model.kneighbors(user_query_embedding)

# `indices` are row positions in the fitted array, which follows the row
# order of `food_data`, hence positional `.iloc`.
top_10_food = food_data.iloc[indices[0]]

top_10_food


Unnamed: 0,makanan,Kategori,vegan/nonvegan,2_nutrisi_paling_banyak,tag
367,Kembang tahu,Kacang,Vegan,"protein, karbohidrat",vegan protein karbohidrat
355,Emping komak,Kacang,Vegan,"karbohidrat, protein",vegan karbohidrat protein
96,Spaghetti,Serealia,Vegan,"karbohidrat, protein",vegan karbohidrat protein
382,Oncom ampas kacang hijau,Kacang,Vegan,"karbohidrat, protein",vegan karbohidrat protein
0,Nasi,Serealia,Vegan,"karbohidrat, protein",vegan karbohidrat protein
381,Oncom,Kacang,Vegan,"karbohidrat, protein",vegan karbohidrat protein
100,Tipa-tipa,Serealia,Vegan,"karbohidrat, protein",vegan karbohidrat protein
368,Kembang tahu rebus,Kacang,Vegan,"protein, karbohidrat",vegan protein karbohidrat
94,Roti warna sawo matang,Serealia,Vegan,"karbohidrat, protein",vegan karbohidrat protein
106,"Bayam, kukus",Sayur,Vegan,"karbohidrat, protein",vegan karbohidrat protein
