In [12]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the food composition table from the semicolon-separated CSV file
data = pd.read_csv("makanan4.csv", sep=";")

# Drop the columns that are not needed for the similarity computation
data = data.drop(['id', 'kode', 'sumber', 'gambar'], axis=1)

# Nutrient columns that make up the feature vector of every food
numeric_cols = ['air_gram', 'energi_kal', 'protein_gram', 'lemak_gram', 'karbohidrat_gram', 'serat_gram',
                'kalsium_mg', 'fosfor_mg', 'zatbesi_mg', 'natrium_mg', 'kalium_mg', 'tembaga_mg', 'seng_mg', 'vitc_mg']

# Replace ',' with '.' in the nutrient values (presumably a decimal comma in the
# CSV -- confirm against the file) and convert them to float explicitly, so
# that `data` holds real numbers instead of relying on the scaler to coerce
# strings implicitly.
data[numeric_cols] = data[numeric_cols].replace({',': '.'}, regex=True).astype(float)

# Standardise the features (StandardScaler with default settings)
scaler = StandardScaler()
data_normalized = scaler.fit_transform(data[numeric_cols])

# Pairwise cosine similarity between all foods; row/column i corresponds to
# the i-th row of `data`
cosine_sim = cosine_similarity(data_normalized, data_normalized)

dfCosine = pd.DataFrame(cosine_sim)
print(dfCosine)

# Function that returns food recommendations
def get_recommendations(food_name, allergy_list, top_n=10):
    """Return the foods most similar to ``food_name``, skipping allergens.

    Reads the module-level ``data`` frame (columns ``nama_bahan`` and
    ``jenis_pangan``) and the module-level ``cosine_sim`` matrix, whose row
    ``i`` holds the similarity scores of the ``i``-th row of ``data``.

    Args:
        food_name: Exact ``nama_bahan`` value of the food the user has. If
            several rows share the name, the first one is used.
        allergy_list: Iterable of allergen terms. A candidate is dropped when
            a term occurs (case-insensitive, literal substring) in its
            ``nama_bahan`` or ``jenis_pangan``. Blank terms are ignored.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        A tuple ``(names, scores)`` of two equally long lists ordered by
        descending similarity. Fewer than ``top_n`` entries are returned when
        not enough candidates remain after filtering.

    Raises:
        IndexError: If ``food_name`` is not present in ``data['nama_bahan']``.
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Locate the query food by row position (not index label), because
    # cosine_sim is addressed positionally.
    name_matches = (data['nama_bahan'] == food_name).to_numpy()
    if not name_matches.any():
        raise IndexError(f"food not found in 'nama_bahan': {food_name!r}")
    food_pos = int(name_matches.argmax())

    # Mark every row that mentions one of the allergens in its name or type.
    blocked = pd.Series(False, index=data.index)
    for allergy in allergy_list:
        if not allergy or not allergy.strip():
            # An empty term is a substring of everything and would block all rows.
            continue
        for column in ('nama_bahan', 'jenis_pangan'):
            # regex=False: allergens are plain words, not patterns;
            # na=False: rows with a missing value never match.
            blocked |= data[column].str.contains(allergy, case=False, regex=False, na=False)
    blocked = blocked.to_numpy(dtype=bool)

    # Keep every allowed food except the query itself. Excluding by position
    # stays correct even if another food ties with a similarity of 1.0.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[food_pos])
        if pos != food_pos and not blocked[pos]
    ]

    # Highest similarity first; the sort is stable, so ties keep row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    best = candidates[:top_n]

    top_similar_food_names = data['nama_bahan'].iloc[[pos for pos, _ in best]].tolist()
    top_similar_food_scores = [score for _, score in best]  # similarity values

    return top_similar_food_names, top_similar_food_scores

# Example usage
food_name = 'kacang panjang kukus'  # replace with the food the user has
allergy_list = []  # replace with the allergies the user has
recommendations, similarity_scores = get_recommendations(food_name, allergy_list)

# Print the recommendations as a numbered list together with their scores
print("Rekomendasi makanan:")
for rank, (food, score) in enumerate(zip(recommendations, similarity_scores), start=1):
    print(f"{rank}. {food} (Similarity Score: {score})")

          0         1         2         3         4         5         6     \
0     1.000000  0.510794  0.587677  0.579043 -0.479431  0.489063  0.101022   
1     0.510794  1.000000  0.535564  0.307021 -0.165394  0.085177 -0.336407   
2     0.587677  0.535564  1.000000  0.870087 -0.778481  0.709738 -0.322145   
3     0.579043  0.307021  0.870087  1.000000 -0.746974  0.869976 -0.246421   
4    -0.479431 -0.165394 -0.778481 -0.746974  1.000000 -0.633547  0.380902   
...        ...       ...       ...       ...       ...       ...       ...   
1059 -0.385342  0.073728 -0.564907 -0.523034  0.643073 -0.559502  0.246520   
1060 -0.460501  0.180206 -0.484256 -0.528441  0.698491 -0.595215  0.125032   
1061  0.623772  0.066397  0.300521  0.318279 -0.096198  0.428650  0.223414   
1062  0.618995 -0.049574  0.394036  0.387276 -0.661863  0.333887 -0.065051   
1063 -0.637071  0.077781 -0.363041 -0.639640  0.499696 -0.664500 -0.270200   

          7         8         9     ...      1054      1055    