In [1]:
!pip install pulp --quiet
!pip install Sastrawi --quiet
import pulp
print(pulp.__version__)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.7/17.7 MB[0m [31m76.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.7/209.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h2.9.0


In [2]:
import pickle
import warnings
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from pulp import *

warnings.filterwarnings('ignore')

In [5]:
# Load the baby-food table from CSV. The name `df` is reused (and overwritten)
# by the preprocessing cell and read as a global by the recommendation functions.
df = pd.read_csv('data_baby_food.csv')
df

Unnamed: 0,Recommended_Age_Start(months),Recommended_Age_End(months),Food(per 100g),Calcium(mg),Protein(g),Carbohydrate(g),Fat(g),Calorie(kcal),Price_per_product(IDR),weight_product(g),Notes
0,0,6,ASI (Air Susu Ibu),34,1.2,7.0,4.2,70,0,100,ASI Eksklusif direkomendasikan oleh WHO sebaga...
1,0,6,Susu Formula Bayi 0-6 bulan,50,1.5,7.2,3.6,67,15800,150,Contoh merek: Bebelac Gold 1 SGM 1 Dancow 1. I...
2,6,12,Susu Formula Lanjutan 6-12 bulan,65,1.8,8.0,3.8,72,15800,150,Contoh merek: Bebelac Gold 2 SGM 2 Dancow 2. D...
3,6,8,Bubur Susu Beras Merah,45,3.5,25.0,1.0,123,7100,120,MPASI awal dengan tekstur halus. Campur beras ...
4,6,8,Bubur Susu Beras Putih,40,3.2,24.0,0.9,120,10500,120,MPASI awal dengan tekstur halus. Campur beras ...
...,...,...,...,...,...,...,...,...,...,...,...
71,9,12,Tumis Bayam dan Tahu,80,5.0,5.0,3.0,80,12000,250,Bayam dan tahu yang ditumis dan dihaluskan. Su...
72,12,60,Bubur Kacang Merah,90,8.0,29.0,0.5,160,12000,250,Kacang merah yang direbus dan dihaluskan denga...
73,12,60,Tim Tempe Kedelai,110,18.0,10.0,6.0,160,15000,250,Tempe yang dihaluskan dan dikukus. Tinggi prot...
74,12,60,Pure Buah Bit dan Apel,30,1.5,12.5,0.3,60,15000,250,Buah bit dan apel yang dikukus dan dihaluskan....


In [6]:
# Preprocessing data
# Fill missing values by column type: a blanket fillna(0) would write the integer 0
# into text columns such as 'Notes', which the TF-IDF step cannot tokenise.
text_columns = df.select_dtypes(exclude='number').columns
df = df.fillna({column: '' for column in text_columns}).fillna(0)
# Min-Max scale the nutrient and price columns so they share a consistent [0, 1] range.
# NOTE: this overwrites the columns of `df` in place and refits `scaler`; re-running this
# cell without reloading the CSV would fit the scaler on already-scaled values.
features_to_normalize = ['Calcium(mg)', 'Protein(g)', 'Carbohydrate(g)',
                         'Fat(g)', 'Calorie(kcal)', 'Price_per_product(IDR)']
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df[features_to_normalize])
df[features_to_normalize] = scaled_features


In [7]:
# Persist the fitted scaler so prices and nutrients can be de-normalised later.
# The target directory is created first; opening the file would otherwise fail
# with FileNotFoundError on a fresh checkout.
scaler_path = Path('models/recommend_food/scaler_food_recommendation.pkl')
scaler_path.parent.mkdir(parents=True, exist_ok=True)
with scaler_path.open('wb') as f:
    pickle.dump(scaler, f)

In [8]:
# Preview the first rows; the nutrient and price columns now hold Min-Max scaled values.
df.head()

Unnamed: 0,Recommended_Age_Start(months),Recommended_Age_End(months),Food(per 100g),Calcium(mg),Protein(g),Carbohydrate(g),Fat(g),Calorie(kcal),Price_per_product(IDR),weight_product(g),Notes
0,0,6,ASI (Air Susu Ibu),0.073059,0.036145,0.081776,0.280822,0.131105,0.0,100,ASI Eksklusif direkomendasikan oleh WHO sebaga...
1,0,6,Susu Formula Bayi 0-6 bulan,0.109589,0.048193,0.084112,0.239726,0.123393,0.351111,150,Contoh merek: Bebelac Gold 1 SGM 1 Dancow 1. I...
2,6,12,Susu Formula Lanjutan 6-12 bulan,0.143836,0.060241,0.093458,0.253425,0.136247,0.351111,150,Contoh merek: Bebelac Gold 2 SGM 2 Dancow 2. D...
3,6,8,Bubur Susu Beras Merah,0.098174,0.128514,0.292056,0.061644,0.267352,0.157778,120,MPASI awal dengan tekstur halus. Campur beras ...
4,6,8,Bubur Susu Beras Putih,0.086758,0.116466,0.280374,0.054795,0.25964,0.233333,120,MPASI awal dengan tekstur halus. Campur beras ...


In [11]:
# Load the Indonesian stop-word list from Sastrawi for text processing
stopword_factory = StopWordRemoverFactory()
stop_words_id = stopword_factory.get_stop_words()

# Build TF-IDF features from the "Notes" column (the food descriptions)
tfidf = TfidfVectorizer(stop_words=stop_words_id)
tfidf_matrix = tfidf.fit_transform(df['Notes'])

# Build a hybrid similarity matrix from nutrient values and food descriptions
def create_hybrid_similarity_matrix(df, tfidf_matrix, content_weight=0.7):
    """
    Blend nutrient-based and description-based cosine similarity into one matrix.

    Args:
        df (pd.DataFrame): Food table; must contain the five nutrient columns
            'Calcium(mg)', 'Protein(g)', 'Carbohydrate(g)', 'Fat(g)', 'Calorie(kcal)'.
        tfidf_matrix (sparse matrix): TF-IDF matrix of the food descriptions,
            one row per row of ``df``.
        content_weight (float): Weight given to the nutrient similarity; the
            description similarity receives ``1 - content_weight``.

    Returns:
        np.ndarray: Weighted sum of the two pairwise cosine-similarity matrices.
    """
    nutrient_columns = [
        'Calcium(mg)',
        'Protein(g)',
        'Carbohydrate(g)',
        'Fat(g)',
        'Calorie(kcal)',
    ]

    # Pairwise similarity of the nutrient profiles
    nutrient_sim = cosine_similarity(df[nutrient_columns])

    # Pairwise similarity of the TF-IDF description vectors
    text_sim = cosine_similarity(tfidf_matrix)

    # Weighted blend of both views
    text_weight = 1 - content_weight
    return content_weight * nutrient_sim + text_weight * text_sim

# Build the hybrid similarity matrix using the default content_weight (0.7)
hybrid_similarity = create_hybrid_similarity_matrix(df, tfidf_matrix)

In [12]:
# Save the hybrid similarity matrix so it can be reused without recomputation.
# The target directory is created first; opening the file would otherwise fail
# with FileNotFoundError on a fresh checkout.
similarity_path = Path('models/recommend_food/hybrid_similarity_matrix_food_recommendation.pkl')
similarity_path.parent.mkdir(parents=True, exist_ok=True)
with similarity_path.open('wb') as f:
    pickle.dump(hybrid_similarity, f)

In [13]:
def _add_total_bounds(prob, food_vars, per_unit, lower=None, upper=None):
    """Constrain ``sum(per_unit[i] * food_vars[i])`` to lie within [lower, upper].

    Args:
        prob: The LP problem the constraints are added to.
        food_vars (dict): Decision variables keyed by the labels of ``per_unit``.
        per_unit (pd.Series): Per-product coefficient (price or nutrient amount).
        lower (float | None): Lower bound on the total, skipped when None.
        upper (float | None): Upper bound on the total, skipped when None.
    """
    if upper is not None:
        prob += lpSum([per_unit.loc[i] * food_vars[i] for i in per_unit.index]) <= upper
    if lower is not None:
        prob += lpSum([per_unit.loc[i] * food_vars[i] for i in per_unit.index]) >= lower


def get_hybrid_recommendations(age_months, daily_needs, daily_budget, user_preferences=None):
    """
    Recommend foods for a baby via an integer linear program.

    Reads the module-level ``df`` (scaled food table), ``hybrid_similarity``,
    ``scaler`` and ``features_to_normalize``.

    Args:
        age_months (int): Age of the baby in months.
        daily_needs (dict): Daily targets under the keys 'calorie', 'protein',
            'carb' and 'fat'.
        daily_budget (int): Daily budget in IDR.
        user_preferences (list): Optional names of preferred foods, matched
            against the 'Food(per 100g)' column.

    Returns:
        tuple: ``(recommendations, summary)`` as produced by ``process_results``,
        or ``("No food matches the age criteria", {})`` when no food covers the age.

    Raises:
        KeyError: If ``daily_needs`` lacks one of the four required keys.
    """
    # Keep only foods whose recommended age range covers the baby's age.
    age_mask = (
        (df['Recommended_Age_Start(months)'] <= age_months) &
        (df['Recommended_Age_End(months)'] >= age_months)
    )
    # Work on a copy so the helper columns added below never touch the global table.
    filtered_df = df[age_mask].copy()

    if filtered_df.empty:
        return "No food matches the age criteria", {}

    # With preferences, score every candidate by its mean hybrid similarity to the
    # preferred foods (content-based similarity, not collaborative filtering).
    if user_preferences is not None:
        pref_indices = [pos for pos, food in enumerate(df['Food(per 100g)'])
                        if food in user_preferences]
        if pref_indices:
            similarity_scores = hybrid_similarity[pref_indices].mean(axis=0)
            # The similarity matrix is positional, so select with the positional mask
            # rather than with index labels.
            filtered_df['similarity_score'] = similarity_scores[age_mask.to_numpy()]
            filtered_df = filtered_df.sort_values('similarity_score', ascending=False)

    # Undo the Min-Max scaling to recover real prices and nutrient amounts.
    original_values = scaler.inverse_transform(filtered_df[features_to_normalize])
    for col_pos, feature in enumerate(features_to_normalize):
        filtered_df[f'Original_{feature}'] = original_values[:, col_pos]

    # Nutrient columns are divided by 100 and multiplied by the product weight
    # to obtain the amount contained in one product.
    for nutri in ['Calcium(mg)', 'Protein(g)', 'Carbohydrate(g)', 'Fat(g)', 'Calorie(kcal)']:
        filtered_df[f'Original_{nutri}_per_package'] = (
            filtered_df[f'Original_{nutri}'] / 100) * filtered_df['weight_product(g)']

    # Integer LP: one non-negative integer quantity per candidate food.
    prob = LpProblem("Hybrid_Food_Recommendation", LpMinimize)
    food_vars = LpVariable.dicts("Food", filtered_df.index, lowBound=0, cat='Integer')

    # Objective: minimise the number of products; when similarity scores exist,
    # each product costs (1 - similarity) so preferred-like foods are cheaper to pick.
    if 'similarity_score' in filtered_df.columns:
        prob += lpSum([food_vars[i] * (1 - filtered_df.loc[i, 'similarity_score'])
                       for i in filtered_df.index])
    else:
        prob += lpSum([food_vars[i] for i in filtered_df.index])

    # Allowed range of each total, as (lower, upper) multiples of the daily target.
    under_six_months = age_months < 6
    if under_six_months:
        budget_low, budget_high = None, 1.8  # spending cap only, no minimum spend
        nutrient_ranges = [
            ('Calorie(kcal)', 'calorie', 0.8, 1.5),
            ('Protein(g)', 'protein', 0.5, 2.0),
            ('Carbohydrate(g)', 'carb', 0.5, 1.5),
            ('Fat(g)', 'fat', 0.5, 1.5),
        ]
    else:
        budget_low, budget_high = 0.8, 1.5
        nutrient_ranges = [
            ('Calorie(kcal)', 'calorie', 0.8, 1.2),
            ('Protein(g)', 'protein', 0.8, 2.0),
            ('Carbohydrate(g)', 'carb', 0.8, 1.2),
            ('Fat(g)', 'fat', 0.8, 1.2),
        ]

    _add_total_bounds(
        prob, food_vars, filtered_df['Original_Price_per_product(IDR)'],
        lower=None if budget_low is None else daily_budget * budget_low,
        upper=daily_budget * budget_high,
    )
    for nutrient, need_key, low, high in nutrient_ranges:
        _add_total_bounds(
            prob, food_vars, filtered_df[f'Original_{nutrient}_per_package'],
            lower=daily_needs[need_key] * low,
            upper=daily_needs[need_key] * high,
        )

    if under_six_months:
        # Prioritise breast milk (ASI) regardless of the stated preferences.
        asi_index = filtered_df[filtered_df['Food(per 100g)'] == 'ASI (Air Susu Ibu)'].index
        formula_index = filtered_df[filtered_df['Food(per 100g)'] == 'Susu Formula Bayi 0-6 bulan'].index

        if len(asi_index) > 0:
            # At least one unit of ASI.
            prob += food_vars[asi_index[0]] >= 1

            if len(formula_index) > 0:
                # Formula is limited to half the ASI quantity ...
                prob += food_vars[formula_index[0]] <= food_vars[asi_index[0]] * 0.5
                # ... and to at most one product overall.
                prob += food_vars[formula_index[0]] <= 1

    # Solve silently with the bundled CBC solver.
    prob.solve(PULP_CBC_CMD(msg=0))

    return process_results(prob, filtered_df, food_vars, daily_budget)

def process_results(prob, filtered_df, food_vars, daily_budget):
    """
    Turn a solved LP into a recommendation table plus nutrient and budget totals.

    Args:
        prob: The solved Linear Programming model.
        filtered_df (DataFrame): Candidate foods; must hold 'Food(per 100g)',
            'weight_product(g)', 'Notes', 'Original_Price_per_product(IDR)' and the
            'Original_<nutrient>_per_package' columns for calorie, protein,
            carbohydrate and fat.
        food_vars (dict): Decision variables keyed by the index labels of ``filtered_df``.
        daily_budget (int): Daily budget in IDR.

    Returns:
        tuple: ``(DataFrame, dict)`` when the status is 'Optimal'; the dict holds
        'Total_Nutrients' and 'Remaining_Budget'. If the optimum selects no food
        the DataFrame is empty and the totals are zero. For any other status
        ``("No optimal solution found", {})`` is returned.
    """
    if LpStatus[prob.status] != 'Optimal':
        return "No optimal solution found", {}

    # Evaluate every variable once; an unassigned variable (None) becomes NaN and
    # is therefore never selected by the threshold below.
    quantities = pd.Series(
        [value(food_vars[i]) for i in filtered_df.index],
        index=filtered_df.index,
        dtype=float,
    )
    chosen = (quantities > 0.01).to_numpy()

    final_recommendations = filtered_df.loc[chosen].copy()
    final_recommendations['Quantity'] = quantities.to_numpy()[chosen]
    final_recommendations['Total_Price'] = (
        final_recommendations['Quantity'] *
        final_recommendations['Original_Price_per_product(IDR)'])

    # Rename columns to the user-facing names
    column_mapping = {
        'Food(per 100g)': 'Nama_Makanan',
        'Quantity': 'Banyak_produk',
        'Original_Price_per_product(IDR)': 'Harga_per_Porsi(IDR)',
        'Total_Price': 'Total_Harga',
        'Original_Calorie(kcal)_per_package': 'Kalori_per_Porsi(kcal)',
        'Original_Protein(g)_per_package': 'Protein_per_Porsi(gr)',
        'Original_Carbohydrate(g)_per_package': 'Karbohidrat_per_Porsi(gr)',
        'Original_Fat(g)_per_package': 'Lemak_per_Porsi(gr)',
        'weight_product(g)': 'Berat_per_Produk(gr)'
    }
    final_recommendations = final_recommendations.rename(columns=column_mapping)

    # Totals: per-product amount times number of products, summed over the selection.
    totals_spec = {
        'Kalori': 'Kalori_per_Porsi(kcal)',
        'Protein': 'Protein_per_Porsi(gr)',
        'Karbohidrat': 'Karbohidrat_per_Porsi(gr)',
        'Lemak': 'Lemak_per_Porsi(gr)',
    }
    product_counts = final_recommendations['Banyak_produk']
    total_nutrients = {
        label: float((final_recommendations[column] * product_counts).sum())
        for label, column in totals_spec.items()
    }

    total_cost = float(final_recommendations['Total_Harga'].sum())
    remaining_budget = daily_budget - total_cost

    output_columns = ['Nama_Makanan', 'Banyak_produk',
                      'Berat_per_Produk(gr)',
                      'Harga_per_Porsi(IDR)', 'Total_Harga',
                      'Kalori_per_Porsi(kcal)', 'Protein_per_Porsi(gr)',
                      'Karbohidrat_per_Porsi(gr)', 'Lemak_per_Porsi(gr)',
                      'Notes']
    return final_recommendations[output_columns], {
        'Total_Nutrients': total_nutrients,
        'Remaining_Budget': remaining_budget
    }



In [14]:
# Example usage with user preferences.
# Alternative scenario for an older baby, kept for reference:
# daily_needs = {
#     'calorie': 700.5,
#     'protein': 20.1,
#     'carb': 120.5,
#     'fat': 30.2
# }

# user_preferences = ['Biskuit Bayi (promina)']
daily_needs = {
    'calorie': 450,
    'protein': 9.5,
    'carb': 31,
    'fat': 48
}
user_preferences = ['ASI (Air Susu Ibu)']
recommendations, summary = get_hybrid_recommendations(
    age_months=3,
    daily_needs=daily_needs,
    daily_budget=50000,
    user_preferences=user_preferences
)
print("Rekomendasi Makanan:")
# The result is either a DataFrame or an explanatory message string; print it
# exactly once (the message used to be printed twice).
print(recommendations)
if not isinstance(recommendations, str):
    print("\nRingkasan Nutrisi:")
    if 'Total_Nutrients' in summary:
        print(f"Total nutrisi terkumpul: {summary['Total_Nutrients']}")
    if 'Remaining_Budget' in summary:
        print(f"Sisa budget: Rp {summary['Remaining_Budget']:,.2f}")

Rekomendasi Makanan:
         Nama_Makanan  Banyak_produk  Berat_per_Produk(gr)  \
0  ASI (Air Susu Ibu)            6.0                   100   

   Harga_per_Porsi(IDR)  Total_Harga  Kalori_per_Porsi(kcal)  \
0                   0.0          0.0                    70.0   

   Protein_per_Porsi(gr)  Karbohidrat_per_Porsi(gr)  Lemak_per_Porsi(gr)  \
0                    1.2                        7.0                  4.2   

                                               Notes  
0  ASI Eksklusif direkomendasikan oleh WHO sebaga...  

Ringkasan Nutrisi:
Total nutrisi terkumpul: {'Kalori': 419.9999999999999, 'Protein': 7.199999999999999, 'Karbohidrat': 42.00000000000001, 'Lemak': 25.200000000000006}
Sisa budget: Rp 50,000.00
