In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import json
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (
    MultiLabelBinarizer, 
    LabelEncoder, 
    StandardScaler
)
from sklearn.metrics import mean_absolute_error, mean_squared_error  # Tambahkan baris ini
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
df = pd.read_csv("D:\\UB\\Dicoding\\GitHub\\Capstone_Akmal_Andri_Zahran\\recipes_crop.csv")

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 141788 entries, 0 to 141787
Data columns (total 27 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   RecipeId                    141788 non-null  int64  
 1   Name                        141788 non-null  object 
 2   AuthorId                    141788 non-null  int64  
 3   AuthorName                  141788 non-null  object 
 4   CookTime                    141788 non-null  object 
 5   PrepTime                    141788 non-null  object 
 6   TotalTime                   141788 non-null  object 
 7   DatePublished               141788 non-null  object 
 8   Description                 141788 non-null  object 
 9   Images                      141788 non-null  object 
 10  RecipeCategory              141788 non-null  object 
 11  Keywords                    141788 non-null  object 
 12  RecipeIngredientQuantities  141788 non-null  object 
 13  RecipeIngredie

In [4]:
CUISINE_KEYWORDS = {
    'Asian': [
        'asian', 'chinese', 'japanese', 'korean', 'thai', 'vietnamese', 
        'indonesian', 'malaysian', 'filipino', 'indian', 'cambodian', 
        'nepalese', 'pakistani', 'hunan', 'szechuan', 'cantonese'
    ],
    'Western': [
        'american', 'canadian', 'australian', 'new zealand', 'mexican', 
        'brazilian', 'chilean', 'colombian', 'peruvian', 'tex mex', 
        'southwestern u.s.', 'pennsylvania dutch'
    ],
    'European': [
        'italian', 'spanish', 'french', 'german', 'polish', 'russian', 
        'hungarian', 'czech', 'swedish', 'danish', 'norwegian', 'finnish', 
        'scottish', 'welsh', 'belgian', 'dutch', 'greek', 'austrian', 
        'swiss', 'portuguese', 'scandinavian'
    ],
    'Middle Eastern': [
        'lebanese', 'turkish', 'palestinian', 'iraqi', 'egyptian', 
        'moroccan', 'southwest asia', 'israeli'
    ],
    'African': [
        'south african', 'ethiopian', 'nigerian', 'sudanese', 'somalian', 
        'african', 'egyptian', 'moroccan'
    ],
    'Latin American': [
        'brazilian', 'chilean', 'colombian', 'costa rican', 'cuban', 
        'ecuadorean', 'guatemalan', 'honduran', 'mexican', 'peruvian', 
        'puerto rican', 'venezuelan'
    ]
}

MEAL_TYPE_KEYWORDS = {
    'Breakfast': [
        'breakfast', 'pancake', 'oatmeal', 'eggs', 'eggs breakfast', 
        'brunch', 'quick breads', 'yeast breads'
    ],
    'Lunch': [
        'lunch', 'salad', 'sandwich', 'lunch/snacks', 'light meal', 
        'college food', 'one dish meal'
    ],
    'Dinner': [
        'dinner', 'main course', 'protein', 'meat', 'stew', 'roast', 
        'poultry', 'whole chicken', 'whole turkey', 'whole duck'
    ],
    'Dessert': [
        'dessert', 'desserts easy', 'sweet', 'cake', 'cookie', 'pie', 
        'ice cream', 'cheesecake', 'frozen desserts', 'chocolate chip cookies'
    ],
    'Snack': [
        'snack', 'appetizer', 'bar cookie', 'candy', 'quick bite', 
        'spreads', 'shakes', 'smoothies'
    ]
}

DIET_KEYWORDS = {
    'Vegetarian': [
        'vegetable', 'vegan', 'egg free', 'dairy free', 'soy/tofu', 
        'tempeh', 'no meat'
    ],
    'Pescatarian': [
        'fish', 'seafood', 'salmon', 'tuna', 'tilapia', 'halibut', 
        'mahi mahi', 'catfish', 'trout', 'bass', 'crab', 'lobster', 
        'shrimp', 'oysters', 'mussels'
    ],
    'Low Calorie': [
        'low protein', 'low cholesterol', 'healthy', 'high fiber', 
        'very low carbs', 'light meal'
    ],
    'High Protein': [
        'high protein', 'protein', 'chicken breast', 'lean meat', 
        'turkey breast'
    ],
    'Special Diets': [
        'gluten free', 'lactose free', 'kosher', 'halal', 
        'no shell fish', 'no cook'
    ]
}

In [5]:
def preprocess_recipe_data(df):
    # Handle Missing Values
    df.dropna(subset=['Name', 'Calories', 'ProteinContent'], inplace=True)
    
    # Fungsi ekstraksi fitur dengan multi-label
    def extract_multi_label_features(keywords, feature_dict):
        keywords_lower = str(keywords).lower()
        matched_features = []
        
        for category, category_keywords in feature_dict.items():
            if any(kw in keywords_lower for kw in category_keywords):
                matched_features.append(category)
        
        return matched_features if matched_features else ['Other']
    
    # Fungsi untuk memastikan single label
    def get_single_label(features):
        return features[0] if features else 'Other'
    
    # Ekstraksi fitur
    df['Cuisine'] = df['Keywords'].apply(
        lambda x: get_single_label(extract_multi_label_features(x, CUISINE_KEYWORDS))
    )
    
    df['MealType'] = df.apply(
        lambda row: get_single_label(
            extract_multi_label_features(
                str(row['Name']) + ' ' + str(row['Keywords']), 
                MEAL_TYPE_KEYWORDS
            )
        ), 
        axis=1
    )
    
    df['DietType'] = df['Keywords'].apply(
        lambda x: extract_multi_label_features(x, DIET_KEYWORDS)
    )
    
    # Feature Engineering
    numeric_features = ['Calories', 'ProteinContent', 'CarbohydrateContent', 'FatContent']
    for feature in numeric_features:
        df[f'{feature}_Normalized'] = (df[feature] - df[feature].mean()) / df[feature].std()
    
    return df

In [6]:
def prepare_model_data(processed_df):
    # Multi-Label Encoding untuk fitur kategorik
    mlb_diet = MultiLabelBinarizer()
    diet_encoded = mlb_diet.fit_transform(processed_df['DietType'])
    diet_columns = [f'Diet_{col}' for col in mlb_diet.classes_]
    diet_df = pd.DataFrame(diet_encoded, columns=diet_columns)

    # Label Encoding untuk Cuisine dan MealType
    le_cuisine = LabelEncoder()
    le_meal_type = LabelEncoder()
    
    processed_df['Cuisine_Encoded'] = le_cuisine.fit_transform(processed_df['Cuisine'])
    processed_df['MealType_Encoded'] = le_meal_type.fit_transform(processed_df['MealType'])

    # Gabungkan fitur
    X = pd.concat([
        processed_df[[
            'Calories_Normalized', 
            'ProteinContent_Normalized', 
            'CarbohydrateContent_Normalized', 
            'FatContent_Normalized', 
            'Cuisine_Encoded', 
            'MealType_Encoded'
        ]],
        diet_df
    ], axis=1)

    # Target Variable
    y = processed_df['AggregatedRating']

    # Split Data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Scaling tambahan untuk fitur input
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return {
        'X_train': X_train_scaled,
        'X_test': X_test_scaled,
        'y_train': y_train,
        'y_test': y_test,
        'cuisine_encoder': le_cuisine,
        'meal_type_encoder': le_meal_type,
        'diet_encoder': mlb_diet,
        'scaler': scaler,
        'feature_names': X.columns.tolist()
    }

In [7]:
def build_rating_prediction_model(input_shape):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_shape,)),
        Dropout(0.4),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='linear')
    ])

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mean_squared_error',
        metrics=['mae']
    )

    return model

def train_rating_prediction_model(model_data):
    X_train = model_data['X_train']
    X_test = model_data['X_test']
    y_train = model_data['y_train']
    y_test = model_data['y_test']

    model = build_rating_prediction_model(X_train.shape[1])

    early_stopping = EarlyStopping(
        monitor='val_loss', 
        patience=15, 
        restore_best_weights=True
    )

    history = model.fit(
        X_train, y_train, 
        validation_split=0.2,  
        epochs=10, 
        batch_size=32,
        callbacks=[early_stopping],
        verbose=1
    )

    y_pred = model.predict(X_test).flatten()
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)

    print(f'Test MAE: {mae}')
    print(f'Test MSE: {mse}')

    return model, history

In [8]:
def match_cuisine_preference(cuisine_preference, available_cuisines):
    matched_cuisines = []
    preference_lower = cuisine_preference.lower()

    all_cuisine_keywords = {
        keyword.lower(): category 
        for category, keywords in CUISINE_KEYWORDS.items() 
        for keyword in keywords
    }

    if cuisine_preference.lower() in [cat.lower() for cat in CUISINE_KEYWORDS.keys()]:
        return [cuisine_preference]
    
    if preference_lower in all_cuisine_keywords:
        return [all_cuisine_keywords[preference_lower]]
    
    for keyword, category in all_cuisine_keywords.items():
        if keyword in preference_lower:
            matched_cuisines.append(category)
    
    return list(set(matched_cuisines)) if matched_cuisines else list(available_cuisines)

def recommend_recipe_comprehensive(user_profile, processed_df, model_data, rating_prediction_model):
    def calculate_bmi(height, weight):
        height_m = height / 100
        bmi = weight / (height_m ** 2)
        return round(bmi, 2)
    
    def categorize_bmi(bmi):
        if bmi < 18.5: return 'Underweight'
        elif 18.5 <= bmi < 25: return 'Normal'
        elif 25 <= bmi < 30: return 'Overweight'
        else: return 'Obese'
    
    def calculate_daily_calories(age, gender, weight, height, activity_level):
        if gender.lower() == 'male':
            bmr = 88.362 + (13.397 * weight) + (4.799 * height) - (5.677 * age)
        else:
            bmr = 447.593 + (9.247 * weight) + (3.098 * height) - (4.330 * age)
        
        activity_factors = {
            'sedentary': 1.2,
            'light': 1.375,
            'moderate': 1.55,
            'active': 1.725,
            'very active': 1.9
        }
        
        daily_calories = bmr * activity_factors.get(activity_level.lower(), 1.2)
        return round(daily_calories)

    def prepare_recipe_features(recipes, model_data):
        numeric_features = recipes[[
            'Calories_Normalized', 
            'ProteinContent_Normalized', 
            'CarbohydrateContent_Normalized', 
            'FatContent_Normalized'
        ]]

        cuisine_encoded = model_data['cuisine_encoder'].transform(recipes['Cuisine'])
        meal_type_encoded = model_data['meal_type_encoder'].transform(recipes['MealType'])
        diet_encoded = model_data['diet_encoder'].transform(recipes['DietType'])
        
        X = np.column_stack([
            numeric_features,
            cuisine_encoded, 
            meal_type_encoded, 
            diet_encoded
        ])
        
        return X

    bmi = calculate_bmi(user_profile['height'], user_profile['weight'])
    bmi_category = categorize_bmi(bmi)
    daily_calories = calculate_daily_calories(
        user_profile['age'], 
        user_profile['gender'], 
        user_profile['weight'], 
        user_profile['height'], 
        user_profile['activity_level']
    )

    breakfast_calories = round(daily_calories * 0.25)
    lunch_calories = round(daily_calories * 0.35)
    dinner_calories = round(daily_calories * 0.30)
    snack_calories = round(daily_calories * 0.10)

    def recommend_menu(meal_type, cuisine_preference, calories):
        min_calories = round(calories * 0.8)
        max_calories = round(calories * 1.2)

        available_cuisines = processed_df['Cuisine'].unique()
        matched_cuisines = match_cuisine_preference(cuisine_preference, available_cuisines)

        filtered_recipes = processed_df[
            (processed_df['MealType'] == meal_type) &
            (processed_df['Cuisine'].isin(matched_cuisines)) &
            (processed_df['Calories'].between(min_calories, max_calories))
        ]

        if len(filtered_recipes) > 0:
            recipe_features = prepare_recipe_features(filtered_recipes, model_data)
            predicted_ratings = rating_prediction_model.predict(recipe_features).flatten()
            
            filtered_recipes['PredictedRating'] = predicted_ratings
            recommended = filtered_recipes.sort_values('PredictedRating', ascending=False).head(3)
        else:
            recommended = filtered_recipes.head(3)

        return recommended[['Name', 'Calories', 'ProteinContent', 'AggregatedRating', 'Cuisine']]

    breakfast_menu = recommend_menu('Breakfast', user_profile['cuisine_preference'], breakfast_calories)
    lunch_menu = recommend_menu('Lunch', user_profile['cuisine_preference'], lunch_calories)
    dinner_menu = recommend_menu('Dinner', user_profile['cuisine_preference'], dinner_calories)

    return {
        'BMI': {
            'value': bmi,
            'category': bmi_category
        },
        'Daily Calories': {
            'total': daily_calories,
            'breakdown': {
                'Breakfast': breakfast_calories,
                'Lunch': lunch_calories,
                'Dinner': dinner_calories,
                'Snack': snack_calories
            }
        },
        'Recommended Menus': {
            'Breakfast': breakfast_menu,
            'Lunch': lunch_menu,
            'Dinner': dinner_menu
        }
    }

In [9]:
# Preprocessing
processed_df = preprocess_recipe_data(df)



In [10]:
# Persiapan Model
model_data = prepare_model_data(processed_df)

# Training Model
rating_prediction_model, _ = train_rating_prediction_model(model_data)



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m2836/2836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 1.9044 - mae: 0.9905 - val_loss: 0.4582 - val_mae: 0.5396
Epoch 2/10
[1m2836/2836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 0.6056 - mae: 0.5815 - val_loss: 0.4300 - val_mae: 0.4945
Epoch 3/10
[1m2836/2836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 0.5319 - mae: 0.5371 - val_loss: 0.4293 - val_mae: 0.4906
Epoch 4/10
[1m2836/2836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 0.4812 - mae: 0.5114 - val_loss: 0.4294 - val_mae: 0.4797
Epoch 5/10
[1m2836/2836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 0.4654 - mae: 0.5045 - val_loss: 0.4289 - val_mae: 0.4892
Epoch 6/10
[1m2836/2836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - loss: 0.4617 - mae: 0.5016 - val_loss: 0.4300 - val_mae: 0.4927
Epoch 7/10
[1m2836/2836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s

In [36]:
# Contoh Profil Pengguna
user_profiles = [
    {
        'age': 30,
        'gender': 'male',
        'height': 170,
        'weight': 100,
        'activity_level': 'very active',
        'cuisine_preference': 'Vegetarian, Asian'
    }
]

# Rekomendasi untuk setiap profil
for profile in user_profiles:
    print(f"\nRekomendasi untuk preferensi: {profile['cuisine_preference']}")
    recommendation = recommend_recipe_comprehensive(
        profile, 
        processed_df, 
        model_data, 
        rating_prediction_model
    )
    
    # Cetak informasi BMI dan Kalori
    print("\nBMI:")
    print(f"Nilai BMI: {recommendation['BMI']['value']}")
    print(f"Kategori BMI: {recommendation['BMI']['category']}")
    
    print("\nKebutuhan Kalori Harian:")
    print(f"Total Kalori: {recommendation['Daily Calories']['total']} kkal")
    print("Pembagian Kalori:")
    for meal, calories in recommendation['Daily Calories']['breakdown'].items():
        print(f"{meal}: {calories} kkal")
    
    print("\nRekomendasi Menu:")
    for meal_type, menu in recommendation['Recommended Menus'].items():
        print(f"\n{meal_type} Menu:")
        for recipe in menu.to_dict('records'):
            print(f"- {recipe['Name']} (Cuisine: {recipe['Cuisine']})")
            print(f"  Kalori: {recipe['Calories']} kkal")
            print(f"  Protein: {recipe['ProteinContent']} g")
            print(f"  Rating: {recipe['AggregatedRating']}")


Rekomendasi untuk preferensi: Vegetarian, Asian
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_recipes['PredictedRating'] = predicted_ratings
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_recipes['PredictedRating'] = predicted_ratings


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 

BMI:
Nilai BMI: 34.6
Kategori BMI: Obese

Kebutuhan Kalori Harian:
Total Kalori: 3940 kkal
Pembagian Kalori:
Breakfast: 985 kkal
Lunch: 1379 kkal
Dinner: 1182 kkal
Snack: 394 kkal

Rekomendasi Menu:

Breakfast Menu:
- Sticky Chilli Chicken Wings (Cuisine: Asian)
  Kalori: 1009.0 kkal
  Protein: 74.6 g
  Rating: 5.0
- Beef With Rice Noodles (Kway Teow) (Cuisine: Asian)
  Kalori: 1106.8 kkal
  Protein: 13.5 g
  Rating: 5.0
- Mom's Orange Curry Chicken (Cuisine: Asian)
  Kalori: 794.0 kkal
  Protein: 54.4 g
  Rating: 5.0

Lunch Menu:
- Thai Beef Salad (Cuisine: Asian)
  Kalori: 1259.9 kkal
  Protein: 18.2 g
  Rating: 4.5
- Thai beef salad (Cuisine: Asian)
  Kalori: 1490.3 kkal
  Protein: 82.2 g
  Rating: 5.0
- Persian Lamb or Beef  Pumpkin Stew (Cuisine: Asian)
  Kalori: 1413.7 kkal
  Protein: 87.4 g
  Rating: 5.0

Dinner Menu:
- Gluten Free Crunchy Chinese Noodles (Cuisine: Asian)
  Kalori: 1126.5 kkal
  Protein: 3.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_recipes['PredictedRating'] = predicted_ratings
