In [None]:
# Cell 1: Install the required libraries
# Every package is pinned to an exact version. The trailing flags ask pip to
# upgrade already-installed packages (--upgrade) and to bypass its download
# cache (--no-cache-dir).
!pip install tensorflow==2.15.0 tf-keras==2.15.1 keras==2.15.0 numpy==1.26.4 protobuf==4.25.7 h5py==3.13.0 pandas==2.2.2 scikit-learn==1.6.1 keras-tuner==1.4.7 matplotlib==3.10.3 seaborn==0.13.2 tensorflowjs==4.22.0 kagglehub==0.3.12 split-folders==0.5.1 tensorflow-text==2.15.0 dopamine-rl==4.0.7 thinc==8.2.3 grpcio-status==1.59.0 grpcio==1.59.0 packaging==23.2 tensorflow-decision-forests==1.8.1 spacy==3.7.5 google-cloud-bigquery==3.17.0 ml-dtypes==0.2.0 wrapt==1.14.1 tensorboard==2.15.2 tensorflow-estimator==2.15.0 --upgrade --no-cache-dir
print("Instalasi library selesai.")

In [None]:
# Cell 2: Imports and Setup with Updated Authentication
# Import necessary libraries (those not yet imported in the installation cell, or specific to this block)
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import seaborn as sns
import re
import os
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import joblib # For saving scalers
import shutil # For zipping files

# For Google Colab integration
from google.colab import auth, files
import gspread
from google.auth import default # Updated import for gspread authentication

# Set random seeds for reproducibility
# Seeds both NumPy's global RNG and TensorFlow's global RNG with the same value.
np.random.seed(42)
tf.random.set_seed(42)

# Authenticate user
# Uses the `auth` helper imported from google.colab at the top of this cell.
auth.authenticate_user()

# Get default credentials and authorize gspread
# `gc` is the module-level gspread client; the data-loading cell reads it as a global.
creds, _ = default()
gc = gspread.authorize(creds)

print("✅ Libraries imported and Google Sheets authorized using google-auth.")

In [None]:
# Cell 3: Load Data Configuration and Execution

# --- USER INPUT REQUIRED (Updated for data from scraping script) ---
# NOTE(review): the `#@markdown` / `#@param` comments look like Colab form
# annotations — keep them on these exact lines when editing the values.
#@markdown Enter the name of your Google Sheet (output from scraping script):
GOOGLE_SHEET_NAME = "Scrap_Food_Datasets" #@param {type:"string"}
#@markdown Enter the name of the worksheet containing the scraped nutrition data:
WORKSHEET_NAME = "Result_Food_Name_List" #@param {type:"string"}
# --- END USER INPUT ---

def load_data_from_google_sheet(sheet_name, worksheet_name):
    """Read one worksheet of a Google Sheet into a pandas DataFrame.

    The first row of the worksheet is used as the column header (each header
    is whitespace-stripped); every remaining row becomes a data row. All
    values are kept exactly as returned by ``get_all_values()``.

    Args:
        sheet_name: Title of the spreadsheet to open with the global ``gc`` client.
        worksheet_name: Title of the worksheet inside that spreadsheet.

    Returns:
        The loaded DataFrame, or an empty DataFrame when the worksheet has no
        rows or any error occurs (the error is printed, never raised).
    """
    try:
        print(f"Attempting to open spreadsheet: '{sheet_name}', worksheet: '{worksheet_name}'")
        # `gc` is the gspread client created in the authentication cell.
        rows = gc.open(sheet_name).worksheet(worksheet_name).get_all_values()
        if rows:
            header_row, body_rows = rows[0], rows[1:]
            frame = pd.DataFrame(body_rows, columns=header_row)
            frame.columns = [name.strip() for name in frame.columns]  # Clean column names
            print(f"✅ Data loaded successfully from Google Sheet: '{sheet_name}' (Worksheet: '{worksheet_name}')")
            return frame
        print(f"❌ Worksheet '{worksheet_name}' is empty or data could not be fetched.")
    except gspread.exceptions.SpreadsheetNotFound:
        print(f"❌ Error: Spreadsheet '{sheet_name}' not found. Please check the name and sharing permissions.")
    except gspread.exceptions.WorksheetNotFound:
        print(f"❌ Error: Worksheet '{worksheet_name}' not found in Spreadsheet '{sheet_name}'.")
    except Exception as e:
        print(f"❌ Error loading data from Google Sheet: {e}")
        print("⚠️ Please ensure the Google Sheet name and worksheet name are correct, and that the sheet is shared appropriately.")
    # Every non-success path ends here with an empty frame.
    return pd.DataFrame()

# Load the raw data
# The loader returns an empty DataFrame on every failure path, so `df_raw.empty`
# is the success/failure signal used below and by later cells.
df_raw = load_data_from_google_sheet(GOOGLE_SHEET_NAME, WORKSHEET_NAME)

if not df_raw.empty:
    print("\n📋 Sample of loaded data (df_raw):")
    print(df_raw.head())
    print(f"\nShape of loaded data: {df_raw.shape}")
    print(f"\nColumns in loaded data: {df_raw.columns.tolist()}")
else:
    print("⚠️ Data loading failed. Subsequent cells may not work correctly.")

In [None]:
# Cell 4: Define Data Preprocessing Functions (NaN in Nutrition Columns to 0) (MODIFIED)
# Helper functions to extract region, availability and origin information from the 'label' column
def extract_region(label_text):
    """Pull the region name out of a free-text food label.

    Returns the text following ``Region: `` (up to the next ``;``, ``,`` or end
    of string, stripped). If there is no such tag, returns ``"General"`` when
    the label mentions ``General`` or ``Umum di:``; otherwise ``"Unknown"``.
    Missing labels (NaN/None) also give ``"Unknown"``.
    """
    if pd.isna(label_text):
        return "Unknown"

    text = str(label_text)
    found = re.search(r'Region: (.*?)($|;|,)', text)
    if found is not None:
        return found.group(1).strip()

    mentions_general = "General" in text or "Umum di:" in text
    return "General" if mentions_general else "Unknown"

def extract_availability(label_text):
    """Pull the availability description out of a free-text food label.

    Missing labels give ``"Unknown"``. Any label containing ``General`` gives
    ``"General"`` (checked before the tag lookup). Otherwise the text after
    ``Umum di: `` (up to the next ``;``, ``,`` or end of string, stripped) is
    returned, falling back to ``"Specific"`` when that tag is absent.
    """
    if pd.isna(label_text):
        return "Unknown"

    text = str(label_text)
    if "General" in text:
        return "General"

    found = re.search(r'Umum di: (.*?)($|;|,)', text)
    return found.group(1).strip() if found else "Specific"

def extract_origin(label_text):
    """Pull the place of origin out of a free-text food label.

    Returns the text after ``Asli: `` (up to the next ``;``, ``,`` or end of
    string, stripped). Missing labels and labels without that tag give
    ``"Unknown"``.
    """
    if not pd.isna(label_text):
        found = re.search(r'Asli: (.*?)($|;|,)', str(label_text))
        if found is not None:
            return found.group(1).strip()
    return "Unknown"

# Data preprocessing function
def preprocess_data(df_input):
    """Derive location features and clean the nutrition columns of a raw food table.

    Works on a copy; ``df_input`` is not modified. The returned frame gains:

    * ``region``, ``availability``, ``origin`` — parsed from the ``label``
      column, or all ``"Unknown"`` when that column is missing;
    * one ``region_<name>`` 0/1 indicator column per distinct region other
      than ``"Unknown"`` (name sanitized to ``[A-Za-z0-9_]``);
    * ``is_general`` — 1 where ``availability`` equals ``"General"``.

    Each expected nutrition column that is present is converted to numeric
    (decimal commas become dots, surrounding whitespace is removed) and any
    value that cannot be parsed is replaced by 0.

    Args:
        df_input: Raw DataFrame as loaded from the sheet.

    Returns:
        The processed copy.
    """
    frame = df_input.copy()
    location_features = ('region', 'availability', 'origin')

    # The 'label' column is the only source for region, availability, and origin.
    if 'label' in frame.columns:
        print("ℹ️ Extracting region, availability, and origin from 'label' column.")
        extractors = (extract_region, extract_availability, extract_origin)
        for feature_name, extractor in zip(location_features, extractors):
            frame[feature_name] = frame['label'].apply(extractor)
    else:
        print("❌ Error: 'label' column not found. Cannot extract region, availability, and origin features.")
        # Placeholder columns keep downstream code from failing on a missing key.
        for feature_name in location_features:
            frame[feature_name] = "Unknown"

    # Nutrition columns the model expects.
    nutrient_columns = ['kalori (kkal)', 'energi (kj)', 'lemak (g)', 'lemak jenuh (g)',
                        'lemak tak jenuh ganda (g)', 'lemak tak jenuh tunggal (g)',
                        'kolesterol (mg)', 'protein (g)', 'karbohidrat (g)', 'serat (g)',
                        'gula (g)', 'sodium (mg)', 'kalium (mg)']

    print("\n🔍 Checking for expected numeric columns in the loaded data:")
    for absent in (name for name in nutrient_columns if name not in frame.columns):
        print(f"  ❌ Expected numeric column '{absent}' NOT FOUND in loaded data. Model training might fail or be inaccurate.")

    for name in nutrient_columns:
        if name not in frame.columns:
            continue
        as_text = frame[name].astype(str).str.replace(',', '.', regex=False).str.strip()
        frame[name] = pd.to_numeric(as_text, errors='coerce').fillna(0)
        print(f"  ℹ️ Column '{name}': NaN values filled with 0.")

    # One indicator column per known region.
    for region_val in frame['region'].dropna().unique():
        if pd.isna(region_val) or region_val == "Unknown":
            continue
        column_suffix = re.sub(r'[^A-Za-z0-9_]+', '', region_val.replace(" ", "_"))
        frame[f'region_{column_suffix}'] = (frame['region'] == region_val).astype(int)

    frame['is_general'] = (frame['availability'] == 'General').astype(int)
    print("✅ Data preprocessing functions defined and NaN in nutrition columns handled by filling with 0.")
    print("   Region, availability, and origin are now extracted from the 'label' column.")
    return frame

# Execute preprocessing
# Always leaves `processed_df` defined: the processed frame on success, or an
# empty DataFrame when there was no raw data, so later cells can test `.empty`.
if not df_raw.empty:
    print("\n⚙️ Running data preprocessing...")
    processed_df = preprocess_data(df_raw)
    print("✅ Data preprocessing completed.")
    print("\n📋 Sample of processed data (processed_df) including new 'origin' column:")
    # Display relevant columns including the new 'origin', 'region', 'availability'
    display_cols = ['nama_makanan', 'label', 'region', 'availability', 'origin', 'kalori (kkal)', 'protein (g)']
    # Filter out columns not present in processed_df to avoid KeyError
    display_cols_existing = [col for col in display_cols if col in processed_df.columns]
    print(processed_df[display_cols_existing].head())
    print("\nℹ️ Info of processed data:")
    processed_df.info()

    # Spot-check a few key nutrition columns for leftover NaNs.
    numeric_cols_check_after = ['kalori (kkal)', 'protein (g)', 'karbohidrat (g)', 'lemak (g)']
    print("\n🔍 NaN check in key numeric columns AFTER preprocessing (should be 0 for these if column exists):")
    for col in numeric_cols_check_after:
        if col in processed_df.columns:
            print(f"  NaNs in {col}: {processed_df[col].isnull().sum()}")
        else:
            print(f"  Column {col} not found for NaN check.")
else:
    print("⚠️ Raw data is empty. Skipping preprocessing.")
    processed_df = pd.DataFrame()

In [None]:
# Cell 5: Define Nutritional Requirements and Balance Score Calculation (MODIFIED)
def get_daily_nutritional_requirements():
    """Return the daily reference amount for each tracked nutrient.

    Returns:
        A new dict on every call, keyed by the nutrition column names used
        throughout the notebook; values are in the unit named in each key.
    """
    reference_amounts = (
        ('kalori (kkal)', 2000),
        ('energi (kj)', 8368),  # approx 2000 kcal * 4.184
        ('protein (g)', 56),
        ('karbohidrat (g)', 275),
        ('lemak (g)', 78),
        ('lemak jenuh (g)', 22),
        ('lemak tak jenuh ganda (g)', 18),
        ('lemak tak jenuh tunggal (g)', 25),
        ('kolesterol (mg)', 300),
        ('serat (g)', 28),
        ('gula (g)', 50),
        ('sodium (mg)', 2300),
        ('kalium (mg)', 3500),
    )
    return dict(reference_amounts)

def calculate_balance_score(current_nutrition, recommended_food_nutrition):
    """Score how far (current intake + candidate food) is from the daily requirements.

    For every nutrient with a positive requirement, the relative deviation of
    the combined amount from the requirement is squared and summed; amounts
    above the requirement are weighted 1.5x. Lower scores mean a better
    balance.

    Args:
        current_nutrition: Dict of nutrient -> amount already consumed. A
            non-dict argument is treated as all zeros.
        recommended_food_nutrition: Dict of nutrient -> amount in the candidate
            food. A non-dict argument is treated as all zeros.

    Returns:
        The accumulated penalty (0 when no nutrient contributes).
    """
    daily_req = get_daily_nutritional_requirements()
    consumed = current_nutrition if isinstance(current_nutrition, dict) else {}
    candidate = recommended_food_nutrition if isinstance(recommended_food_nutrition, dict) else {}

    score = 0
    for nutrient, req_val in daily_req.items():
        if not req_val > 0:
            continue
        val_total = consumed.get(nutrient, 0) + candidate.get(nutrient, 0)
        if val_total > req_val:
            # Overshooting is penalized more heavily than falling short.
            score += ((val_total - req_val) / req_val) ** 2 * 1.5
        else:
            score += ((req_val - val_total) / req_val) ** 2
    return score

print("✅ Nutritional requirement and balance score functions defined (energi (kj) added to requirements).")

In [None]:
# Cell 6: Define Model Building and Training Data Preparation Functions (MODIFIED)
def build_recommendation_model(input_shape_val):
    """Build and compile the dense encoder/decoder network used by the notebook.

    Layers, in construction order: two Dense+BatchNorm+Dropout stages
    (64 units / 0.3 dropout, 32 units / 0.2 dropout), a 16-unit Dense layer
    named ``encoded``, two Dense+BatchNorm stages (32 then 64 units), and a
    linear output layer with as many units as there are input features.

    Args:
        input_shape_val: Input shape tuple; its first element sets the output width.

    Returns:
        The compiled ``keras.Model`` (Adam, learning rate 0.001, MSE loss, MAE metric).
    """
    inputs = keras.Input(shape=input_shape_val)

    hidden = inputs
    for units, dropout_rate in ((64, 0.3), (32, 0.2)):
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Dropout(dropout_rate)(hidden)

    encoded = layers.Dense(16, activation='relu', name='encoded')(hidden)

    hidden = encoded
    for units in (32, 64):
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.BatchNormalization()(hidden)

    feature_count = input_shape_val[0]
    outputs = layers.Dense(feature_count, activation='linear')(hidden)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae'],
    )
    print("✅ Recommendation model architecture defined.")
    return model

def prepare_training_data(df_input):
    """Build scaled train/test arrays from the nutrition columns of a food table.

    Features ``X`` are the nutrition columns present in ``df_input``; targets
    ``y`` are, per food and nutrient, ``daily requirement - amount in the food``.
    The data is split 80/20 (``random_state=42``) and both scalers are fitted on
    the training portion only, so validation rows do not influence the scaling.

    Args:
        df_input: DataFrame containing some or all of the expected nutrition
            columns. Values that cannot be parsed as numbers are replaced by
            the column median (or 0 when the whole column is unparseable).

    Returns:
        ``(X_train, X_test, y_train, y_test, X_scaler, y_scaler, columns_used)``.
        When no nutrition column exists, the first six items are ``None`` and
        the column list is empty. When there are fewer than two rows, the four
        arrays are ``None``; the scalers are fitted only if one row exists.
    """
    nutritional_cols = ['kalori (kkal)', 'energi (kj)', 'lemak (g)', 'lemak jenuh (g)',
                        'lemak tak jenuh ganda (g)', 'lemak tak jenuh tunggal (g)',
                        'kolesterol (mg)', 'protein (g)', 'karbohidrat (g)', 'serat (g)',
                        'gula (g)', 'sodium (mg)', 'kalium (mg)']

    existing_nutritional_cols = [col for col in nutritional_cols if col in df_input.columns]
    if len(existing_nutritional_cols) < len(nutritional_cols):
        print(f"⚠️ Warning: Some nutritional columns are missing from input data. Using: {existing_nutritional_cols}")
    if not existing_nutritional_cols:
        print("❌ Error: No nutritional columns found in the DataFrame. Cannot prepare training data.")
        return None, None, None, None, None, None, []

    # Coerce first, then take the median of the numeric values. NaN must be
    # detected with pd.isna: an identity test against np.nan does not
    # recognise the NaN that median() returns.
    X_df = df_input[existing_nutritional_cols].copy()
    for col in existing_nutritional_cols:
        numeric_col = pd.to_numeric(X_df[col], errors='coerce')
        fill_value = numeric_col.median()
        if pd.isna(fill_value):
            fill_value = 0
        X_df[col] = numeric_col.fillna(fill_value)

    # Every column was filled above, so X contains no NaN from here on.
    X = X_df.values

    daily_req = get_daily_nutritional_requirements()
    requirement_row = np.array([daily_req.get(col, 0) for col in existing_nutritional_cols], dtype=float)
    y = requirement_row - X  # broadcasts the requirement vector over all rows

    X_scaler = StandardScaler()
    y_scaler = StandardScaler()

    # Check the sample count before fitting: fitting on zero rows raises.
    if X.shape[0] < 2:  # Need at least 2 samples for train_test_split
        print("❌ Error: Not enough data points to split into training and testing sets.")
        if X.shape[0] == 1:
            X_scaler.fit(X)
            y_scaler.fit(y)
        return None, None, None, None, X_scaler, y_scaler, existing_nutritional_cols

    X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit on the training split only; the test split is merely transformed.
    X_train = X_scaler.fit_transform(X_train_raw)
    X_test = X_scaler.transform(X_test_raw)
    y_train = y_scaler.fit_transform(y_train_raw)
    y_test = y_scaler.transform(y_test_raw)

    print("✅ Training data preparation function defined (nutritional_cols now includes 'energi (kj)').")
    return X_train, X_test, y_train, y_test, X_scaler, y_scaler, existing_nutritional_cols

# Prepare training data
# Every branch below leaves X_train, X_test, y_train, y_test, X_scaler,
# y_scaler, nutritional_cols_used and model defined (possibly as None), so
# later cells can guard on `is not None` instead of failing with NameError.
if not processed_df.empty:
    print("\n⚙️ Preparing training data...")
    prepared_data = prepare_training_data(processed_df)
    if prepared_data[0] is not None: # Check if X_train is not None
        X_train, X_test, y_train, y_test, X_scaler, y_scaler, nutritional_cols_used = prepared_data
        print("✅ Training data prepared.")
        print(f" Shapes: X_train: {X_train.shape}, X_test: {X_test.shape}")
        print(f" Nutritional columns used for training: {nutritional_cols_used}")

        # Build the model
        print("\n🛠️ Building the model...")
        # Ensure input_shape matches the number of features (nutritional_cols_used)
        model = build_recommendation_model(input_shape_val=(X_train.shape[1],))
        model.summary()
    else:
        print("❌ Training data preparation failed. Cannot build model.")
        # Initialize to prevent errors in later cells if they are run
        X_train, X_test, y_train, y_test, X_scaler, y_scaler, nutritional_cols_used, model = [None]*8
else:
    print("⚠️ Processed data is empty. Skipping training data preparation and model building.")
    X_train, X_test, y_train, y_test, X_scaler, y_scaler, nutritional_cols_used, model = [None]*8

In [None]:
# Cell 7: Define Model Training and Visualization Functions
def train_model_run(model_to_train, X_train_data, y_train_data, X_test_data, y_test_data, epochs=100, batch_size=32):
    """Fit the model with early stopping and learning-rate reduction on plateau.

    Args:
        model_to_train: Compiled Keras model, or None.
        X_train_data, y_train_data: Training features and targets.
        X_test_data, y_test_data: Held-out data passed as ``validation_data``.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size.

    Returns:
        ``(model, history)`` after training, or ``(None, None)`` when the model
        or the training features are None.
    """
    can_train = model_to_train is not None and X_train_data is not None
    if not can_train:
        print("❌ Model or training data is not available. Skipping training.")
        return None, None

    print("🏃‍♂️ Starting model training...")
    # Both callbacks watch the validation loss.
    training_callbacks = [
        keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
        keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001),
    ]
    history = model_to_train.fit(
        X_train_data,
        y_train_data,
        validation_data=(X_test_data, y_test_data),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=training_callbacks,
        verbose=1,
    )
    print("✅ Model training completed.")
    return model_to_train, history

def visualize_training_history(history_data, title_prefix=""):
    """Plot training vs. validation loss and MAE side by side.

    Args:
        history_data: Object whose ``.history`` dict holds the ``loss``,
            ``val_loss``, ``mae`` and ``val_mae`` series, or None.
        title_prefix: Text prepended to both panel titles.

    Returns:
        None. Prints a notice and draws nothing when ``history_data`` is None.
    """
    if history_data is None:
        print("⚠️ No training history to visualize.")
        return

    print(f"📊 Visualizing {title_prefix} training history...")
    # (train key, validation key, train legend, validation legend, title, y-axis label)
    panels = (
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
         f'{title_prefix} Loss', 'Loss'),
        ('mae', 'val_mae', 'Training MAE', 'Validation MAE',
         f'{title_prefix} Mean Absolute Error', 'MAE'),
    )

    plt.figure(figsize=(12, 4))
    for position, (train_key, val_key, train_label, val_label, panel_title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history_data.history[train_key], label=train_label)
        plt.plot(history_data.history[val_key], label=val_label)
        plt.title(panel_title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()
    plt.tight_layout()
    plt.show()

# Train the model
# Both branches define `trained_model` and `initial_history` (None when
# training is skipped) so the fine-tuning cell can guard on them.
if model is not None and X_train is not None and y_train is not None and X_test is not None and y_test is not None :
    trained_model, initial_history = train_model_run(model, X_train, y_train, X_test, y_test, epochs=100) # Reduced for Colab demo
    visualize_training_history(initial_history, title_prefix="Initial Training")
else:
    print("⚠️ Skipping model training as model or data is not available.")
    trained_model, initial_history = None, None

In [None]:
# Cell 8: Define Model Fine-Tuning Function and Execute
def fine_tune_model_run(model_to_fine_tune, X_train_data, y_train_data, X_test_data, y_test_data, epochs=50, batch_size=32, learning_rate=0.0001):
    """Recompile the model with a new learning rate and continue training.

    Args:
        model_to_fine_tune: Previously trained Keras model, or None.
        X_train_data, y_train_data: Training features and targets.
        X_test_data, y_test_data: Held-out data passed as ``validation_data``.
        epochs: Maximum number of fine-tuning epochs.
        batch_size: Mini-batch size.
        learning_rate: Learning rate for the fresh Adam optimizer.

    Returns:
        ``(model, history)`` after fine-tuning, or ``(None, None)`` when the
        model or the training features are None.
    """
    can_fine_tune = model_to_fine_tune is not None and X_train_data is not None
    if not can_fine_tune:
        print("❌ Model or training data is not available. Skipping fine-tuning.")
        return None, None

    print("⚙️ Starting model fine-tuning...")
    fine_tune_optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model_to_fine_tune.compile(optimizer=fine_tune_optimizer, loss='mse', metrics=['mae'])

    stop_on_plateau = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    fine_tune_history = model_to_fine_tune.fit(
        X_train_data,
        y_train_data,
        validation_data=(X_test_data, y_test_data),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[stop_on_plateau],
        verbose=1,
    )
    print("✅ Model fine-tuning completed.")
    return model_to_fine_tune, fine_tune_history

# Fine-tune the model
# Both branches define `fine_tuned_model` and `fine_tune_hist`, mirroring how the
# training cell always defines `trained_model` and `initial_history`; otherwise a
# skipped run would leave `fine_tune_hist` undefined (or stale from an earlier run).
if trained_model is not None and X_train is not None:
    fine_tuned_model, fine_tune_hist = fine_tune_model_run(trained_model, X_train, y_train, X_test, y_test, epochs=50) # Reduced for Colab
    visualize_training_history(fine_tune_hist, title_prefix="Fine-Tuning")
else:
    print("⚠️ Skipping model fine-tuning as the initial trained model or data is not available.")
    fine_tuned_model = trained_model # Fallback to trained_model if fine-tuning skipped
    fine_tune_hist = None

In [None]:
# Cell 9: Define Recommendation Generation Function (MODIFIED)
def generate_recommendations(model_rec, df_rec, current_foods_rec, user_location_rec, X_scaler_rec, y_scaler_rec, nutritional_cols_rec, n_recommendations=10):
    """Rank foods by how well they complement what the user has already eaten.

    Each food with at least one non-zero nutrient is scored with
    ``calculate_balance_score`` (lower is better) and then adjusted:
    -0.5 if the user's location appears in the food's region, else -0.3 if it
    appears in its availability, and a further -0.2 for general foods.
    The final list is filled in three passes over the score-sorted foods:
    location matches (at most half of the slots), then general foods, then
    everything else. A food name is never recommended twice.

    Args:
        model_rec: Trained model. Only checked for None; scoring does not call it.
        df_rec: Processed food table; must contain ``nama_makanan``.
        current_foods_rec: Iterable of ``(food_name, quantity)`` already eaten.
            Names not found in ``df_rec`` are ignored.
        user_location_rec: Location string matched case-insensitively as a
            substring; falsy disables location handling.
        X_scaler_rec: Feature scaler; must not be None.
        y_scaler_rec: Accepted for interface compatibility; not used here.
        nutritional_cols_rec: Nutrition column names to aggregate and score.
        n_recommendations: Maximum number of foods to return.

    Returns:
        Dict with ``current_nutrition``, ``daily_requirements``,
        ``recommendations`` (list of dicts with name, score, region, origin,
        is_general, nutrition) and ``potential_total_nutrition``. When a
        required input is missing, the lists/dicts other than
        ``daily_requirements`` are empty.
    """
    daily_req = get_daily_nutritional_requirements()

    def _empty_result():
        return {'current_nutrition': {}, 'daily_requirements': daily_req,
                'recommendations': [], 'potential_total_nutrition': {}}

    if model_rec is None or df_rec.empty or X_scaler_rec is None or not nutritional_cols_rec:
        print("❌ Cannot generate recommendations: Model, data, scaler, or nutritional columns missing.")
        return _empty_result()
    if 'nama_makanan' not in df_rec.columns:
        # Without food names nothing can be looked up or reported.
        print("❌ Cannot generate recommendations: 'nama_makanan' column missing from data.")
        return _empty_result()

    # Sum the nutrients of everything already eaten (first matching row per name).
    current_nutrition = {col: 0 for col in nutritional_cols_rec}
    for food_name, quantity in current_foods_rec:
        matches = df_rec[df_rec['nama_makanan'] == food_name]
        if matches.empty:
            continue
        first_match = matches.iloc[0]
        for col in nutritional_cols_rec:
            if col in matches.columns and pd.notna(first_match[col]):
                current_nutrition[col] += first_match[col] * quantity

    # NOTE(review): the scaled vector is computed but not consumed by the
    # scoring below; it is kept so the scaler checks behave as before.
    current_nutrition_array = np.array([[current_nutrition.get(col, 0) for col in nutritional_cols_rec]])
    if not hasattr(X_scaler_rec, 'mean_') or X_scaler_rec.mean_ is None:
        print("⚠️ X_scaler does not appear to be fitted. Recommendations may be inaccurate.")
        current_nutrition_scaled = current_nutrition_array
    else:
        current_nutrition_scaled = X_scaler_rec.transform(current_nutrition_array)

    # Lower-case the location once instead of once per row.
    location_key = user_location_rec.lower() if user_location_rec else ""

    scores = []
    for _, row in df_rec.iterrows():
        food_nutrition = {col: row[col] if col in row and pd.notna(row[col]) else 0 for col in nutritional_cols_rec}
        if not any(food_nutrition.values()):
            continue

        balance_score = calculate_balance_score(current_nutrition, food_nutrition)
        region_info = str(row.get('region', '')).lower()
        availability_info = str(row.get('availability', '')).lower()
        is_general_flag = row.get('is_general', 0)

        in_region = bool(location_key) and location_key in region_info
        in_availability = bool(location_key) and location_key in availability_info

        location_bonus = 0
        if in_region:
            location_bonus = -0.5
        elif in_availability:
            location_bonus = -0.3
        if is_general_flag == 1:
            location_bonus -= 0.2

        scores.append({'food_name': row['nama_makanan'],
                       'score': balance_score + location_bonus,
                       'region': row.get('region', 'N/A'),
                       'origin': row.get('origin', 'N/A'),
                       'is_general': is_general_flag,
                       # Remember the match on the row itself so the selection
                       # pass does not have to query df_rec again per food.
                       'is_local': in_region or in_availability,
                       'nutrition_data': food_nutrition})
    scores.sort(key=lambda x: x['score'])

    recommendations = []
    seen_foods = set()

    def _take(candidates, limit):
        """Append unseen candidates, in order, until `limit` recommendations exist."""
        for food in candidates:
            if len(recommendations) >= limit:
                break
            if food['food_name'] in seen_foods:
                continue
            recommendations.append(food)
            seen_foods.add(food['food_name'])

    if location_key:
        # Local foods may fill at most half of the slots.
        _take((f for f in scores if f['is_local']), n_recommendations // 2)
    _take((f for f in scores if f['is_general'] == 1), n_recommendations)
    _take(scores, n_recommendations)

    formatted_recommendations = [{'name': f['food_name'], 'score': f['score'],
                                  'region': f['region'], 'origin': f['origin'],
                                  'is_general': f['is_general'] == 1,
                                  'nutrition': f['nutrition_data']}
                                 for f in recommendations[:n_recommendations]]

    # Intake if the user ate every recommended food on top of the current foods.
    potential_total = {col: current_nutrition.get(col, 0) for col in nutritional_cols_rec}
    for food in formatted_recommendations:
        for col in nutritional_cols_rec:
            potential_total[col] += food['nutrition'].get(col, 0)

    return {'current_nutrition': current_nutrition, 'daily_requirements': daily_req,
            'recommendations': formatted_recommendations, 'potential_total_nutrition': potential_total}

print("✅ Recommendation generation function defined (includes origin).")

In [None]:
# Cell 10: Perform Inference Test (MODIFIED)
# Smoke test: builds a small "already eaten" list, asks for recommendations for
# a fixed location, and prints the result. Runs only when the model, data,
# scalers and column list from earlier cells are all available.
print("🧪 Running inference test...")
if fine_tuned_model is not None and not processed_df.empty and X_scaler is not None and y_scaler is not None and nutritional_cols_used:
    current_foods_example = []
    if 'nama_makanan' in processed_df.columns and len(processed_df) > 0:
        example_food_names = ["Nasi Goreng", "Ayam Goreng"]
        food_names_in_df = processed_df['nama_makanan'].tolist()
        for name in example_food_names:
            if name in food_names_in_df:
                current_foods_example.append((name, 1))
        # Fall back to the first one or two foods in the dataset when neither
        # preferred example is present.
        if not current_foods_example and len(food_names_in_df) >=1 :
             current_foods_example.append((food_names_in_df[0],1))
             if len(food_names_in_df) >=2:
                current_foods_example.append((food_names_in_df[1],1))
        elif not current_foods_example:
             print("⚠️ Could not find example foods or any food in the dataset for inference test.")
    else:
        print("⚠️ 'nama_makanan' column missing or dataset empty, cannot select example foods for test.")

    user_location_example = "Jakarta"

    if current_foods_example:
        print(f" Test current foods: {current_foods_example}, Location: {user_location_example}")
        test_recommendations = generate_recommendations(
            fine_tuned_model, processed_df, current_foods_example, user_location_example,
            X_scaler, y_scaler, nutritional_cols_used
        )

        print("\n--- Inference Test Results ---")
        print("Current Nutrition:")
        for nutrient, value in test_recommendations['current_nutrition'].items(): print(f"  {nutrient}: {value:.2f}")
        print("\nRecommended Foods:")
        if test_recommendations['recommendations']:
            for i, food in enumerate(test_recommendations['recommendations']):
                # MODIFIED: Added origin to the print statement
                print(f"{i+1}. {food['name']} (Region: {food['region']}, Origin: {food['origin']}, General: {food['is_general']}) - Score: {food['score']:.4f}")
        else: print("  No recommendations generated for the test input.")
        print("\nDaily Requirements:")
        for nutrient, value in test_recommendations['daily_requirements'].items(): print(f"  {nutrient}: {value:.2f}")
        print("\nPotential Total Nutrition (Current + All Recommendations):")
        for nutrient, value in test_recommendations['potential_total_nutrition'].items():
            req_val = test_recommendations['daily_requirements'].get(nutrient, 0)
            # Show a percentage only when a positive requirement exists.
            if req_val > 0: print(f"  {nutrient}: {value:.2f} ({(value / req_val) * 100:.1f}% of daily requirement)")
            else: print(f"  {nutrient}: {value:.2f}")
        print("--- End of Inference Test ---")
    else:
        print("⚠️ Skipping inference test as no current foods could be set up for the test.")

else:
    print("⚠️ Skipping inference test: Model, processed data, scalers, or nutritional columns not available.")

In [None]:
# Cell 11: Define Export, Zip, and Download Function & Execute (MODIFIED)
def export_and_download_artifacts(model_to_export, df_processed_export, X_scaler_exp, y_scaler_exp, nutritional_cols_exp, base_dir='saved_model_food_recommendation'):
    """Save the model and its companions to ``base_dir``, zip it, and start a download.

    Written into ``base_dir`` (which is deleted first if it already exists):
    the model in ``.keras`` and ``.h5`` formats, a TensorFlow.js conversion of
    the ``.h5`` file (best effort), ``metadata.json`` (nutrition columns and
    daily requirements), both scalers as joblib pickles, and — when the
    processed frame is non-empty — a JSON list of food label records.

    Args:
        model_to_export: Trained Keras model.
        df_processed_export: Processed food DataFrame (may be empty).
        X_scaler_exp: Fitted feature scaler.
        y_scaler_exp: Fitted target scaler.
        nutritional_cols_exp: Nutrition column names used for training.
        base_dir: Output directory; the archive is written to ``<base_dir>.zip``.

    Returns:
        None. Prints a message and returns early when the model, a scaler, or
        the column list is missing.
    """
    # Local import: subprocess is not among the notebook's top-level imports.
    import subprocess

    if model_to_export is None or X_scaler_exp is None or y_scaler_exp is None or not nutritional_cols_exp:
        print("❌ Cannot export: Model, scalers, or nutritional columns missing.")
        return
    if df_processed_export.empty:
        print("⚠️ Processed DataFrame is empty. Skipping export of food labels.")

    print(f"🚀 Starting export to '{base_dir}'...")
    if os.path.exists(base_dir):
        shutil.rmtree(base_dir)
    os.makedirs(base_dir, exist_ok=True)

    # Save model as .keras
    keras_path = os.path.join(base_dir, 'food_recommendation_model.keras')
    model_to_export.save(keras_path)
    print(f"  💾 Model saved in .keras format: {keras_path}")

    # Save model as .h5 (for TensorFlow.js converter or other compatibility)
    h5_path = os.path.join(base_dir, 'food_recommendation_model.h5')
    model_to_export.save(h5_path)
    print(f"  💾 Model saved in .h5 format: {h5_path}")

    # Convert to TensorFlow.js
    tfjs_model_dir = os.path.join(base_dir, 'tfjs_model')
    os.makedirs(tfjs_model_dir, exist_ok=True)
    print(f"  🔄 Attempting to convert {h5_path} to TensorFlow.js format in {tfjs_model_dir}...")
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact and yields the converter's real exit code.
    conversion_args = ["tensorflowjs_converter", "--input_format=keras", h5_path, tfjs_model_dir]
    try:
        print(f"    Executing: {' '.join(conversion_args)}")
        completed = subprocess.run(conversion_args, capture_output=True, text=True, check=False)
        if completed.returncode == 0:
            print(f"    ✅ Model successfully converted to TensorFlow.js.")
        else:
            print(f"    ❌ Error during TensorFlow.js conversion. Status code: {completed.returncode}")
            if completed.stderr:
                # Surface the converter's own error text in the notebook output.
                print(completed.stderr.strip())
    except (OSError, subprocess.SubprocessError) as e:
        # Conversion is best effort (e.g. converter not installed); the export continues.
        print(f"    ❌ Exception during TensorFlow.js conversion: {e}")

    # Save metadata (nutritional columns and daily requirements) and scalers
    metadata_for_json = {'nutritional_columns': nutritional_cols_exp, 'daily_requirements': get_daily_nutritional_requirements()}
    with open(os.path.join(base_dir, 'metadata.json'), 'w') as f:
        json.dump(metadata_for_json, f, indent=4)
    joblib.dump(X_scaler_exp, os.path.join(base_dir, 'X_scaler.pkl'))
    joblib.dump(y_scaler_exp, os.path.join(base_dir, 'y_scaler.pkl'))
    print(f"  💾 Metadata and scalers saved in {base_dir}")

    # Export food labels (nama_makanan, label, region, availability, origin) to JSON
    if not df_processed_export.empty:
        food_labels_path = os.path.join(base_dir, 'food_labels_and_origins.json')
        label_columns_to_export = ['nama_makanan', 'label', 'region', 'availability', 'origin']
        existing_label_columns = [col for col in label_columns_to_export if col in df_processed_export.columns]
        if existing_label_columns:
            # One dict per food row, written as a JSON array.
            labels_data = df_processed_export[existing_label_columns].to_dict(orient='records')
            with open(food_labels_path, 'w', encoding='utf-8') as f:
                json.dump(labels_data, f, ensure_ascii=False, indent=4)
            print(f"  💾 Food labels and origins saved to: {food_labels_path} (JSON format)")
        else:
            print("⚠️ Could not export food labels as relevant columns are missing from processed_df.")

    # Zip the directory: paths inside the archive are relative to the current
    # directory, so entries keep the base_dir prefix.
    zip_filename = f"{base_dir}.zip"
    shutil.make_archive(base_dir, 'zip', '.', base_dir)
    print(f"  📦 Directory '{base_dir}' zipped to '{zip_filename}'")

    # Download the zip file
    files.download(zip_filename)
    print(f"  📥 Download of '{zip_filename}' initiated.")
    print("✅ Export, zip, and download process completed.")

# Execute export
# Runs only when the model, processed data, both scalers and the column list
# from earlier cells are all available.
if fine_tuned_model is not None and not processed_df.empty and X_scaler is not None and y_scaler is not None and nutritional_cols_used:
    export_and_download_artifacts(fine_tuned_model, processed_df, X_scaler, y_scaler, nutritional_cols_used)
else:
    print("⚠️ Skipping export: Fine-tuned model, processed data, scalers, or nutritional columns not available.")

print("\n🎉🎉🎉 Food Recommendation Model Pipeline in Colab (Script-Style) Finished! 🎉🎉🎉")