In [32]:
# =========================
# BLOK 1 – Importlar
# =========================
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, top_k_accuracy_score
import joblib


In [33]:
# =========================
# BLOCK 2 – Reading the data
# =========================
# Relative location of the raw CSV; change it if the data lives elsewhere.
csv_path = "../data/Food_and_Nutrition new.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)

# Report the raw (rows, columns) shape, then preview the first five rows.
print("Ham veri şekli:", df.shape)
display(df.head(n=5))


Ham veri şekli: (1698, 19)


Unnamed: 0,Gender,Activity Level,Dietary Preference,Breakfast Suggestion,Lunch Suggestion,Dinner Suggestion,Snack Suggestion,Disease,Ages,Height,Weight,Daily Calorie Target,Protein,Sugar,Sodium,Calories,Carbohydrates,Fiber,Fat
0,Male,Moderately Active,Omnivore,Oatmeal with berries and nuts,Grilled chicken salad with mixed greens,Salmon with roasted vegetables,Greek yogurt with fruit,Weight Gain,25,180,80,2000,120,125.0,24.0,2020,250,30.0,60
1,Female,Lightly Active,Vegetarian,Tofu scramble with veggies,Lentil soup with whole wheat bread,Vegetable stir-fry with brown rice,Apple with almond butter,"Weight Gain, Hypertension, Heart Disease",32,165,65,1600,80,100.0,16.0,1480,200,24.0,40
2,Male,Sedentary,Vegan,Tofu and veggie breakfast burrito,Black bean burger on a whole wheat bun,Lentil and vegetable curry,Trail mix,Weight Gain,48,175,95,2200,100,150.0,20.0,2185,300,36.0,65
3,Female,Very Active,Omnivore,Greek yogurt with granola and fruit,Chicken and vegetable stir-fry,Turkey chili with brown rice,Banana with peanut butter,Weight Gain,55,160,70,2500,140,175.0,28.0,2680,350,42.0,80
4,Male,Sedentary,Vegetarian,Scrambled eggs with whole wheat toast and avocado,Quinoa salad with chickpeas and vegetables,Vegetarian chili with cornbread,Fruit and nut mix,Weight Gain,62,170,85,2000,80,125.0,16.0,1815,250,30.0,55


In [34]:
# =========================
# BLOCK 3 – Text cleaning + rare class merge
# =========================

# The four meal-suggestion columns, in the order they are processed.
meal_cols = [
    "Breakfast Suggestion", "Lunch Suggestion",
    "Dinner Suggestion", "Snack Suggestion",
]

def clean_text(s: pd.Series) -> pd.Series:
    """Normalize free-text labels so equal meals compare equal.

    Each non-missing value is converted to ``str``, lower-cased, stripped of
    leading/trailing whitespace, and every run of whitespace is collapsed to
    a single space.

    Missing values stay missing. A plain ``astype(str)`` would turn them into
    the literal text ``"nan"``/``"None"``, which would then be counted as a
    genuine class and could no longer be removed with ``dropna``.

    Parameters
    ----------
    s : pd.Series
        Column of raw text labels.

    Returns
    -------
    pd.Series
        Cleaned labels with the same index as ``s``.
    """
    normalized = (
        s.astype(str)
         .str.lower()
         .str.strip()
         .str.replace(r"\s+", " ", regex=True)
    )
    # Re-mask positions that were missing in the input (default fill is NaN).
    return normalized.where(s.notna())

def merge_rare_classes(df: pd.DataFrame, col: str, min_count: int) -> pd.DataFrame:
    """Collapse infrequent values of ``col`` into a single catch-all label.

    Every value of ``df[col]`` that occurs fewer than ``min_count`` times is
    replaced by ``"other_<col>"``, where ``<col>`` is the column name
    lower-cased with spaces turned into underscores. Missing values are not
    counted by ``value_counts`` and are therefore left untouched.

    The input frame is not modified; a new frame is returned. This keeps the
    function safe to re-run with a different threshold on the same input.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing ``col``.
    col : str
        Name of the label column to consolidate.
    min_count : int
        Minimum number of occurrences a value needs to keep its own label.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with rare values of ``col`` merged.
    """
    counts = df[col].value_counts()
    rare_classes = counts[counts < min_count].index

    new_label = "other_" + col.replace(" ", "_").lower()
    # Keep frequent (and missing) values, overwrite only the rare ones.
    merged = df[col].where(~df[col].isin(rare_classes), new_label)
    return df.assign(**{col: merged})


In [39]:
# =========================
# BLOCK 4 – Apply the per-meal rare-class merge
# =========================

# Per-column frequency threshold: labels seen fewer times than this are
# folded into the column's "other_..." class.
MIN_COUNT_PER_MEAL = {
    "Breakfast Suggestion": 44,
    "Lunch Suggestion": 35,
    "Dinner Suggestion": 35,
    "Snack Suggestion": 32,
}

# Normalize the text first so differently formatted spellings are counted
# together, then merge the rare labels.
for col in meal_cols:
    threshold = MIN_COUNT_PER_MEAL[col]
    df[col] = clean_text(df[col])
    df = merge_rare_classes(df, col, min_count=threshold)

print("\nRare merge sonrası sınıf sayıları:")
# Number of distinct labels left in each meal column.
for meal_name, n_labels in df[meal_cols].nunique().items():
    print(f"{meal_name:22s} → {n_labels} sınıf")



Rare merge sonrası sınıf sayıları:
Breakfast Suggestion   → 10 sınıf
Lunch Suggestion       → 11 sınıf
Dinner Suggestion      → 11 sınıf
Snack Suggestion       → 10 sınıf


In [40]:
# =========================
# BLOCK 5 – Feature lists
# =========================

# Categorical input columns.
cat_cols = ["Gender", "Activity Level", "Dietary Preference", "Disease"]

# Numeric input columns.
num_cols = [
    "Ages", "Height", "Weight", "Daily Calorie Target",
    "Protein", "Fat", "Sugar", "Sodium",
]

# Full model input: categorical columns first, then numeric ones.
# Built from the two lists above so the three can never drift apart.
feature_cols = cat_cols + num_cols

print("Feature sayısı:", len(feature_cols))
print("Kategorik:", cat_cols)
print("Sayısal:", num_cols)


Feature sayısı: 12
Kategorik: ['Gender', 'Activity Level', 'Dietary Preference', 'Disease']
Sayısal: ['Ages', 'Height', 'Weight', 'Daily Calorie Target', 'Protein', 'Fat', 'Sugar', 'Sodium']


In [41]:
# =========================
# BLOCK 6 – Model training function
# =========================

def train_meal_model(
    df: pd.DataFrame,
    target_col: str,
    feature_cols,
    cat_cols,
    num_cols,
    min_count: int = 1,
    random_state: int = 42,
    model_name: str = None,
):
    """Train and evaluate a random-forest classifier for one meal column.

    Steps:
      1. Drop rows whose target is missing and rows whose target class has
         fewer than ``min_count`` samples.
      2. Make a stratified 80/20 train/test split.
      3. Fit a pipeline of one-hot encoding (categorical columns), standard
         scaling (numeric columns) and a 300-tree ``RandomForestClassifier``.
      4. Print top-1 and top-3 accuracy on the test split.
      5. If ``model_name`` is given, save the fitted pipeline to
         ``models/<model_name>.pkl`` (the directory is created if needed).

    Parameters
    ----------
    df : pd.DataFrame
        Data containing ``feature_cols`` and ``target_col``.
    target_col : str
        Name of the label column to predict.
    feature_cols : list of str
        Columns used as model input.
    cat_cols : list of str
        Subset of the input columns to one-hot encode.
    num_cols : list of str
        Subset of the input columns to standard-scale.
    min_count : int, default 1
        Classes with fewer samples than this are removed before splitting.
        NOTE(review): the split is stratified, which scikit-learn rejects
        for classes with a single sample, so values below 2 only work when
        the data has no singleton class.
    random_state : int, default 42
        Seed used for both the split and the forest.
    model_name : str, optional
        File stem for the saved model; nothing is saved when falsy.

    Returns
    -------
    dict
        ``{"acc": float, "top3": float, "n_classes": int}``.
    """
    df_t = df.dropna(subset=[target_col]).copy()

    # Keep only classes that are frequent enough.
    class_counts = df_t[target_col].value_counts()
    valid_classes = class_counts[class_counts >= min_count].index
    df_t = df_t[df_t[target_col].isin(valid_classes)].copy()

    print(f"\n=== {target_col} ===")
    print("Örnek sayısı:", df_t.shape[0])
    print("Sınıf sayısı:", df_t[target_col].nunique())

    X = df_t[feature_cols]
    y = df_t[target_col]

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=random_state,
        stratify=y
    )

    preprocess = ColumnTransformer(
        transformers=[
            # Categories unseen during fit are encoded as all zeros.
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
            ("num", StandardScaler(), num_cols),
        ]
    )

    rf = RandomForestClassifier(
        n_estimators=300,
        n_jobs=-1,
        random_state=random_state,
    )

    model = Pipeline([("preprocess", preprocess), ("rf", rf)])
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)

    acc = float(accuracy_score(y_test, y_pred))
    # The probability columns follow model.classes_. Passing them as `labels`
    # keeps the column/label mapping explicit and avoids an error when a
    # class seen during training is absent from the test split.
    top3 = float(
        top_k_accuracy_score(y_test, y_proba, k=3, labels=model.classes_)
    )

    print("Test Accuracy:", acc)
    print("Top-3 Accuracy:", top3)

    if model_name:
        os.makedirs("models", exist_ok=True)
        joblib.dump(model, f"models/{model_name}.pkl")
        print(f"Model kaydedildi → models/{model_name}.pkl")

    return {"acc": acc, "top3": top3, "n_classes": int(y.nunique())}


In [42]:
# =========================
# BLOCK 7 – Train the model for all 4 meals
# =========================

# Result key -> target column. The key is also used to name the saved model
# file (models/rf_<key>.pkl).
meal_targets = {
    "breakfast": "Breakfast Suggestion",
    "lunch": "Lunch Suggestion",
    "dinner": "Dinner Suggestion",
    "snack": "Snack Suggestion",
}

# One loop instead of four copy-pasted calls: every meal is trained with
# exactly the same settings, so they cannot drift apart by accident.
results = {}
for meal_key, meal_target in meal_targets.items():
    results[meal_key] = train_meal_model(
        df=df,
        target_col=meal_target,
        feature_cols=feature_cols,
        cat_cols=cat_cols,
        num_cols=num_cols,
        min_count=1,
        model_name=f"rf_{meal_key}",
    )

print("\n=== ÖZET ===")
results



=== Breakfast Suggestion ===
Örnek sayısı: 1698
Sınıf sayısı: 10
Test Accuracy: 0.45
Top-3 Accuracy: 0.7411764705882353
Model kaydedildi → models/rf_breakfast.pkl

=== Lunch Suggestion ===
Örnek sayısı: 1698
Sınıf sayısı: 11
Test Accuracy: 0.3911764705882353
Top-3 Accuracy: 0.7088235294117647
Model kaydedildi → models/rf_lunch.pkl

=== Dinner Suggestion ===
Örnek sayısı: 1698
Sınıf sayısı: 11
Test Accuracy: 0.3735294117647059
Top-3 Accuracy: 0.6617647058823529
Model kaydedildi → models/rf_dinner.pkl

=== Snack Suggestion ===
Örnek sayısı: 1698
Sınıf sayısı: 10
Test Accuracy: 0.31470588235294117
Top-3 Accuracy: 0.6735294117647059
Model kaydedildi → models/rf_snack.pkl

=== ÖZET ===


{'breakfast': {'acc': 0.45,
  'top3': np.float64(0.7411764705882353),
  'n_classes': 10},
 'lunch': {'acc': 0.3911764705882353,
  'top3': np.float64(0.7088235294117647),
  'n_classes': 11},
 'dinner': {'acc': 0.3735294117647059,
  'top3': np.float64(0.6617647058823529),
  'n_classes': 11},
 'snack': {'acc': 0.31470588235294117,
  'top3': np.float64(0.6735294117647059),
  'n_classes': 10}}