# Model Training (Random Forest)

In [None]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load data
# Path is relative to the notebook's working directory.
df = pd.read_excel("../data/High_Quality_Diet_Data.xlsx")

TARGET = "Diet_Category"
X = df.drop(columns=[TARGET])
y = df[TARGET]

# Train-test split
# Split the raw rows BEFORE fitting any preprocessing so that the encoder
# never sees the held-out rows (avoids train/test leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Encoding
# Object-dtype columns are one-hot encoded; every other column is passed
# through unchanged.
cat_cols = X.select_dtypes(include="object").columns
num_cols = X.select_dtypes(exclude="object").columns

preprocessor = ColumnTransformer([
    # handle_unknown="ignore": a category that appears only in the test rows
    # (or at inference time) is encoded as all zeros instead of raising.
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ("num", "passthrough", num_cols),
])

# Fit on the training rows only, then apply the fitted transform to the
# test rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Train model
model = RandomForestClassifier(
    n_estimators=500,
    max_depth=15,
    class_weight="balanced",
    random_state=42,
)

model.fit(X_train, y_train)

# Evaluation
# Accuracy is printed as a percentage; the report gives per-class metrics.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred) * 100)
print(classification_report(y_test, y_pred))

In [None]:
# Save model & preprocessor
# Both artifacts are written to the backend folder; the fitted preprocessor
# is saved alongside the model so the same encoding can be re-applied to
# new inputs before calling the model.
joblib.dump(model, "../backend/diet_rf_model.pkl")
joblib.dump(preprocessor, "../backend/diet_preprocessor.pkl")

# The original message contained mojibake ("âœ…"): the UTF-8 bytes of the
# check-mark emoji decoded as cp1252. Restored to the intended character.
print("✅ Model & preprocessor saved")