In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib



data = pd.read_csv('diabetes_prediction_dataset.csv')

# Preprocessing: Encoding categorical data
label_encoder_gender = LabelEncoder()
label_encoder_smoking = LabelEncoder()
data['gender'] = label_encoder_gender.fit_transform(data['gender'])
data['smoking_history'] = label_encoder_smoking.fit_transform(data['smoking_history'])

# Features and target
X = data.drop('diabetes', axis=1)
y = data['diabetes']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy:.2f}')

# Example user input for prediction
example_input = {
    'gender': 'Female',
    'age': 45,
    'hypertension': 0,
    'heart_disease': 0,
    'smoking_history': 'never',
    'bmi': 28.1,
    'HbA1c_level': 5.9,
    'blood_glucose_level': 120
}

# Convert user input into DataFrame
example_df = pd.DataFrame([example_input])
if example_df['gender'][0] in label_encoder_gender.classes_:
    example_df['gender'] = label_encoder_gender.transform(example_df['gender'])
else:
    example_df['gender'] = -1

if example_df['smoking_history'][0] in label_encoder_smoking.classes_:
    example_df['smoking_history'] = label_encoder_smoking.transform(example_df['smoking_history'])
else:
    example_df['smoking_history'] = -1

# Predict using the trained model
prediction = model.predict(example_df)[0]
print(f'Predicted Diabetes Status: {"Diabetic" if prediction == 1 else "Non-Diabetic"}')

# Function to take user input and make a prediction
def predict_from_user_input():
    user_input = {
        'gender': input("Enter gender (Male/Female/Other): "),
        'age': int(input("Enter age: ")),
        'hypertension': int(input("Enter hypertension (0 for No, 1 for Yes): ")),
        'heart_disease': int(input("Enter heart disease (0 for No, 1 for Yes): ")),
        'smoking_history': input("Enter smoking history (never, former, current, etc.): "),
        'bmi': float(input("Enter BMI: ")),
        'HbA1c_level': float(input("Enter HbA1c level: ")),
        'blood_glucose_level': int(input("Enter blood glucose level: "))
    }

    user_df = pd.DataFrame([user_input])
    if user_df['gender'][0] in label_encoder_gender.classes_:
        user_df['gender'] = label_encoder_gender.transform(user_df['gender'])
    else:
        user_df['gender'] = -1

    if user_df['smoking_history'][0] in label_encoder_smoking.classes_:
        user_df['smoking_history'] = label_encoder_smoking.transform(user_df['smoking_history'])
    else:
        user_df['smoking_history'] = -1

    prediction = model.predict(user_df)[0]
    print(f'Predicted Diabetes Status: {"Diabetic" if prediction == 1 else "Non-Diabetic"}')

predict_from_user_input()
# Save all necessary components to .pkl files
joblib.dump(model, 'diabetes_prediction_model.pkl')
joblib.dump(label_encoder_gender, 'label_encoder_gender.pkl')
joblib.dump(label_encoder_smoking, 'label_encoder_smoking.pkl')





STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model Accuracy: 0.95
Predicted Diabetes Status: Non-Diabetic
Enter gender (Male/Female/Other): Male
Enter age: 22
Enter hypertension (0 for No, 1 for Yes): 1
Enter heart disease (0 for No, 1 for Yes): 1
Enter smoking history (never, former, current, etc.): never
Enter BMI: 25
Enter HbA1c level: 6
Enter blood glucose level: 140
Predicted Diabetes Status: Diabetic


['label_encoder_smoking.pkl']

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import joblib
import pandas as pd

# Load data
df = pd.read_csv("train.csv")

# Define obesity classification function
def classify_obesity(bmi):
    if bmi < 18.5:
        return "Underweight"
    elif 18.5 <= bmi < 25:
        return "Normal weight"
    elif 25 <= bmi < 30:
        return "Overweight"
    elif 30 <= bmi < 35:
        return "Obesity Type I"
    elif 35 <= bmi < 40:
        return "Obesity Type II"
    else:
        return "Obesity Type III"

# Prepare target variable
obese_classes = ["Obesity_Type_I", "Obesity_Type_II", "Obesity_Type_III"]
df["Obese"] = df["NObeyesdad"].apply(lambda x: 1 if x in obese_classes else 0)
df.drop(columns=["NObeyesdad", "id"], inplace=True)

# Add BMI and classification
df["BMI"] = df["Weight"] / (df["Height"] ** 2)
df["Obesity_Level"] = df["BMI"].apply(classify_obesity)

# Categorical columns encoding (FIXED: No duplicate encoding)
categorical_cols = ["Gender", "family_history_with_overweight", "FAVC", "CAEC", "SMOKE", "SCC", "MTRANS"]
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# Features and target (REMOVED redundant one-hot encoding)
X = df.drop(columns=["Obese", "Obesity_Level", "BMI"])
y = df["Obese"]

# Numerical scaling
numerical_cols = ["Age", "Height", "Weight", "FCVC", "NCP", "CH2O", "FAF"]
scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train XGBoost model
xgb_model = XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric="logloss", random_state=42)
xgb_model.fit(X_train, y_train)

# Evaluate
y_pred = xgb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"XGBoost Model Accuracy: {accuracy:.4f}")

# Save all components (FIXED: Save BMI classifier separately)
joblib.dump({
    'model': xgb_model,
    'label_encoders': label_encoders,
    'scaler': scaler,
    'categorical_cols': categorical_cols,
    'numerical_cols': numerical_cols
}, 'obesity_pipeline.pkl')

# Additional: Save BMI classifier for standalone use
joblib.dump(classify_obesity, 'bmi_classifier.pkl')

print("All components saved successfully!")

Parameters: { "use_label_encoder" } are not used.



XGBoost Model Accuracy: 0.9798
All components saved successfully!


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import re

df = pd.read_excel("/content/FOOD NUTRITION DATASET.xlsx")


df['categories_en'] = df['categories_en'].fillna('').str.lower()
df['traces_en'] = df['traces_en'].fillna('').str.lower()
df['product_name'] = df['product_name'].fillna('').str.lower()
df['combined_text'] = df['product_name'] + ' ' + df['categories_en'] + ' ' + df['traces_en']



def detect_allergy(text):
    if not isinstance(text, str):
        text = ""

    text = text.lower()

    peanut_keywords = [r'\bpeanut\b', r'\bpeanuts\b']
    gluten_keywords = [
        r'\bgluten\b', r'\bwheat\b', r'\bbarley\b', r'\brye\b',
        r'\bmalt\b', r"brewer's yeast", r'\bspelt\b', r'\btriticale\b',
        r'\bsemolina\b', r'\bfarina\b', r'\bgraham\b', r'\bdurum\b',
        r'\bkamut\b', r'\beinkorn\b', r'\bemmer\b', r'\bcouscous\b'
    ]

    has_peanut = any(re.search(keyword, text) for keyword in peanut_keywords)
    has_gluten = any(re.search(keyword, text) for keyword in gluten_keywords)

    if has_peanut and has_gluten:
        return 3
    elif has_peanut:
        return 1
    elif has_gluten:
        return 2
    else:
        return 0



df['allergy_label'] = df['combined_text'].apply(detect_allergy)
# Ensure combined_text has no NaNs
df['combined_text'] = df['combined_text'].fillna("").astype(str)
#X = X.fillna("")  # Replace NaN with empty string
X = df['combined_text']
y = df['allergy_label']


vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_vectorized = vectorizer.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
svm = SVC(probability=True, kernel='linear', random_state=42)
ensemble = VotingClassifier(estimators=[('rf', rf), ('svm', svm)], voting='soft')

ensemble.fit(X_train, y_train)


df['predicted_allergy'] = ensemble.predict(X_vectorized)

# Create allergy-specific datasets
peanut_free = df[df['predicted_allergy'].isin([0, 2])]
gluten_free = df[df['predicted_allergy'].isin([0, 1])]
both_free   = df[df['predicted_allergy'] == 0]


peanut_free.to_csv("peanut_free_diet_dataset.csv", index=False)
gluten_free.to_csv("gluten_free_diet_dataset.csv", index=False)
both_free.to_csv("peanut_gluten_free_diet_dataset.csv", index=False)

print("\n Datasets created:")
print("1. peanut_free_diet_dataset.csv (for peanut allergy)")
print("2. gluten_free_diet_dataset.csv (for gluten allergy)")
print("3. peanut_gluten_free_diet_dataset.csv (for both allergies)")

# You can load the desired one like this:
# user_choice = 1  # replace manually or with input()
# if user_choice == 1:
#     selected_df = peanut_free
# elif user_choice == 2:
#     selected_df = gluten_free
# elif user_choice == 3:
#     selected_df = both_free



 Datasets created:
1. peanut_free_diet_dataset.csv (for peanut allergy)
2. gluten_free_diet_dataset.csv (for gluten allergy)
3. peanut_gluten_free_diet_dataset.csv (for both allergies)


In [3]:
# Vibraneat: Finalized Ensemble Training Script (Fully Refined)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import joblib

# ---------------------- Load All Datasets ----------------------
food_data = pd.read_excel("FOOD NUTRITION DATASET.xlsx")
ingredient_data = pd.read_csv("food_ingredients_and_allergens.csv")
diet_reco_data = pd.read_csv("diet_recommendations_dataset.csv")

# Merge food with ingredients
merged_data = pd.merge(
    food_data,
    ingredient_data,
    left_on="product_name",
    right_on="Food Product",
    how="left"
).drop(columns=["Food Product"]).rename(columns={"product_name": "name"})

# Merge with diet recommendations if applicable
if 'Food' in diet_reco_data.columns:
    diet_reco_data = diet_reco_data.rename(columns={"Food": "name"})
    merged_data = pd.merge(merged_data, diet_reco_data, on="name", how="left")

# ---------------------- Fill Missing Nutritional Values ----------------------
fallbacks = {
    'calories': 200,
    'carbohydrates': 30,
    'proteins': 10,
    'sugars': 5
}
for col, default in fallbacks.items():
    if col not in merged_data.columns or merged_data[col].isnull().all():
        merged_data[col] = default
    merged_data[col] = merged_data[col].fillna(default)

# ---------------------- Auto-label Meals ----------------------
merged_data['label'] = (
    (merged_data['sugars'] <= 10) &
    (merged_data['carbohydrates'] <= 50) &
    (merged_data['calories'] <= 3000)
).astype(int)

# Inject negatives if all positive
if merged_data['label'].nunique() == 1:
    merged_data.loc[merged_data.sample(frac=0.1, random_state=42).index, 'label'] = 0

print("✅ Class distribution:")
print(merged_data['label'].value_counts())

# ---------------------- Simulate User Profiles ----------------------
np.random.seed(42)
num_simulated = len(merged_data)
sim_profiles = pd.DataFrame({
    "diabetic": np.random.choice([0, 1], size=num_simulated, p=[0.7, 0.3]),
    "obesity_class": np.random.choice([0, 1, 2, 3, 4, 5], size=num_simulated, p=[0.1, 0.3, 0.3, 0.15, 0.1, 0.05])
})

merged_data = pd.concat([merged_data.reset_index(drop=True), sim_profiles], axis=1)

# ---------------------- Features & Labels ----------------------
features = ['calories', 'sugars', 'carbohydrates', 'proteins', 'diabetic', 'obesity_class']
X = merged_data[features]
y = merged_data['label']

# ---------------------- Train-Test Split ----------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# ---------------------- Define Ensemble ----------------------
rf = RandomForestClassifier(n_estimators=100, random_state=42)
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
ensemble = VotingClassifier(estimators=[('rf', rf), ('xgb', xgb)], voting='soft')

# ---------------------- Train & Evaluate ----------------------
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"\n✅ Final Ensemble Accuracy: {acc:.4f}")

# ---------------------- Save Model ----------------------
joblib.dump(ensemble, "meal_ensemble_model.pkl")
print("✅ Saved model as 'meal_ensemble_model.pkl'")


✅ Class distribution:
label
1    945481
0    105054
Name: count, dtype: int64


Parameters: { "use_label_encoder" } are not used.




✅ Final Ensemble Accuracy: 0.9000
✅ Saved model as 'meal_ensemble_model.pkl'


In [6]:
# ---------------------- Final Rectified Vibraneat Script (All Datasets Integrated) ----------------------
!pip install XlsxWriter
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import joblib
import warnings
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
import os

warnings.filterwarnings('ignore')

# ---------------------- Load Datasets ----------------------
try:
    food_data = pd.read_excel("FOOD NUTRITION DATASET.xlsx")
    ingredient_data = pd.read_csv("food_ingredients_and_allergens.csv")
    diet_reco_data = pd.read_csv("diet_recommendations_dataset.csv")
except FileNotFoundError as e:
    print(f"Error loading dataset: {e}")
    exit()

# Merge food with ingredients
merged_food_data = pd.merge(
    food_data,
    ingredient_data,
    left_on="product_name",
    right_on="Food Product",
    how="left"
).drop(columns=["Food Product"]).rename(columns={"product_name": "name"})

# Try merging diet recommendations if common column exists
if 'name' in merged_food_data.columns and 'name' in diet_reco_data.columns:
    merged_food_data = pd.merge(
        merged_food_data,
        diet_reco_data,
        on="name",
        how="left"
    )

# ---------------------- Column Fallbacks ----------------------
fallbacks = {
    'calories': 200,
    'carbohydrates': 30,
    'proteins': 10,
    'sugars': 5
}
for col, default in fallbacks.items():
    if col not in merged_food_data.columns:
        merged_food_data[col] = default

if 'meal_type' not in merged_food_data.columns:
    meal_list = ['Breakfast', 'Lunch', 'Dinner', 'Snack'] * ((len(merged_food_data) // 4) + 1)
    merged_food_data['meal_type'] = meal_list[:len(merged_food_data)]

# ---------------------- Load Models ----------------------
try:
    gender_encoder = joblib.load("label_encoder_gender.pkl")
    smoke_encoder = joblib.load("label_encoder_smoking.pkl")
    diabetes_model = joblib.load("diabetes_prediction_model.pkl")
    obesity_pipeline = joblib.load("obesity_pipeline.pkl")
    obesity_model = obesity_pipeline['model']
    obesity_encoders = obesity_pipeline['label_encoders']
    obesity_scaler = obesity_pipeline['scaler']
    meal_model = joblib.load("meal_ensemble_model.pkl")
except FileNotFoundError as e:
    print(f"Missing required file: {e}")
    exit()

# ---------------------- Helper Functions ----------------------
def classify_obesity(bmi):
    if bmi < 18.5:
        return "Underweight"
    elif 18.5 <= bmi < 25:
        return "Normal weight"
    elif 25 <= bmi < 30:
        return "Overweight"
    elif 30 <= bmi < 35:
        return "Obesity Type I"
    elif 35 <= bmi < 40:
        return "Obesity Type II"
    else:
        return "Obesity Type III"

def preprocess_diabetes_input(df):
    df = df.copy()
    df['gender'] = df['gender'].str.strip().str.capitalize()
    df['gender'] = gender_encoder.transform(df['gender'])
    df['smoking_history'] = df['smoking_history'].str.strip().str.lower()
    df['smoking_history'] = smoke_encoder.transform(df['smoking_history'])
    return df

def preprocess_obesity_input(df):
    df = df.copy()
    categorical_cols = ['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'SCC', 'CALC', 'MTRANS']
    for col in categorical_cols:
        if col not in obesity_encoders:
            print(f"Skipping missing encoder for column: {col}")
            continue
        if col == 'Gender':
            df[col] = df[col].str.strip().str.capitalize()
        else:
            df[col] = df[col].str.strip().str.lower().replace(' ', '_')
        try:
            df[col] = obesity_encoders[col].transform(df[col])
        except ValueError as e:
            print(f"⚠️ Encoding error for column {col}: {e}")
            df[col] = -1
    num_cols = ['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF']
    df[num_cols] = obesity_scaler.transform(df[num_cols])
    return df

def get_user_predictions(diabetes_df, obesity_df_raw):
    diabetic_pred = int(diabetes_model.predict(diabetes_df)[0])
    bmi = obesity_df_raw['Weight'].values[0] / (obesity_df_raw['Height'].values[0] ** 2)
    obesity_class = classify_obesity(bmi)
    return diabetic_pred, obesity_class

def predict_meal_suitability(food_df, diabetic, obesity_class):
    food_df = food_df.copy()
    user_profile = pd.DataFrame([{"diabetic": diabetic, "obesity_class": obesity_class}])
    class_mapping = {
        "Underweight": 0,
        "Normal weight": 1,
        "Overweight": 2,
        "Obesity Type I": 3,
        "Obesity Type II": 4,
        "Obesity Type III": 5
    }
    user_profile["obesity_class"] = class_mapping.get(obesity_class, 1)

    features = food_df[['calories', 'sugars', 'carbohydrates', 'proteins']].copy()

    user_context = pd.concat([user_profile] * len(features), ignore_index=True)
    features = pd.concat([features.reset_index(drop=True), user_context], axis=1)
    features.columns = features.columns.astype(str)

    predictions = meal_model.predict(features)
    return food_df[predictions == 1]

def generate_diet_plan(filtered_food):
    meal_types = ['Breakfast', 'Lunch', 'Dinner', 'Snack']
    weekly_plan = []
    for _ in range(7):
        daily_meals = []
        for meal_type in meal_types:
            options = filtered_food[filtered_food['meal_type'] == meal_type]
            if not options.empty:
                selected = options.sample(min(1, len(options))).iloc[0]
                daily_meals.append({
                    'Meal Type': meal_type,
                    'Food Item': selected['name'],
                    'Calories': selected['calories'],
                    'Protein': selected['proteins'],
                    'Carbs': selected['carbohydrates']
                })
        weekly_plan.append(pd.DataFrame(daily_meals))
    return weekly_plan

def export_plan_to_excel(plan, filename="weekly_meal_plan.xlsx"):
    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
        for day_idx, daily_meals in enumerate(plan):
            daily_meals.to_excel(writer, sheet_name=f"Day {day_idx + 1}", index=False)

# ---------------------- Main Flow ----------------------
if __name__ == "__main__":
    choice = input("Use example data? (yes/no): ").strip().lower()

    if choice == "no":
        diabetes_input = pd.DataFrame([{
            'gender': input("Gender (Male/Female): ").strip().capitalize(),
            'age': int(input("Age: ")),
            'hypertension': int(input("Hypertension (0/1): ")),
            'heart_disease': int(input("Heart Disease (0/1): ")),
            'smoking_history': input("Smoking History (never/former/current): ").strip().lower(),
            'bmi': float(input("BMI: ")),
            'HbA1c_level': float(input("HbA1c Level: ")),
            'blood_glucose_level': int(input("Blood Glucose Level: "))
        }])

        obesity_input = pd.DataFrame([{
            "Gender": input("Gender (Male/Female): ").strip().capitalize(),
            "Age": int(input("Age: ")),
            "Height": float(input("Height in meters (e.g., 1.75): ")),
            "Weight": float(input("Weight in kg: ")),
            "family_history_with_overweight": input("Family History (yes/no): ").strip().lower(),
            "FAVC": input("Frequent high-calorie food consumption (yes/no): ").strip().lower(),
            "FCVC": float(input("Vegetable Frequency (1-3): ")),
            "NCP": float(input("Meals per day (1-4): ")),
            "CAEC": input("Eating between meals (no/sometimes/frequently/always): ").strip().lower(),
            "SMOKE": input("Smoker? (yes/no): ").strip().lower(),
            "CH2O": float(input("Water intake (1-3): ")),
            "SCC": input("Calorie tracking (yes/no): ").strip().lower(),
            "FAF": float(input("Physical activity frequency (0-3): ")),
            "TUE": float(input("Screen time (0-2): ")),
            "CALC": input("Alcohol consumption (no/sometimes/frequently/always): ").strip().lower(),
            "MTRANS": input("Transport (public_transportation/walking/bike/car/motorbike): ").strip().lower().replace(' ', '_')
        }])
    else:
        diabetes_input = pd.DataFrame([{
            'gender': 'Male', 'age': 35, 'hypertension': 0, 'heart_disease': 0,
            'smoking_history': 'never', 'bmi': 28.5, 'HbA1c_level': 5.7,
            'blood_glucose_level': 138
        }])
        obesity_input = pd.DataFrame([{
            "Gender": "Male", "Age": 35, "Height": 1.75, "Weight": 95,
            "family_history_with_overweight": "yes", "FAVC": "yes",
            "FCVC": 2.5, "NCP": 3.0, "CAEC": "no", "SMOKE": "no",
            "CH2O": 1.5, "SCC": "no", "FAF": 1.0, "TUE": 2.0,
            "CALC": "no", "MTRANS": "public_transportation"
        }])

    diabetes_processed = preprocess_diabetes_input(diabetes_input)
    obesity_processed = preprocess_obesity_input(obesity_input)
    diabetic, obesity_class = get_user_predictions(diabetes_processed, obesity_input)

    filtered_food = predict_meal_suitability(merged_food_data, diabetic, obesity_class)

    if not filtered_food.empty:
        weekly_plan = generate_diet_plan(filtered_food)
        export_plan_to_excel(weekly_plan)
        print("\nSample Day 1 Plan:")
        print(weekly_plan[0])
    else:
        print("No meals match your health requirements!")

# ---------------------- End ----------------------


Use example data? (yes/no): no
Gender (Male/Female): Male
Age: 22
Hypertension (0/1): 1
Heart Disease (0/1): 1
Smoking History (never/former/current): never
BMI: 22
HbA1c Level: 5
Blood Glucose Level: 140
Gender (Male/Female): Male
Age: 22
Height in meters (e.g., 1.75): 1.6
Weight in kg: 55
Family History (yes/no): yes
Frequent high-calorie food consumption (yes/no): yes
Vegetable Frequency (1-3): 2
Meals per day (1-4): 3
Eating between meals (no/sometimes/frequently/always): no
Smoker? (yes/no): yes
Water intake (1-3): 2
Calorie tracking (yes/no): yes
Physical activity frequency (0-3): 3
Screen time (0-2): 2
Alcohol consumption (no/sometimes/frequently/always): always
Transport (public_transportation/walking/bike/car/motorbike): bike
Skipping missing encoder for column: CALC
⚠️ Encoding error for column MTRANS: y contains previously unseen labels: 'bike'

Sample Day 1 Plan:
   Meal Type                    Food Item  Calories  Protein  Carbs
0  Breakfast  Petit Camembert Au lait cru   