In [None]:
# =====================================
# 🥗 FOOD WEIGHT ANALYZER - 1000 ROWS
# Predicts Weight Gain or Weight Loss
# =====================================

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import pickle

# =====================================
# 1️⃣ LOAD DATA
# =====================================
# Read the labelled food dataset from disk and show the first rows as a
# quick sanity check of the columns.
df = pd.read_csv("food_dataset_1000.csv")
print("✅ Dataset loaded successfully!", df.head(), sep="\n")

# =====================================
# 2️⃣ TRAIN-TEST SPLIT
# =====================================
# Split the raw text *before* vectorizing. Fitting TF-IDF on the full
# dataset would let the vocabulary and IDF weights be learned from the
# held-out rows, leaking test information into training and inflating
# the evaluation scores.
y = df['label']
text_train, text_test, y_train, y_test = train_test_split(
    df['ingredients'], y, test_size=0.2, random_state=42, stratify=y
)

# =====================================
# 3️⃣ TEXT PREPROCESSING
# =====================================
# We'll use only the 'ingredients' column for prediction.
# Unigrams + bigrams, English stop words removed.
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
# Learn vocabulary/IDF from the training rows only ...
X_train = vectorizer.fit_transform(text_train)
# ... and merely apply that fitted transform to the test rows.
X_test = vectorizer.transform(text_test)
# Full feature matrix (using the train-fitted vocabulary), kept so any
# later code that references `X` still finds it defined.
X = vectorizer.transform(df['ingredients'])

# =====================================
# 4️⃣ MODEL TRAINING
# =====================================
# Fit a logistic-regression classifier on the training features.
# `fit` returns the estimator itself, so construction and training can be
# chained; max_iter is raised to 1000 iterations for the solver.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# =====================================
# 5️⃣ MODEL EVALUATION
# =====================================
# Predict on the held-out split, then compute overall accuracy and the
# per-class precision/recall/F1 table before printing them.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)

print("\n📊 Model Evaluation:")
print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", per_class_report)

# =====================================
# 6️⃣ SAVE MODEL AND VECTORIZER
# =====================================
# Persist both fitted objects: the classifier and the vectorizer that
# produced its input features. Each is pickled to its own file.
artifacts = (
    ("food_weight_model_1000.pkl", model),
    ("tfidf_vectorizer_1000.pkl", vectorizer),
)
for artifact_path, artifact in artifacts:
    with open(artifact_path, "wb") as f:
        pickle.dump(artifact, f)

print("\n✅ Model and vectorizer saved successfully as .pkl files!")

# =====================================
# 7️⃣ PREDICTION FUNCTION
# =====================================
def predict_food_type(ingredients: str) -> str:
    """Classify an ingredient list, print the result, and return the label.

    Relies on the module-level ``vectorizer`` and ``model`` objects.

    Args:
        ingredients: The ingredient list as a single string
            (e.g. ``"rolled oats, chia seeds, almond"``).

    Returns:
        The label predicted by ``model`` for this ingredient string
        (the first element of ``model.predict``'s output).
    """
    # The vectorizer expects an iterable of documents, hence the 1-item list.
    x_vec = vectorizer.transform([ingredients])
    prediction = model.predict(x_vec)[0]
    print(f"\n🧾 Ingredients: {ingredients}")
    print(f"💡 Prediction: {prediction}")
    # Hand the label back so callers can use it programmatically instead of
    # having to scrape stdout; previously the function returned None.
    return prediction

# =====================================
# 8️⃣ TEST EXAMPLES
# =====================================
# Sample ingredient strings used as a quick smoke test of the classifier.
examples = [
    "potato, vegetable oil, salt, sugar, cheese powder",
    "rolled oats, chia seeds, almond, no sugar added",
    "chocolate, milk solids, cocoa powder, sugar",
    "quinoa, flax seeds, spinach, olive oil",
    "instant noodles, refined flour, palm oil, seasoning",
]

# Run each sample through the prediction helper, which prints its result.
for sample in examples:
    predict_food_type(sample)


✅ Dataset loaded successfully!
                                         ingredients        label  calories  \
0  chia seeds, tofu, berries, cucumber, oats, pea...  Weight Loss       157   
1  broccoli, green tea extract, pea protein, spin...  Weight Loss       227   
2  low fat yogurt, flax seeds, apple, whey isolat...  Weight Loss       180   
3        lentils, olive oil, flax seeds, pea protein  Weight Loss       169   
4                       chocolate, corn syrup, yeast  Weight Gain       527   

   protein  carbs  fiber  fat  sugar  
0       19     12      7    3      5  
1        9      6      3    8      0  
2        8      8     10    3      4  
3       19     12      8    2      0  
4        9     50      4   25     14  

📊 Model Evaluation:
Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

 Weight Gain       1.00      1.00      1.00       100
 Weight Loss       1.00      1.00      1.00       100

    accuracy                        