In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Load the datasets
menu_df = pd.read_csv("icmr_fssai_150_unique_dishes.csv")
health_df = pd.read_csv("icmr_fssai_20_plus_disease_nutrient_mapping.csv")

# Helper function to determine nutrient level
def get_nutrient_level(value, nutrient):
    thresholds = {
        'Calories': (58, 345.9),
        'Protein': (3, 19.7),
        'Fat': (1.1, 24.8),
        'Carbohydrates': (10.1, 43.3)
    }
    low, high = thresholds[nutrient]
    if value < low:
        return 'Low'
    elif value > high:
        return 'High'
    else:
        return 'Moderate'

# Generate training data
training_data = []

for _, food in menu_df.iterrows():
    food_name = food['Food Item']
    
    try:
        nutrient_levels = {
            'calories': get_nutrient_level(float(food['Calories (kcal)']), 'Calories'),
            'protein': get_nutrient_level(float(food['Protein (g)']), 'Protein'),
            'fat': get_nutrient_level(float(food['Fat (g)']), 'Fat'),
            'carbohydrates': get_nutrient_level(float(food['Carbohydrates (g)']), 'Carbohydrates')
        }
    except:
        continue

    for _, condition in health_df.iterrows():
        cond_name = condition['Condition']
        restricted_raw = str(condition['Restricted Nutrients']).strip().lower()
        restricted = [r.strip() for r in restricted_raw.split(',') if r.strip()]
        
        is_suitable = True
        for key, level in nutrient_levels.items():
            rule = f"{level.lower()} {key}"
            if rule in restricted:
                is_suitable = False
                break

        label = 'Yes' if is_suitable else 'No'
        training_data.append({
            'food_item': food_name,
            'condition': cond_name,
            'label': label
        })

# Convert to DataFrame
train_df = pd.DataFrame(training_data)
print("Label distribution:\n", train_df['label'].value_counts())

if train_df['label'].nunique() < 2:
    print("Error: Only one class found.")
else:
    train_df['input_text'] = train_df['food_item'] + " for " + train_df['condition']
    label_encoder = LabelEncoder()
    train_df['target'] = label_encoder.fit_transform(train_df['label'])

    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(train_df['input_text'])
    y = train_df['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LogisticRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    print("Model Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

    # BMI calculation
    def calculate_bmi(weight_kg, height_cm):
        height_m = height_cm / 100
        return weight_kg / (height_m ** 2)

    def get_bmi_category(bmi):
        if bmi < 18.5:
            return "Underweight"
        elif bmi < 25:
            return "Normal"
        elif bmi < 30:
            return "Overweight"
        else:
            return "Obese"

    # Combined prediction function without sugar/BP
    def predict_suitability(food_item, condition, weight_kg, height_cm):
        input_text = f"{food_item} for {condition}"
        input_vector = vectorizer.transform([input_text])
        prediction = model.predict(input_vector)[0]
        label = label_encoder.inverse_transform([prediction])[0]

        bmi = calculate_bmi(weight_kg, height_cm)
        bmi_category = get_bmi_category(bmi)

        return label, bmi, bmi_category

    # Sample Prediction
    food = "Idli"
    condition = "Diabetes"  # Or any other condition from your dataset
    weight = 75  # kg
    height = 172  # cm

    label, bmi, bmi_cat = predict_suitability(food, condition, weight, height)

    print(f"Food Suitability: {label}")
    print(f"BMI: {bmi:.2f} ({bmi_cat})")


Label distribution:
 label
Yes    3112
No        8
Name: count, dtype: int64
Model Accuracy: 0.9967948717948718
              precision    recall  f1-score   support

          No       0.00      0.00      0.00         2
         Yes       1.00      1.00      1.00       622

    accuracy                           1.00       624
   macro avg       0.50      0.50      0.50       624
weighted avg       0.99      1.00      1.00       624

Food Suitability: Yes
BMI: 25.35 (Overweight)


In [4]:
def get_best_foods_for_condition(food_list, condition):
    suitable_foods = []

    for food_item in food_list:
        input_text = f"{food_item} for {condition}"
        input_vector = vectorizer.transform([input_text])
        prediction = model.predict(input_vector)[0]
        predicted_label = label_encoder.inverse_transform([prediction])[0]

        if predicted_label == "Yes":
            suitable_foods.append(food_item)

    return suitable_foods

In [5]:
import joblib

# Save the trained model
joblib.dump(model, "food_recommendation_model.pkl")

# Save the TF-IDF vectorizer
joblib.dump(vectorizer, "vectorizer.pkl")

# Save the label encoder (Yes/No)
joblib.dump(label_encoder, "label_encoder.pkl")

['label_encoder.pkl']

In [None]:
pip install streamlit pandas numpy scikit-learn pytesseract pillow PyMuPDF