# In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
import re

# Load your dataset
# The rest of this script reads the following columns from the CSV:
#   - ingredients_list, calories, carbohydrates, protein, cholesterol
#     (used to build the feature matrix)
#   - recipe_name, image_url (returned alongside ingredients_list in the
#     recommendations)
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv("recipe_final_with_nutrition.csv")

# Clean ingredients function (no replacements)
def clean_ingredients(ingredients: str) -> str:
    """Normalize a raw ingredients string before vectorization.

    Lowercases the text, strips leading/trailing whitespace and collapses
    every run of whitespace (spaces, tabs, newlines) into a single space.
    No ingredient substitutions are performed.

    Args:
        ingredients: Raw ingredients text.

    Returns:
        The normalized string; an all-whitespace input yields ``''``.
    """
    # Basic cleaning: lowercase and remove extra whitespace
    return re.sub(r'\s+', ' ', ingredients.lower().strip())

# Normalize the ingredient text; missing entries become empty strings first
df['ingredients_list'] = (
    df['ingredients_list']
    .fillna('')
    .astype(str)
    .apply(clean_ingredients)
)

# TF-IDF representation of the cleaned ingredient text
vectorizer = TfidfVectorizer()
X_ingredients = vectorizer.fit_transform(df['ingredients_list'])

# Nutrition columns: missing values are replaced by 0, then standardized
numerical_features = ['calories', 'carbohydrates', 'protein', 'cholesterol']
X_numerical = df.loc[:, numerical_features].fillna(0)
scaler = StandardScaler().fit(X_numerical)
X_numerical_scaled = scaler.transform(X_numerical)

# Dense feature matrix: scaled nutrition columns followed by the TF-IDF columns
# (the sparse TF-IDF matrix is converted to a dense array here)
X_combined = np.concatenate((X_numerical_scaled, X_ingredients.toarray()), axis=1)

# Nearest-neighbour index over the combined features (3 neighbours, Euclidean)
knn = NearestNeighbors(n_neighbors=3, metric='euclidean').fit(X_combined)

# Recommendation function
def recommend_recipes(input_features):
    """Return the recipes closest to a nutrition + ingredients query.

    Args:
        input_features: Sequence whose leading entries are the numeric
            values in the same order as ``numerical_features``, followed by
            one free-text ingredients string.

    Returns:
        A DataFrame with the ``recipe_name``, ``ingredients_list`` and
        ``image_url`` columns of the rows selected by ``knn.kneighbors``,
        in the order that call returns them.
    """
    # Derive the split point from the fitted feature list instead of a
    # hard-coded 4 so the function follows any change to numerical_features.
    n_numeric = len(numerical_features)

    # The scaler was fitted on a DataFrame; handing it a DataFrame with the
    # same column names avoids sklearn's "X does not have valid feature
    # names" warning that a bare list would trigger.
    nutrition_frame = pd.DataFrame(
        [list(input_features[:n_numeric])],
        columns=numerical_features,
    )
    input_nutrition_scaled = scaler.transform(nutrition_frame)

    # Run the query text through the same normalization as the corpus so the
    # two stay in sync if the cleaning rules ever change.
    query_text = clean_ingredients(input_features[n_numeric])
    input_ingredients_transformed = vectorizer.transform([query_text])

    # Same column layout as the training matrix: nutrition first, then TF-IDF.
    input_combined = np.hstack(
        [input_nutrition_scaled, input_ingredients_transformed.toarray()]
    )

    # Only the neighbour indices are needed; the distances are discarded.
    _, indices = knn.kneighbors(input_combined)
    return df.iloc[indices[0]][['recipe_name', 'ingredients_list', 'image_url']]

# Example input (you can modify this)
# Layout expected by recommend_recipes: the first four values are the numeric
# features in the order calories, carbohydrates, protein, cholesterol; the
# fifth value is the ingredients text.
example_input = [300, 40, 10, 5, 'pork belly, smoked paprika, kosher salt']
recommendations = recommend_recipes(example_input)

# Display results
# Bare expression: its value is only rendered when this is the last expression
# of a notebook/REPL cell; in a plain script it would need to be printed.
recommendations.reset_index(drop=True)
