# In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the recipe data (update path if needed)
recipes_df = pd.read_csv(r"C:\Users\tanvi\Downloads\recipes_3 - Copy.csv", low_memory=False)

# Replace NaN with empty strings in the non-numeric columns only, so the text filters can
# treat every text cell as a string. Numeric columns deliberately keep their NaN values:
# filling them with '' would turn them into object columns, which removes them from the
# numeric feature selection used for KNN and can make sorting by a numeric column fail on
# mixed str/float values.
_text_columns = recipes_df.select_dtypes(exclude=[np.number]).columns
recipes_df = recipes_df.fillna({column: '' for column in _text_columns})

# Prompt for the search terms; each answer is trimmed, lower-cased and split on commas.
_raw_tags = input("Enter tags (comma-separated): ")
tags_input = _raw_tags.strip().lower().split(',')
_raw_ingredients = input("Enter ingredients (comma-separated): ")
ingredients_input = _raw_ingredients.strip().lower().split(',')

# Trim every term and discard the blank ones (e.g. from "a,,b" or an empty answer).
tags = list(filter(None, map(str.strip, tags_input)))
ingredients = list(filter(None, map(str.strip, ingredients_input)))

# Narrow the recipes down: for each column that has at least one search term, keep only
# the rows whose lower-cased cell text contains any of the terms as a substring.
filtered_df = recipes_df.copy()
for _column, _terms in (('tags', tags), ('ingredients', ingredients)):
    if not _terms:
        # No terms given for this column: leave the current selection untouched.
        continue
    _keep_rows = filtered_df[_column].apply(
        lambda cell: any(term in str(cell).lower() for term in _terms)
    )
    filtered_df = filtered_df[_keep_rows]

# Renumber the surviving rows 0..n-1 so positions and index labels coincide.
filtered_df = filtered_df.reset_index(drop=True)

# Report how many recipes are left.
_match_count = len(filtered_df)
print(f"✅ Found {_match_count} matching recipes.")


# Defensive: this part may be executed separately from the filtering step (for example as
# its own notebook cell), so fail with a clear message when filtered_df does not exist yet.
try:
    filtered_df
except NameError:
    raise NameError("filtered_df is not defined. Create filtered_df by filtering your recipes_df first.") from None

# (column name, printed label) pairs for the optional detail lines, in display order.
_DETAIL_FIELDS = (
    ('description', 'Description'),
    ('ingredients', 'Ingredients'),
    ('n_ingredients', 'Number of ingredients'),
    ('steps', 'Instructions'),
    ('minutes', 'Minutes'),
    ('nutrition', 'Nutrition'),
    ('rating', 'Rating'),
)


def _is_present(value):
    """Return True unless *value* is NaN/None or a blank string.

    Missing text cells reach this point as '' (the loader fills NaN with empty
    strings), so a plain ``pd.notna`` check would let them through.
    """
    if isinstance(value, str):
        return bool(value.strip())
    return bool(pd.notna(value))


def _text_or_blank(df, col):
    """Return column *col* of *df* as strings, or an all-'' Series if it is absent."""
    if col in df.columns:
        return df[col].fillna('').astype(str)
    return pd.Series('', index=df.index)


def _display_title(row, fallback):
    """Return the first non-blank of ``recipe_name`` / ``name`` in *row*, else *fallback*."""
    for col in ('recipe_name', 'name'):
        if col in row.index and _is_present(row[col]):
            return row[col]
    return fallback


def _collect_neighbors(indices, limit):
    """Collect up to *limit* distinct neighbour positions from a kneighbors index array.

    Rows of *indices* are scanned in order; a row's own position is skipped and each
    neighbour is taken only the first time it appears. In practice the result is the
    neighbours of the first row, topped up from the following rows if needed.
    """
    picked = []
    seen = set()
    for row_pos, neighbours in enumerate(indices):
        for neigh in neighbours:
            neigh = int(neigh)
            if neigh == row_pos or neigh in seen:
                continue
            seen.add(neigh)
            picked.append(neigh)
            if len(picked) >= limit:
                return picked
    return picked


# If nothing left after filtering
if filtered_df.empty:
    print("⚠️ No recipes found for the given filters! Try different tags or ingredients.")
else:
    # Try to use numeric columns first
    numeric_cols = filtered_df.select_dtypes(include=[np.number]).columns.tolist()
    X = None
    feature_type = None

    if numeric_cols:
        print("Using numeric columns for KNN:", numeric_cols)
        # NOTE(review): the columns are used unscaled, so large-valued columns dominate
        # the Euclidean distance — confirm this is intended.
        X = filtered_df[numeric_cols].fillna(0).values
        feature_type = 'numeric'
    else:
        # Fallback to TF-IDF on textual columns: recipe_name/name + tags + ingredients.
        print("No numeric columns found — falling back to TF-IDF on text columns.")
        # Missing columns are replaced by blank text WITHOUT adding them to filtered_df,
        # so the display code below never sees an artificial empty title column.
        recipe_names = _text_or_blank(filtered_df, 'recipe_name')
        # prefer recipe_name over name if both present
        title_series = recipe_names.where(recipe_names != '', _text_or_blank(filtered_df, 'name'))
        texts = (title_series + ' ' +
                 _text_or_blank(filtered_df, 'tags') + ' ' +
                 _text_or_blank(filtered_df, 'ingredients')).tolist()
        tfidf = TfidfVectorizer(max_features=1000, stop_words='english')
        try:
            X = tfidf.fit_transform(texts)   # sparse matrix
        except ValueError as exc:
            # Raised e.g. when every document is blank or contains only stop words.
            print(f"⚠️ Could not build TF-IDF features: {exc}")
        else:
            feature_type = 'text'

    # Validate feature matrix
    if X is None or X.shape[0] == 0:
        print("⚠️ Feature matrix X is empty — cannot run KNN.")
    else:
        n_samples = X.shape[0]
        # At least one sample exists here, so k is always >= 1.
        k = min(10, n_samples)

        # Fit KNN and compute neighbors of every row against the same data
        knn = NearestNeighbors(n_neighbors=k, metric='euclidean')
        knn.fit(X)
        distances, indices = knn.kneighbors(X)

        # Build the list of recommended row positions (excluding the item itself if possible)
        rec_idx_order = _collect_neighbors(indices, k)

        # If no distinct neighbours could be gathered (e.g. a single sample),
        # fall back to the first k raw neighbour indices.
        if not rec_idx_order:
            rec_idx_order = [int(i) for i in indices.ravel()[:k]]

        # Map back to rows in filtered_df (reset index to align 0..n-1). The resulting
        # index labels are the row positions and survive the sort below.
        recommended_df = filtered_df.reset_index(drop=True).iloc[rec_idx_order].copy()

        # Optionally sort by rating if it exists; values are compared numerically and
        # unparsable/blank ratings sort last instead of raising on mixed types.
        if 'rating' in recommended_df.columns:
            recommended_df = recommended_df.sort_values(
                by='rating',
                ascending=False,
                key=lambda col: pd.to_numeric(col, errors='coerce'),
            )

        # Print recommendations, skipping columns that are absent or blank for a row
        print(f"\n✅ Top {len(recommended_df)} Recommendations (feature type: {feature_type}):\n")
        for rank, (row_label, row) in enumerate(recommended_df.iterrows(), start=1):
            # row_label is the row's position in filtered_df, independent of sort order.
            display_name = _display_title(row, f"(row {row_label})")
            print(f"Recipe {rank}: {display_name}")
            for col, label in _DETAIL_FIELDS:
                if col in row.index and _is_present(row[col]):
                    print(f"  {label}: {row[col]}")
            print("-" * 60)
