<a href="https://colab.research.google.com/github/B-Wayne00/CapFiles/blob/main/RecipeRecommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Install Streamlit into the notebook runtime; -q keeps pip's output quiet.
# NOTE(review): the version is unpinned, and `%pip` is the usual way to target
# the kernel's own environment -- consider `%pip install -q streamlit==<version>`.
!pip install -q streamlit

import ast
from collections import Counter

import numpy as np
import pandas as pd
import streamlit as st
from scipy.sparse import hstack
from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import OneHotEncoder

# ---------- Load and Preprocess Data ----------
# Descriptor words removed from ingredient names so that variants such as
# "fresh basil" and "chopped basil" collapse to the same ingredient.
_INGREDIENT_MODIFIERS = frozenset({
    'ground', 'fresh', 'chopped', 'minced', 'diced', 'low-fat', 'boneless',
    'skinless', 'sliced', 'crushed', 'cooked', 'raw', 'shredded', 'frozen',
    'grated', 'large', 'small', 'extra', 'light', 'lean', 'reduced-fat', 'whole'
})


def _parse_ingredient_list(raw):
    """Parse one raw 'ingredients' cell into a list of ingredient strings.

    Non-string cells, cells that are not valid Python literals, and literals
    that are not a list/tuple all yield an empty list, so a single malformed
    row cannot abort the whole load.
    """
    if not isinstance(raw, str):
        return []
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        return []
    if not isinstance(parsed, (list, tuple)):
        return []
    return [str(item) for item in parsed]


def _simplify_ingredient(ingredient):
    """Lower-case an ingredient and drop modifier words (kept as-is if nothing remains)."""
    lowered = ingredient.lower()
    kept = [word for word in lowered.split() if word not in _INGREDIENT_MODIFIERS]
    return ' '.join(kept) if kept else lowered


def _split_ingredients(ingredient_string):
    """Tokenize a '|'-delimited ingredient string into stripped, non-empty tokens.

    Stripping matters: the strings are joined with ' | ', so an unstripped split
    would turn the same ingredient into different tokens depending on whether
    it is first, in the middle, or last.
    """
    return [token.strip() for token in ingredient_string.split('|') if token.strip()]


@st.cache_data
def load_data():
    """Load the recipe CSV, clean it, and build the similarity model.

    Steps:
      1. Read the CSV and verify the required columns exist.
      2. Parse and simplify each recipe's ingredient list.
      3. Drop exact duplicates, then near-duplicates (TF-IDF cosine > 0.9).
      4. Keep only ingredients that occur in at least 5 recipes.
      5. Build a feature matrix from ingredient TF-IDF plus one-hot cuisine.
      6. Assign a KMeans cluster to each recipe and fit a cosine KNN model.

    Returns:
        tuple: ``(df, knn_model, X)`` where ``df`` is the cleaned DataFrame
        (with a 0..n-1 index and a ``cluster`` column), ``knn_model`` is the
        fitted ``NearestNeighbors`` instance, and ``X`` is the CSR feature
        matrix whose rows are aligned with ``df``.
    """
    url = "https://raw.githubusercontent.com/B-Wayne00/CapFiles/refs/heads/main/RecipesFin.csv"
    duplicate_threshold = 0.9   # cosine similarity above which recipes count as duplicates
    min_ingredient_count = 5    # ingredients rarer than this are dropped from the features

    df = pd.read_csv(url)

    # Every column used below (or by the UI) must be present.
    for column in ('cuisine_type', 'ingredients', 'name'):
        if column not in df.columns:
            st.error(f"The column '{column}' is missing in RecipesFin.csv")
            st.stop()

    df['ingredient_list'] = df['ingredients'].apply(_parse_ingredient_list)
    df['simplified_ingredients'] = df['ingredient_list'].apply(
        lambda lst: [_simplify_ingredient(ing) for ing in lst]
    )
    # Sorted so that the same ingredients in a different order compare equal.
    df['ingredient_string'] = df['simplified_ingredients'].apply(lambda lst: ' '.join(sorted(lst)))

    # Exact duplicates first; the reset keeps labels equal to row positions.
    df = df.drop_duplicates(subset='ingredient_string').reset_index(drop=True)

    # Near-duplicates: a recipe is removed when any EARLIER recipe is more
    # similar than the threshold, i.e. when its column in the strict upper
    # triangle of the similarity matrix contains a value above the threshold.
    vectorizer_dedupe = TfidfVectorizer()
    X_dedupe = vectorizer_dedupe.fit_transform(df['ingredient_string'])
    cos_sim = cosine_similarity(X_dedupe)
    is_near_duplicate = (np.triu(cos_sim, k=1) > duplicate_threshold).any(axis=0)
    df = df.loc[~is_near_duplicate].reset_index(drop=True)

    # Restrict features to ingredients that appear often enough to be informative.
    ingredient_counts = Counter(
        ing for sublist in df['simplified_ingredients'] for ing in sublist
    )
    frequent_ingredients = {
        ing for ing, count in ingredient_counts.items() if count >= min_ingredient_count
    }

    df['filtered_ingredients'] = df['simplified_ingredients'].apply(
        lambda lst: [ing for ing in lst if ing in frequent_ingredients]
    )
    df['ingredient_string'] = df['filtered_ingredients'].apply(lambda lst: ' | '.join(lst))

    # Each whole ingredient is one token; bigrams pair adjacent ingredients.
    vectorizer = TfidfVectorizer(
        tokenizer=_split_ingredients,
        lowercase=False,
        ngram_range=(1, 2),
        token_pattern=None
    )
    X_ingredients = vectorizer.fit_transform(df['ingredient_string'])

    encoder = OneHotEncoder(handle_unknown='ignore')
    X_cuisine = encoder.fit_transform(df[['cuisine_type']])

    # hstack defaults to COO, which cannot be row-indexed; CSR can, and the
    # recommender looks up a single row of this matrix.
    X = hstack([X_ingredients, X_cuisine], format='csr')

    kmeans = KMeans(n_clusters=8, init='k-means++', random_state=42)
    df['cluster'] = kmeans.fit_predict(X)

    knn_model = NearestNeighbors(n_neighbors=6, metric='cosine')
    knn_model.fit(X)

    return df, knn_model, X


# ---------- Recommendation Function ----------
def recommend_similar_recipes(recipe_name, df, knn_model, feature_matrix, n_recommendations=5):
    """Return the recipes most similar to ``recipe_name``.

    Args:
        recipe_name: Value to look up in ``df['name']``; the first match is used.
        df: Recipe DataFrame with ``name`` and ``cuisine_type`` columns, whose
            rows are aligned positionally with ``feature_matrix``.
        knn_model: Fitted model exposing ``kneighbors(X, n_neighbors=...)`` and
            returning ``(distances, indices)``.
        feature_matrix: Feature matrix the model was fitted on. Any SciPy sparse
            format is accepted (it is converted to CSR for row access).
        n_recommendations: Maximum number of recommendations to return.

    Returns:
        list[dict]: One dict per recommendation with the keys ``'Recipe Name'``,
        ``'Cuisine'`` and ``'Similarity Score'`` (``1 - distance`` formatted to
        three decimals). Empty when the recipe is unknown or nothing else is
        available.
    """
    if n_recommendations <= 0:
        return []

    # Positional lookup: rows of feature_matrix are addressed by position, so
    # do not rely on the DataFrame's index labels.
    matches = (df['name'] == recipe_name).to_numpy().nonzero()[0]
    if len(matches) == 0:
        return []
    recipe_pos = int(matches[0])

    # COO matrices (the default result of scipy.sparse.hstack) cannot be
    # indexed; tocsr() is a no-op for matrices that are already CSR.
    if hasattr(feature_matrix, 'tocsr'):
        feature_matrix = feature_matrix.tocsr()

    # One extra neighbour for the query itself, never more than the rows available.
    n_neighbors = min(n_recommendations + 1, feature_matrix.shape[0])

    recipe_vector = feature_matrix[[recipe_pos]]  # list index keeps the row 2-D
    distances, indices = knn_model.kneighbors(recipe_vector, n_neighbors=n_neighbors)

    recommendations = []
    for idx, dist in zip(indices.flatten(), distances.flatten()):
        # Exclude the query by position, not by rank: with tied distances the
        # query is not guaranteed to be the first neighbour returned.
        if idx == recipe_pos:
            continue
        if len(recommendations) == n_recommendations:
            break
        row = df.iloc[idx]
        recommendations.append({
            'Recipe Name': row['name'],
            'Cuisine': row['cuisine_type'],
            'Similarity Score': f"{(1 - dist):.3f}"
        })

    return recommendations


# ---------- Streamlit UI ----------
# Page setup, then load the cached dataset and similarity model.
st.set_page_config(page_title="Recipe Recommender", layout="centered")
st.title("🍽️ Recipe Recommender")

df, knn_model, X = load_data()

st.markdown("You can either:")
st.markdown("1. **Select a cuisine and recipe**, _or_  \n2. **Enter a User ID** (for future personalization)")

st.divider()

# Option 1: Choose a cuisine and recipe
st.subheader("🔍 Option 1: Choose a Cuisine and a Recipe")

# Drop missing cuisines first: sorting NaN together with strings raises TypeError.
cuisines = sorted(df['cuisine_type'].dropna().unique())
selected_cuisine = st.selectbox("Select a cuisine:", cuisines)

# Only offer recipes of the chosen cuisine, without missing names.
filtered_df = df[df['cuisine_type'] == selected_cuisine]
recipe_names = filtered_df['name'].dropna().sort_values().unique()
selected_recipe = st.selectbox("Select a recipe:", recipe_names)

# Option 2: Enter user ID
st.subheader("👤 Option 2: Enter Your User ID")
# Strip so that a whitespace-only entry counts as "no user ID".
user_id = st.text_input("User ID (optional):").strip()

# Trigger recommendations
if st.button("Get Recommendations"):
    if user_id:
        st.info(f"🔐 Personalized recommendations for User ID **{user_id}** coming soon!")
        # Placeholder for future user-personalized logic
    else:
        recommendations = recommend_similar_recipes(selected_recipe, df, knn_model, X)
        if recommendations:
            st.success(f"Top recommendations similar to **{selected_recipe}**:")
            st.table(recommendations)
        else:
            st.warning("No recommendations found for the selected recipe.")


2025-06-29 02:30:13.610 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-06-29 02:30:13.614 No runtime found, using MemoryCacheStorageManager
2025-06-29 02:30:17.711 Session state does not function when running a script without `streamlit run`


In [1]:
# Start the Streamlit app in the background (`&`) and expose port 8501
# through localtunnel so it can be reached from outside the runtime.
# NOTE(review): no visible cell writes app.py -- presumably the app code must
# first be saved to that file (e.g. with `%%writefile app.py`); confirm.
!streamlit run app.py & npx localtunnel --port 8501

[1G[0K⠙[1G[0K⠹
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.150.223.29:8501[0m
[0m
[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0Kyour url is: https://ready-melons-type.loca.lt
[34m  Stopping...[0m
^C
